X-Git-Url: https://perl5.git.perl.org/perl5.git/blobdiff_plain/fdf4dddd9dbb73e29cd736f1be8a86925999c9b8..5778acb683d3de419b2dfba40a650762eeedaa14:/pp.c diff --git a/pp.c b/pp.c index 02ad594..4e2d26a 100644 --- a/pp.c +++ b/pp.c @@ -47,6 +47,9 @@ extern Pid_t getpid (void); _LIB_VERSION_TYPE _LIB_VERSION = _IEEE_; #endif +static const STRLEN small_mu_len = sizeof(GREEK_SMALL_LETTER_MU_UTF8) - 1; +static const STRLEN capital_iota_len = sizeof(GREEK_CAPITAL_LETTER_IOTA_UTF8) - 1; + /* variations on pp_null */ PP(pp_stub) @@ -65,8 +68,8 @@ PP(pp_padav) dVAR; dSP; dTARGET; I32 gimme; assert(SvTYPE(TARG) == SVt_PVAV); - if (PL_op->op_private & OPpLVAL_INTRO) - if (!(PL_op->op_private & OPpPAD_STATE)) + if (UNLIKELY( PL_op->op_private & OPpLVAL_INTRO )) + if (LIKELY( !(PL_op->op_private & OPpPAD_STATE) )) SAVECLEARSV(PAD_SVl(PL_op->op_targ)); EXTEND(SP, 1); if (PL_op->op_flags & OPf_REF) { @@ -85,23 +88,27 @@ PP(pp_padav) gimme = GIMME_V; if (gimme == G_ARRAY) { /* XXX see also S_pushav in pp_hot.c */ - const I32 maxarg = AvFILL(MUTABLE_AV(TARG)) + 1; + const Size_t maxarg = AvFILL(MUTABLE_AV(TARG)) + 1; EXTEND(SP, maxarg); if (SvMAGICAL(TARG)) { - U32 i; - for (i=0; i < (U32)maxarg; i++) { + Size_t i; + for (i=0; i < maxarg; i++) { SV * const * const svp = av_fetch(MUTABLE_AV(TARG), i, FALSE); SP[i+1] = (svp) ? *svp : &PL_sv_undef; } } else { - Copy(AvARRAY((const AV *)TARG), SP+1, maxarg, SV*); + PADOFFSET i; + for (i=0; i < (PADOFFSET)maxarg; i++) { + SV * const sv = AvARRAY((const AV *)TARG)[i]; + SP[i+1] = sv ? sv : &PL_sv_undef; + } } SP += maxarg; } else if (gimme == G_SCALAR) { SV* const sv = sv_newmortal(); - const I32 maxarg = AvFILL(MUTABLE_AV(TARG)) + 1; + const SSize_t maxarg = AvFILL(MUTABLE_AV(TARG)) + 1; sv_setiv(sv, maxarg); PUSHs(sv); } @@ -115,8 +122,8 @@ PP(pp_padhv) assert(SvTYPE(TARG) == SVt_PVHV); XPUSHs(TARG); - if (PL_op->op_private & OPpLVAL_INTRO) - if (!(PL_op->op_private & OPpPAD_STATE)) + if (UNLIKELY( PL_op->op_private & OPpLVAL_INTRO )) + if (LIKELY( !(PL_op->op_private & OPpPAD_STATE) )) SAVECLEARSV(PAD_SVl(PL_op->op_targ)); if (PL_op->op_flags & OPf_REF) RETURN; @@ -174,7 +181,7 @@ PP(pp_clonecv) /* If this changes to use SAVECLEARSV, we can move the SAVECLEARSV to introcv and remove the SvPADSTALE_off. */ SAVEPADSVANDMORTALIZE(ARGTARG); - PAD_SVl(ARGTARG) = mg->mg_obj; + PAD_SVl(ARGTARG) = SvREFCNT_inc_simple_NN(mg->mg_obj); } else { if (CvROOT(mg->mg_obj)) { @@ -219,8 +226,9 @@ S_rv2gv(pTHX_ SV *sv, const bool vivify_sv, const bool strict, SvREFCNT_inc_void_NN(sv); sv = MUTABLE_SV(gv); } - else if (!isGV_with_GP(sv)) - return (SV *)Perl_die(aTHX_ "Not a GLOB reference"); + else if (!isGV_with_GP(sv)) { + Perl_die(aTHX_ "Not a GLOB reference"); + } } else { if (!isGV_with_GP(sv)) { @@ -234,13 +242,15 @@ S_rv2gv(pTHX_ SV *sv, const bool vivify_sv, const bool strict, Perl_croak_no_modify(); if (cUNOP->op_targ) { SV * const namesv = PAD_SV(cUNOP->op_targ); + HV *stash = CopSTASH(PL_curcop); + if (SvTYPE(stash) != SVt_PVHV) stash = NULL; gv = MUTABLE_GV(newSV(0)); - gv_init_sv(gv, CopSTASH(PL_curcop), namesv, 0); + gv_init_sv(gv, stash, namesv, 0); } else { const char * const name = CopSTASHPV(PL_curcop); gv = newGVgen_flags(name, - HvNAMEUTF8(CopSTASH(PL_curcop)) ? SVf_UTF8 : 0 ); + HvNAMEUTF8(CopSTASH(PL_curcop)) ? SVf_UTF8 : 0 ); } prepare_SV_for_RV(sv); SvRV_set(sv, MUTABLE_SV(gv)); @@ -248,8 +258,9 @@ S_rv2gv(pTHX_ SV *sv, const bool vivify_sv, const bool strict, SvSETMAGIC(sv); goto wasref; } - if (PL_op->op_flags & OPf_REF || strict) - return (SV *)Perl_die(aTHX_ PL_no_usym, "a symbol"); + if (PL_op->op_flags & OPf_REF || strict) { + Perl_die(aTHX_ PL_no_usym, "a symbol"); + } if (ckWARN(WARN_UNINITIALIZED)) report_uninit(sv); return &PL_sv_undef; @@ -262,14 +273,14 @@ S_rv2gv(pTHX_ SV *sv, const bool vivify_sv, const bool strict, return &PL_sv_undef; } else { - if (strict) - return - (SV *)Perl_die(aTHX_ - S_no_symref_sv, - sv, - (SvPOKp(sv) && SvCUR(sv)>32 ? "..." : ""), - "a symbol" - ); + if (strict) { + Perl_die(aTHX_ + S_no_symref_sv, + sv, + (SvPOKp(sv) && SvCUR(sv)>32 ? "..." : ""), + "a symbol" + ); + } if ((PL_op->op_private & (OPpLVAL_INTRO|OPpDONT_INIT_GV)) == OPpDONT_INIT_GV) { /* We are the target of a coderef assignment. Return @@ -425,7 +436,7 @@ PP(pp_pos) dVAR; dSP; dPOPss; if (PL_op->op_flags & OPf_MOD || LVRET) { - SV * const ret = sv_2mortal(newSV_type(SVt_PVLV)); /* Not TARG RT#67838 */ + SV * const ret = sv_2mortal(newSV_type(SVt_PVLV));/* Not TARG RT#67838 */ sv_magic(ret, NULL, PERL_MAGIC_pos, NULL, 0); LvTYPE(ret) = '.'; LvTARG(ret) = SvREFCNT_inc_simple(sv); @@ -433,18 +444,16 @@ PP(pp_pos) RETURN; } else { - if (SvTYPE(sv) >= SVt_PVMG && SvMAGIC(sv)) { - const MAGIC * const mg = mg_find(sv, PERL_MAGIC_regex_global); - if (mg && mg->mg_len >= 0) { + const MAGIC * const mg = mg_find_mglob(sv); + if (mg && mg->mg_len != -1) { dTARGET; - I32 i = mg->mg_len; - if (DO_UTF8(sv)) - sv_pos_b2u(sv, &i); - PUSHi(i); + STRLEN i = mg->mg_len; + if (mg->mg_flags & MGf_BYTES && DO_UTF8(sv)) + i = sv_pos_b2u_flags(sv, i, SV_GMAGIC|SV_CONST_RETURN); + PUSHu(i); RETURN; } - } - RETPUSHUNDEF; + RETPUSHUNDEF; } } @@ -455,7 +464,8 @@ PP(pp_rv2cv) HV *stash_unused; const I32 flags = (PL_op->op_flags & OPf_SPECIAL) ? GV_ADDMG - : ((PL_op->op_private & (OPpLVAL_INTRO|OPpMAY_RETURN_CONSTANT)) == OPpMAY_RETURN_CONSTANT) + : ((PL_op->op_private & (OPpLVAL_INTRO|OPpMAY_RETURN_CONSTANT)) + == OPpMAY_RETURN_CONSTANT) ? GV_ADD|GV_NOEXPAND : GV_ADD; /* We usually try to add a non-existent subroutine in case of AUTOLOAD. */ @@ -485,11 +495,9 @@ PP(pp_prototype) const char * s = SvPVX_const(TOPs); if (strnEQ(s, "CORE::", 6)) { const int code = keyword(s + 6, SvCUR(TOPs) - 6, 1); - if (!code || code == -KEY_CORE) - DIE(aTHX_ "Can't find an opnumber for \"%"SVf"\"", - SVfARG(newSVpvn_flags( - s+6, SvCUR(TOPs)-6, SvFLAGS(TOPs) & SVf_UTF8 - ))); + if (!code) + DIE(aTHX_ "Can't find an opnumber for \"%"UTF8f"\"", + UTF8fARG(SvFLAGS(TOPs) & SVf_UTF8, SvCUR(TOPs)-6, s+6)); { SV * const sv = core_prototype(NULL, s + 6, code, NULL); if (sv) ret = sv; @@ -565,8 +573,10 @@ S_refto(pTHX_ SV *sv) SvTEMP_off(sv); SvREFCNT_inc_void_NN(sv); } - else if (SvPADTMP(sv) && !IS_PADGV(sv)) + else if (SvPADTMP(sv)) { + assert(!IS_PADGV(sv)); sv = newSVsv(sv); + } else { SvTEMP_off(sv); SvREFCNT_inc_void_NN(sv); @@ -583,10 +593,8 @@ PP(pp_ref) dVAR; dSP; dTARGET; SV * const sv = POPs; - if (sv) - SvGETMAGIC(sv); - - if (!sv || !SvROK(sv)) + SvGETMAGIC(sv); + if (!SvROK(sv)) RETPUSHNO; (void)sv_ref(TARG,SvRV(sv),TRUE); @@ -600,17 +608,31 @@ PP(pp_bless) HV *stash; if (MAXARG == 1) + { curstash: stash = CopSTASH(PL_curcop); + if (SvTYPE(stash) != SVt_PVHV) + Perl_croak(aTHX_ "Attempt to bless into a freed package"); + } else { SV * const ssv = POPs; STRLEN len; const char *ptr; if (!ssv) goto curstash; - if (!SvGMAGICAL(ssv) && !SvAMAGIC(ssv) && SvROK(ssv)) + SvGETMAGIC(ssv); + if (SvROK(ssv)) { + if (!SvAMAGIC(ssv)) { + frog: Perl_croak(aTHX_ "Attempt to bless into a reference"); - ptr = SvPV_const(ssv,len); + } + /* SvAMAGIC is on here, but it only means potentially overloaded, + so after stringification: */ + ptr = SvPV_nomg_const(ssv,len); + /* We need to check the flag again: */ + if (!SvAMAGIC(ssv)) goto frog; + } + else ptr = SvPV_nomg_const(ssv,len); if (len == 0) Perl_ck_warner(aTHX_ packWARN(WARN_MISC), "Explicit blessing to '' (assuming package main)"); @@ -638,7 +660,12 @@ PP(pp_gelem) switch (*elem) { case 'A': if (len == 5 && strEQ(second_letter, "RRAY")) + { tmpRef = MUTABLE_SV(GvAV(gv)); + if (tmpRef && !AvREAL((const AV *)tmpRef) + && AvREIFY((const AV *)tmpRef)) + av_reify(MUTABLE_AV(tmpRef)); + } break; case 'C': if (len == 4 && strEQ(second_letter, "ODE")) @@ -962,7 +989,12 @@ PP(pp_undef) "Constant subroutine %"SVf" undefined", SVfARG(CvANON((const CV *)sv) ? newSVpvs_flags("(anonymous)", SVs_TEMP) - : sv_2mortal(newSVhek(GvENAME_HEK(CvGV((const CV *)sv)))))); + : sv_2mortal(newSVhek( + CvNAMED(sv) + ? CvNAME_HEK((CV *)sv) + : GvENAME_HEK(CvGV((const CV *)sv)) + )) + )); /* FALLTHROUGH */ case SVt_PVFM: { @@ -996,10 +1028,13 @@ PP(pp_undef) else stash = NULL; } + SvREFCNT_inc_simple_void_NN(sv_2mortal(sv)); gp_free(MUTABLE_GV(sv)); Newxz(gp, 1, GP); GvGP_set(sv, gp_ref(gp)); +#ifndef PERL_DONT_CREATE_GVSV GvSV(sv) = newSV(0); +#endif GvLINE(sv) = CopLINE(PL_curcop); GvEGV(sv) = MUTABLE_GV(sv); GvMULTI_on(sv); @@ -1621,33 +1656,36 @@ PP(pp_repeat) else count = uv; } else { - const IV iv = SvIV_nomg(sv); - if (iv < 0) - count = 0; - else - count = iv; + count = SvIV_nomg(sv); } } else if (SvNOKp(sv)) { const NV nv = SvNV_nomg(sv); if (nv < 0.0) - count = 0; + count = -1; /* An arbitrary negative integer */ else count = (IV)nv; } else count = SvIV_nomg(sv); + if (count < 0) { + count = 0; + Perl_ck_warner(aTHX_ packWARN(WARN_NUMERIC), + "Negative repeat count does nothing"); + } + if (GIMME == G_ARRAY && PL_op->op_private & OPpREPEAT_DOLIST) { dMARK; - static const char oom_list_extend[] = "Out of memory during list extend"; + static const char* const oom_list_extend = "Out of memory during list extend"; const I32 items = SP - MARK; const I32 max = items * count; + const U8 mod = PL_op->op_flags & OPf_MOD; MEM_WRAP_CHECK_1(max, SV*, oom_list_extend); /* Did the max computation overflow? */ if (items > 0 && max > 0 && (max < items || max < count)) - Perl_croak(aTHX_ oom_list_extend); + Perl_croak(aTHX_ "%s", oom_list_extend); MEXTEND(MARK, max); if (count > 1) { while (SP > MARK) { @@ -1675,8 +1713,13 @@ PP(pp_repeat) SvREADONLY_on(*SP); } #else - if (*SP) + if (*SP) { + if (mod && SvPADTMP(*SP)) { + assert(!IS_PADGV(*SP)); + *SP = sv_mortalcopy(*SP); + } SvTEMP_off((*SP)); + } #endif SP--; } @@ -1692,7 +1735,7 @@ PP(pp_repeat) SV * const tmpstr = POPs; STRLEN len; bool isutf; - static const char oom_string_extend[] = + static const char* const oom_string_extend = "Out of memory during string extend"; if (TARG != tmpstr) @@ -1705,7 +1748,7 @@ PP(pp_repeat) else { const STRLEN max = (UV)count * len; if (len > MEM_SIZE_MAX / count) - Perl_croak(aTHX_ oom_string_extend); + Perl_croak(aTHX_ "%s", oom_string_extend); MEM_WRAP_CHECK_1(max, char, oom_string_extend); SvGROW(TARG, max + 1); repeatcpy(SvPVX(TARG) + len, SvPVX(TARG), len, count - 1); @@ -2094,9 +2137,13 @@ PP(pp_sle) tryAMAGICbin_MG(amg_type, AMGf_set); { dPOPTOPssrl; - const int cmp = (IN_LOCALE_RUNTIME - ? sv_cmp_locale_flags(left, right, 0) - : sv_cmp_flags(left, right, 0)); + const int cmp = +#ifdef USE_LOCALE_COLLATE + (IN_LC_RUNTIME(LC_COLLATE)) + ? sv_cmp_locale_flags(left, right, 0) + : +#endif + sv_cmp_flags(left, right, 0); SETs(boolSV(cmp * multiplier < rhs)); RETURN; } @@ -2130,9 +2177,13 @@ PP(pp_scmp) tryAMAGICbin_MG(scmp_amg, 0); { dPOPTOPssrl; - const int cmp = (IN_LOCALE_RUNTIME - ? sv_cmp_locale_flags(left, right, 0) - : sv_cmp_flags(left, right, 0)); + const int cmp = +#ifdef USE_LOCALE_COLLATE + (IN_LC_RUNTIME(LC_COLLATE)) + ? sv_cmp_locale_flags(left, right, 0) + : +#endif + sv_cmp_flags(left, right, 0); SETi( cmp ); RETURN; } @@ -2237,7 +2288,8 @@ PP(pp_negate) if (SvIsUV(sv)) { if (SvIVX(sv) == IV_MIN) { /* 2s complement assumption. */ - SETi(SvIVX(sv)); /* special case: -((UV)IV_MAX+1) == IV_MIN */ + SETi(SvIVX(sv)); /* special case: -((UV)IV_MAX+1) == + IV_MIN */ RETURN; } else if (SvUVX(sv) <= IV_MAX) { @@ -2295,9 +2347,8 @@ PP(pp_complement) I32 anum; STRLEN len; - (void)SvPV_nomg_const(sv,len); /* force check for uninit var */ - sv_setsv_nomg(TARG, sv); - tmps = (U8*)SvPV_force_nomg(TARG, len); + sv_copypv_nomg(TARG, sv); + tmps = (U8*)SvPV_nomg(TARG, len); anum = len; if (SvUTF8(TARG)) { /* Calculate exact length, let's not estimate. */ @@ -2689,10 +2740,6 @@ PP(pp_sin) --Jarkko Hietaniemi 27 September 1998 */ -#ifndef HAS_DRAND48_PROTO -extern double drand48 (void); -#endif - PP(pp_rand) { dVAR; @@ -2934,6 +2981,7 @@ Perl_translate_substr_offsets(pTHX_ STRLEN curlen, IV pos1_iv, int pos2_is_uv; PERL_ARGS_ASSERT_TRANSLATE_SUBSTR_OFFSETS; + PERL_UNUSED_CONTEXT; if (!pos1_is_uv && pos1_iv < 0 && curlen) { pos1_is_uv = curlen-1 > ~(UV)pos1_iv; @@ -3162,8 +3210,8 @@ PP(pp_index) SV *temp = NULL; STRLEN biglen; STRLEN llen = 0; - I32 offset; - I32 retval; + SSize_t offset = 0; + SSize_t retval; const char *big_p; const char *little_p; bool big_utf8; @@ -3246,13 +3294,13 @@ PP(pp_index) offset = is_index ? 0 : biglen; else { if (big_utf8 && offset > 0) - sv_pos_u2b(big, &offset, 0); + offset = sv_pos_u2b_flags(big, offset, 0, SV_CONST_RETURN); if (!is_index) offset += llen; } if (offset < 0) offset = 0; - else if (offset > (I32)biglen) + else if (offset > (SSize_t)biglen) offset = biglen; if (!(little_p = is_index ? fbm_instr((unsigned char*)big_p + offset, @@ -3263,7 +3311,7 @@ PP(pp_index) else { retval = little_p - big_p; if (retval > 0 && big_utf8) - sv_pos_b2u(big, &retval); + retval = sv_pos_b2u_flags(big, retval, SV_CONST_RETURN); } SvREFCNT_dec(temp); fail: @@ -3293,12 +3341,13 @@ PP(pp_ord) if (PL_encoding && SvPOK(argsv) && !DO_UTF8(argsv)) { SV * const tmpsv = sv_2mortal(newSVsv(argsv)); s = (U8*)sv_recode_to_utf8(tmpsv, PL_encoding); + len = UTF8SKIP(s); /* Should be well-formed; so this is its length */ argsv = tmpsv; } - XPUSHu(DO_UTF8(argsv) ? - utf8n_to_uvchr(s, UTF8_MAXBYTES, 0, UTF8_ALLOW_ANYUV) : - (UV)(*s & 0xff)); + XPUSHu(DO_UTF8(argsv) + ? utf8n_to_uvchr(s, len, 0, UTF8_ALLOW_ANYUV) + : (UV)(*s)); RETURN; } @@ -3323,7 +3372,7 @@ PP(pp_chr) top = top2; } Perl_warner(aTHX_ packWARN(WARN_UTF8), - "Invalid negative number (%"SVf") in chr", top); + "Invalid negative number (%"SVf") in chr", SVfARG(top)); } value = UNICODE_REPLACEMENT; } else { @@ -3422,15 +3471,6 @@ PP(pp_crypt) /* Generally UTF-8 and UTF-EBCDIC are indistinguishable at this level. So * most comments below say UTF-8, when in fact they mean UTF-EBCDIC as well */ -/* Generates code to store a unicode codepoint c that is known to occupy - * exactly two UTF-8 and UTF-EBCDIC bytes; it is stored into p and p+1, - * and p is advanced to point to the next available byte after the two bytes */ -#define CAT_UNI_TO_UTF8_TWO_BYTE(p, c) \ - STMT_START { \ - *(p)++ = UTF8_TWO_BYTE_HI(c); \ - *((p)++) = UTF8_TWO_BYTE_LO(c); \ - } STMT_END - PP(pp_ucfirst) { /* Actually is both lcfirst() and ucfirst(). Only the first character @@ -3456,22 +3496,16 @@ PP(pp_ucfirst) STRLEN tculen; /* tculen is the byte length of the freshly titlecased (or * lowercased) character stored in tmpbuf. May be either * UTF-8 or not, but in either case is the number of bytes */ - bool tainted = FALSE; - SvGETMAGIC(source); - if (SvOK(source)) { - s = (const U8*)SvPV_nomg_const(source, slen); - } else { - if (ckWARN(WARN_UNINITIALIZED)) - report_uninit(source); - s = (const U8*)""; - slen = 0; - } + s = (const U8*)SvPV_const(source, slen); /* We may be able to get away with changing only the first character, in * place, but not if read-only, etc. Later we may discover more reasons to * not convert in-place. */ - inplace = SvPADTMP(source) && !SvREADONLY(source) && SvTEMP(source); + inplace = !SvREADONLY(source) + && ( SvPADTMP(source) + || ( SvTEMP(source) && !SvSMAGICAL(source) + && SvREFCNT(source) == 1)); /* First calculate what the changed first character should be. This affects * whether we can just swap it out, leaving the rest of the string unchanged, @@ -3485,12 +3519,18 @@ PP(pp_ucfirst) doing_utf8 = TRUE; ulen = UTF8SKIP(s); if (op_type == OP_UCFIRST) { - _to_utf8_title_flags(s, tmpbuf, &tculen, - cBOOL(IN_LOCALE_RUNTIME), &tainted); +#ifdef USE_LOCALE_CTYPE + _to_utf8_title_flags(s, tmpbuf, &tculen, IN_LC_RUNTIME(LC_CTYPE)); +#else + _to_utf8_title_flags(s, tmpbuf, &tculen, 0); +#endif } else { - _to_utf8_lower_flags(s, tmpbuf, &tculen, - cBOOL(IN_LOCALE_RUNTIME), &tainted); +#ifdef USE_LOCALE_CTYPE + _to_utf8_lower_flags(s, tmpbuf, &tculen, IN_LC_RUNTIME(LC_CTYPE)); +#else + _to_utf8_lower_flags(s, tmpbuf, &tculen, 0); +#endif } /* we can't do in-place if the length changes. */ @@ -3508,22 +3548,42 @@ PP(pp_ucfirst) if (op_type == OP_LCFIRST) { /* lower case the first letter: no trickiness for any character */ - *tmpbuf = (IN_LOCALE_RUNTIME) ? toLOWER_LC(*s) : - ((IN_UNI_8_BIT) ? toLOWER_LATIN1(*s) : toLOWER(*s)); + *tmpbuf = +#ifdef USE_LOCALE_CTYPE + (IN_LC_RUNTIME(LC_CTYPE)) + ? toLOWER_LC(*s) + : +#endif + (IN_UNI_8_BIT) + ? toLOWER_LATIN1(*s) + : toLOWER(*s); } /* is ucfirst() */ - else if (IN_LOCALE_RUNTIME) { - *tmpbuf = toUPPER_LC(*s); /* This would be a bug if any locales - * have upper and title case different - */ +#ifdef USE_LOCALE_CTYPE + else if (IN_LC_RUNTIME(LC_CTYPE)) { + if (IN_UTF8_CTYPE_LOCALE) { + goto do_uni_rules; + } + + *tmpbuf = (U8) toUPPER_LC(*s); /* This would be a bug if any + locales have upper and title case + different */ } +#endif else if (! IN_UNI_8_BIT) { *tmpbuf = toUPPER(*s); /* Returns caseless for non-ascii, or * on EBCDIC machines whatever the * native function does */ } - else { /* is ucfirst non-UTF-8, not in locale, and cased latin1 */ - UV title_ord = _to_upper_title_latin1(*s, tmpbuf, &tculen, 's'); + else { + /* Here, is ucfirst non-UTF-8, not in locale (unless that locale is + * UTF-8, which we treat as not in locale), and cased latin1 */ + UV title_ord; +#ifdef USE_LOCALE_CTYPE + do_uni_rules: +#endif + + title_ord = _to_upper_title_latin1(*s, tmpbuf, &tculen, 's'); if (tculen > 1) { assert(tculen == 2); @@ -3540,10 +3600,10 @@ PP(pp_ucfirst) * replace just the first character in place. */ inplace = FALSE; - /* If the result won't fit in a byte, the entire result will - * have to be in UTF-8. Assume worst case sizing in - * conversion. (all latin1 characters occupy at most two bytes - * in utf8) */ + /* If the result won't fit in a byte, the entire result + * will have to be in UTF-8. Assume worst case sizing in + * conversion. (all latin1 characters occupy at most two + * bytes in utf8) */ if (title_ord > 255) { doing_utf8 = TRUE; convert_source_to_utf8 = TRUE; @@ -3624,17 +3684,9 @@ PP(pp_ucfirst) SvCUR_set(dest, need - 1); } - if (tainted) { - TAINT; - SvTAINTED_on(dest); - } } else { /* Neither source nor dest are in or need to be UTF-8 */ if (slen) { - if (IN_LOCALE_RUNTIME) { - TAINT; - SvTAINTED_on(dest); - } if (inplace) { /* in-place, only need to change the 1st char */ *d = *tmpbuf; } @@ -3653,7 +3705,7 @@ PP(pp_ucfirst) /* In a "use bytes" we don't treat the source as UTF-8, but, still want * the destination to retain that flag */ - if (SvUTF8(source)) + if (SvUTF8(source) && ! IN_BYTES) SvUTF8_on(dest); if (!inplace) { /* Finish the rest of the string, unchanged */ @@ -3662,6 +3714,12 @@ PP(pp_ucfirst) SvCUR_set(dest, need - 1); } } +#ifdef USE_LOCALE_CTYPE + if (IN_LC_RUNTIME(LC_CTYPE)) { + TAINT; + SvTAINTED_on(dest); + } +#endif if (dest != source && SvTAINTED(source)) SvTAINT(dest); SvSETMAGIC(dest); @@ -3684,17 +3742,29 @@ PP(pp_uc) SvGETMAGIC(source); - if (SvPADTMP(source) && !SvREADONLY(source) && !SvAMAGIC(source) - && SvTEMP(source) && !DO_UTF8(source) - && (IN_LOCALE_RUNTIME || ! IN_UNI_8_BIT)) { - - /* We can convert in place. The reason we can't if in UNI_8_BIT is to - * make the loop tight, so we overwrite the source with the dest before - * looking at it, and we need to look at the original source - * afterwards. There would also need to be code added to handle - * switching to not in-place in midstream if we run into characters - * that change the length. - */ + if ((SvPADTMP(source) + || + (SvTEMP(source) && !SvSMAGICAL(source) && SvREFCNT(source) == 1)) + && !SvREADONLY(source) && SvPOK(source) + && !DO_UTF8(source) + && ( +#ifdef USE_LOCALE_CTYPE + (IN_LC_RUNTIME(LC_CTYPE)) + ? ! IN_UTF8_CTYPE_LOCALE + : +#endif + ! IN_UNI_8_BIT)) + { + + /* We can convert in place. The reason we can't if in UNI_8_BIT is to + * make the loop tight, so we overwrite the source with the dest before + * looking at it, and we need to look at the original source + * afterwards. There would also need to be code added to handle + * switching to not in-place in midstream if we run into characters + * that change the length. Since being in locale overrides UNI_8_BIT, + * that latter becomes irrelevant in the above test; instead for + * locale, the size can't normally change, except if the locale is a + * UTF-8 one */ dest = source; s = d = (U8*)SvPV_force_nomg(source, len); min = len + 1; @@ -3703,21 +3773,7 @@ PP(pp_uc) dest = TARG; - /* The old implementation would copy source into TARG at this point. - This had the side effect that if source was undef, TARG was now - an undefined SV with PADTMP set, and they don't warn inside - sv_2pv_flags(). However, we're now getting the PV direct from - source, which doesn't have PADTMP set, so it would warn. Hence the - little games. */ - - if (SvOK(source)) { - s = (const U8*)SvPV_nomg_const(source, len); - } else { - if (ckWARN(WARN_UNINITIALIZED)) - report_uninit(source); - s = (const U8*)""; - len = 0; - } + s = (const U8*)SvPV_nomg_const(source, len); min = len + 1; SvUPGRADE(dest, SVt_PV); @@ -3732,8 +3788,7 @@ PP(pp_uc) if (DO_UTF8(source)) { const U8 *const send = s + len; - U8 tmpbuf[UTF8_MAXBYTES+1]; - bool tainted = FALSE; + U8 tmpbuf[UTF8_MAXBYTES_CASE+1]; /* All occurrences of these are to be moved to follow any other marks. * This is context-dependent. We may not be passed enough context to @@ -3751,13 +3806,11 @@ PP(pp_uc) STRLEN u; STRLEN ulen; UV uv; - if (in_iota_subscript && ! is_utf8_mark(s)) { + if (in_iota_subscript && ! _is_utf8_mark(s)) { /* A non-mark. Time to output the iota subscript */ -#define GREEK_CAPITAL_LETTER_IOTA 0x0399 -#define COMBINING_GREEK_YPOGEGRAMMENI 0x0345 - - CAT_UNI_TO_UTF8_TWO_BYTE(d, GREEK_CAPITAL_LETTER_IOTA); + Copy(GREEK_CAPITAL_LETTER_IOTA_UTF8, d, capital_iota_len, U8); + d += capital_iota_len; in_iota_subscript = FALSE; } @@ -3765,8 +3818,13 @@ PP(pp_uc) * and copy it to the output buffer */ u = UTF8SKIP(s); - uv = _to_utf8_upper_flags(s, tmpbuf, &ulen, - cBOOL(IN_LOCALE_RUNTIME), &tainted); +#ifdef USE_LOCALE_CTYPE + uv = _to_utf8_upper_flags(s, tmpbuf, &ulen, IN_LC_RUNTIME(LC_CTYPE)); +#else + uv = _to_utf8_upper_flags(s, tmpbuf, &ulen, 0); +#endif +#define GREEK_CAPITAL_LETTER_IOTA 0x0399 +#define COMBINING_GREEK_YPOGEGRAMMENI 0x0345 if (uv == GREEK_CAPITAL_LETTER_IOTA && utf8_to_uvchr_buf(s, send, 0) == COMBINING_GREEK_YPOGEGRAMMENI) { @@ -3792,16 +3850,13 @@ PP(pp_uc) s += u; } if (in_iota_subscript) { - CAT_UNI_TO_UTF8_TWO_BYTE(d, GREEK_CAPITAL_LETTER_IOTA); + Copy(GREEK_CAPITAL_LETTER_IOTA_UTF8, d, capital_iota_len, U8); + d += capital_iota_len; } SvUTF8_on(dest); *d = '\0'; SvCUR_set(dest, d - (U8*)SvPVX_const(dest)); - if (tainted) { - TAINT; - SvTAINTED_on(dest); - } } else { /* Not UTF-8 */ if (len) { @@ -3810,21 +3865,30 @@ PP(pp_uc) /* Use locale casing if in locale; regular style if not treating * latin1 as having case; otherwise the latin1 casing. Do the * whole thing in a tight loop, for speed, */ - if (IN_LOCALE_RUNTIME) { - TAINT; - SvTAINTED_on(dest); +#ifdef USE_LOCALE_CTYPE + if (IN_LC_RUNTIME(LC_CTYPE)) { + if (IN_UTF8_CTYPE_LOCALE) { + goto do_uni_rules; + } for (; s < send; d++, s++) - *d = toUPPER_LC(*s); + *d = (U8) toUPPER_LC(*s); } - else if (! IN_UNI_8_BIT) { + else +#endif + if (! IN_UNI_8_BIT) { for (; s < send; d++, s++) { *d = toUPPER(*s); } } else { +#ifdef USE_LOCALE_CTYPE + do_uni_rules: +#endif for (; s < send; d++, s++) { *d = toUPPER_LATIN1_MOD(*s); - if (LIKELY(*d != LATIN_SMALL_LETTER_Y_WITH_DIAERESIS)) continue; + if (LIKELY(*d != LATIN_SMALL_LETTER_Y_WITH_DIAERESIS)) { + continue; + } /* The mainstream case is the tight loop above. To avoid * extra tests in that, all three characters that require @@ -3908,6 +3972,12 @@ PP(pp_uc) SvCUR_set(dest, d - (U8*)SvPVX_const(dest)); } } /* End of isn't utf8 */ +#ifdef USE_LOCALE_CTYPE + if (IN_LC_RUNTIME(LC_CTYPE)) { + TAINT; + SvTAINTED_on(dest); + } +#endif if (dest != source && SvTAINTED(source)) SvTAINT(dest); SvSETMAGIC(dest); @@ -3927,8 +3997,12 @@ PP(pp_lc) SvGETMAGIC(source); - if (SvPADTMP(source) && !SvREADONLY(source) && !SvAMAGIC(source) - && SvTEMP(source) && !DO_UTF8(source)) { + if ( ( SvPADTMP(source) + || ( SvTEMP(source) && !SvSMAGICAL(source) + && SvREFCNT(source) == 1 ) + ) + && !SvREADONLY(source) && SvPOK(source) + && !DO_UTF8(source)) { /* We can convert in place, as lowercasing anything in the latin1 range * (or else DO_UTF8 would have been on) doesn't lengthen it */ @@ -3940,21 +4014,7 @@ PP(pp_lc) dest = TARG; - /* The old implementation would copy source into TARG at this point. - This had the side effect that if source was undef, TARG was now - an undefined SV with PADTMP set, and they don't warn inside - sv_2pv_flags(). However, we're now getting the PV direct from - source, which doesn't have PADTMP set, so it would warn. Hence the - little games. */ - - if (SvOK(source)) { - s = (const U8*)SvPV_nomg_const(source, len); - } else { - if (ckWARN(WARN_UNINITIALIZED)) - report_uninit(source); - s = (const U8*)""; - len = 0; - } + s = (const U8*)SvPV_nomg_const(source, len); min = len + 1; SvUPGRADE(dest, SVt_PV); @@ -3970,17 +4030,19 @@ PP(pp_lc) if (DO_UTF8(source)) { const U8 *const send = s + len; U8 tmpbuf[UTF8_MAXBYTES_CASE+1]; - bool tainted = FALSE; while (s < send) { const STRLEN u = UTF8SKIP(s); STRLEN ulen; - _to_utf8_lower_flags(s, tmpbuf, &ulen, - cBOOL(IN_LOCALE_RUNTIME), &tainted); +#ifdef USE_LOCALE_CTYPE + _to_utf8_lower_flags(s, tmpbuf, &ulen, IN_LC_RUNTIME(LC_CTYPE)); +#else + _to_utf8_lower_flags(s, tmpbuf, &ulen, 0); +#endif /* Here is where we would do context-sensitive actions. See the - * commit message for this comment for why there isn't any */ + * commit message for 86510fb15 for why there isn't any */ if (ulen > u && (SvLEN(dest) < (min += ulen - u))) { @@ -4007,10 +4069,6 @@ PP(pp_lc) SvUTF8_on(dest); *d = '\0'; SvCUR_set(dest, d - (U8*)SvPVX_const(dest)); - if (tainted) { - TAINT; - SvTAINTED_on(dest); - } } else { /* Not utf8 */ if (len) { const U8 *const send = s + len; @@ -4018,13 +4076,14 @@ PP(pp_lc) /* Use locale casing if in locale; regular style if not treating * latin1 as having case; otherwise the latin1 casing. Do the * whole thing in a tight loop, for speed, */ - if (IN_LOCALE_RUNTIME) { - TAINT; - SvTAINTED_on(dest); +#ifdef USE_LOCALE_CTYPE + if (IN_LC_RUNTIME(LC_CTYPE)) { for (; s < send; d++, s++) *d = toLOWER_LC(*s); - } - else if (! IN_UNI_8_BIT) { + } + else +#endif + if (! IN_UNI_8_BIT) { for (; s < send; d++, s++) { *d = toLOWER(*s); } @@ -4040,6 +4099,12 @@ PP(pp_lc) SvCUR_set(dest, d - (U8*)SvPVX_const(dest)); } } +#ifdef USE_LOCALE_CTYPE + if (IN_LC_RUNTIME(LC_CTYPE)) { + TAINT; + SvTAINTED_on(dest); + } +#endif if (dest != source && SvTAINTED(source)) SvTAINT(dest); SvSETMAGIC(dest); @@ -4070,14 +4135,15 @@ PP(pp_quotemeta) } } else if (UTF8_IS_DOWNGRADEABLE_START(*s)) { - +#ifdef USE_LOCALE_CTYPE /* In locale, we quote all non-ASCII Latin1 chars. * Otherwise use the quoting rules */ - if (IN_LOCALE_RUNTIME - || _isQUOTEMETA(TWO_BYTE_UTF8_TO_UNI(*s, *(s + 1)))) + if (IN_LC_RUNTIME(LC_CTYPE) + || _isQUOTEMETA(TWO_BYTE_UTF8_TO_NATIVE(*s, *(s + 1)))) { to_quote = TRUE; } +#endif } else if (is_QUOTEMETA_high(s)) { to_quote = TRUE; @@ -4132,10 +4198,14 @@ PP(pp_fc) const U8 *s; const U8 *send; U8 *d; - U8 tmpbuf[UTF8_MAXBYTES * UTF8_MAX_FOLD_CHAR_EXPAND + 1]; - const bool full_folding = TRUE; + U8 tmpbuf[UTF8_MAXBYTES_CASE + 1]; + const bool full_folding = TRUE; /* This variable is here so we can easily + move to more generality later */ const U8 flags = ( full_folding ? FOLD_FLAGS_FULL : 0 ) - | ( IN_LOCALE_RUNTIME ? FOLD_FLAGS_LOCALE : 0 ); +#ifdef USE_LOCALE_CTYPE + | ( IN_LC_RUNTIME(LC_CTYPE) ? FOLD_FLAGS_LOCALE : 0 ) +#endif + ; /* This is a facsimile of pp_lc, but with a thousand bugs thanks to me. * You are welcome(?) -Hugmeir @@ -4164,12 +4234,11 @@ PP(pp_fc) send = s + len; if (DO_UTF8(source)) { /* UTF-8 flagged string. */ - bool tainted = FALSE; while (s < send) { const STRLEN u = UTF8SKIP(s); STRLEN ulen; - _to_utf8_fold_flags(s, tmpbuf, &ulen, flags, &tainted); + _to_utf8_fold_flags(s, tmpbuf, &ulen, flags); if (ulen > u && (SvLEN(dest) < (min += ulen - u))) { const UV o = d - (U8*)SvPVX_const(dest); @@ -4182,41 +4251,39 @@ PP(pp_fc) s += u; } SvUTF8_on(dest); - if (tainted) { - TAINT; - SvTAINTED_on(dest); - } } /* Unflagged string */ else if (len) { - /* For locale, bytes, and nothing, the behavior is supposed to be the - * same as lc(). - */ - if ( IN_LOCALE_RUNTIME ) { /* Under locale */ - TAINT; - SvTAINTED_on(dest); +#ifdef USE_LOCALE_CTYPE + if ( IN_LC_RUNTIME(LC_CTYPE) ) { /* Under locale */ + if (IN_UTF8_CTYPE_LOCALE) { + goto do_uni_folding; + } for (; s < send; d++, s++) - *d = toLOWER_LC(*s); + *d = (U8) toFOLD_LC(*s); } - else if ( !IN_UNI_8_BIT ) { /* Under nothing, or bytes */ + else +#endif + if ( !IN_UNI_8_BIT ) { /* Under nothing, or bytes */ for (; s < send; d++, s++) - *d = toLOWER(*s); + *d = toFOLD(*s); } else { - /* For ASCII and the Latin-1 range, there's only two troublesome folds, - * \x{DF} (\N{LATIN SMALL LETTER SHARP S}), which under full casefolding - * becomes 'ss', and \x{B5} (\N{MICRO SIGN}), which under any fold becomes - * \x{3BC} (\N{GREEK SMALL LETTER MU}) -- For the rest, the casefold is - * their lowercase. - */ +#ifdef USE_LOCALE_CTYPE + do_uni_folding: +#endif + /* For ASCII and the Latin-1 range, there's only two troublesome + * folds, \x{DF} (\N{LATIN SMALL LETTER SHARP S}), which under full + * casefolding becomes 'ss'; and \x{B5} (\N{MICRO SIGN}), which + * under any fold becomes \x{3BC} (\N{GREEK SMALL LETTER MU}) -- + * For the rest, the casefold is their lowercase. */ for (; s < send; d++, s++) { if (*s == MICRO_SIGN) { - /* \N{MICRO SIGN}'s casefold is \N{GREEK SMALL LETTER MU}, which - * is outside of the latin-1 range. There's a couple of ways to - * deal with this -- khw discusses them in pp_lc/uc, so go there :) - * What we do here is upgrade what we had already casefolded, - * then enter an inner loop that appends the rest of the characters - * as UTF-8. - */ + /* \N{MICRO SIGN}'s casefold is \N{GREEK SMALL LETTER MU}, + * which is outside of the latin-1 range. There's a couple + * of ways to deal with this -- khw discusses them in + * pp_lc/uc, so go there :) What we do here is upgrade what + * we had already casefolded, then enter an inner loop that + * appends the rest of the characters as UTF-8. */ len = d - (U8*)SvPVX_const(dest); SvCUR_set(dest, len); len = sv_utf8_upgrade_flags_grow(dest, @@ -4226,13 +4293,16 @@ PP(pp_fc) (send -s) * 2 + 1); d = (U8*)SvPVX(dest) + len; - CAT_UNI_TO_UTF8_TWO_BYTE(d, GREEK_SMALL_LETTER_MU); + Copy(GREEK_SMALL_LETTER_MU_UTF8, d, small_mu_len, U8); + d += small_mu_len; s++; for (; s < send; s++) { STRLEN ulen; UV fc = _to_uni_fold_flags(*s, tmpbuf, &ulen, flags); - if UNI_IS_INVARIANT(fc) { - if ( full_folding && *s == LATIN_SMALL_LETTER_SHARP_S) { + if UVCHR_IS_INVARIANT(fc) { + if (full_folding + && *s == LATIN_SMALL_LETTER_SHARP_S) + { *d++ = 's'; *d++ = 's'; } @@ -4247,9 +4317,8 @@ PP(pp_fc) break; } else if (full_folding && *s == LATIN_SMALL_LETTER_SHARP_S) { - /* Under full casefolding, LATIN SMALL LETTER SHARP S becomes "ss", - * which may require growing the SV. - */ + /* Under full casefolding, LATIN SMALL LETTER SHARP S + * becomes "ss", which may require growing the SV. */ if (SvLEN(dest) < ++min) { const UV o = d - (U8*)SvPVX_const(dest); SvGROW(dest, min); @@ -4258,7 +4327,8 @@ PP(pp_fc) *(d)++ = 's'; *d = 's'; } - else { /* If it's not one of those two, the fold is their lower case */ + else { /* If it's not one of those two, the fold is their lower + case */ *d = toLOWER_LATIN1(*s); } } @@ -4267,6 +4337,12 @@ PP(pp_fc) *d = '\0'; SvCUR_set(dest, d - (U8*)SvPVX_const(dest)); +#ifdef USE_LOCALE_CTYPE + if (IN_LC_RUNTIME(LC_CTYPE)) { + TAINT; + SvTAINTED_on(dest); + } +#endif if (SvTAINTED(source)) SvTAINT(dest); SvSETMAGIC(dest); @@ -4294,9 +4370,9 @@ PP(pp_aslice) if (lval && localizing) { SV **svp; - I32 max = -1; + SSize_t max = -1; for (svp = MARK + 1; svp <= SP; svp++) { - const I32 elem = SvIV(*svp); + const SSize_t elem = SvIV(*svp); if (elem > max) max = elem; } @@ -4306,7 +4382,7 @@ PP(pp_aslice) while (++MARK <= SP) { SV **svp; - I32 elem = SvIV(*MARK); + SSize_t elem = SvIV(*MARK); bool preeminent = TRUE; if (localizing && can_preserve) { @@ -4319,7 +4395,7 @@ PP(pp_aslice) svp = av_fetch(av, elem, lval); if (lval) { - if (!svp || *svp == &PL_sv_undef) + if (!svp || !*svp) DIE(aTHX_ PL_no_aelem, elem); if (localizing) { if (preeminent) @@ -4339,6 +4415,51 @@ PP(pp_aslice) RETURN; } +PP(pp_kvaslice) +{ + dVAR; dSP; dMARK; + AV *const av = MUTABLE_AV(POPs); + I32 lval = (PL_op->op_flags & OPf_MOD); + SSize_t items = SP - MARK; + + if (PL_op->op_private & OPpMAYBE_LVSUB) { + const I32 flags = is_lvalue_sub(); + if (flags) { + if (!(flags & OPpENTERSUB_INARGS)) + /* diag_listed_as: Can't modify %s in %s */ + Perl_croak(aTHX_ "Can't modify index/value array slice in list assignment"); + lval = flags; + } + } + + MEXTEND(SP,items); + while (items > 1) { + *(MARK+items*2-1) = *(MARK+items); + items--; + } + items = SP-MARK; + SP += items; + + while (++MARK <= SP) { + SV **svp; + + svp = av_fetch(av, SvIV(*MARK), lval); + if (lval) { + if (!svp || !*svp || *svp == &PL_sv_undef) { + DIE(aTHX_ PL_no_aelem, SvIV(*MARK)); + } + *MARK = sv_mortalcopy(*MARK); + } + *++MARK = svp ? *svp : &PL_sv_undef; + } + if (GIMME != G_ARRAY) { + MARK = SP - items*2; + *++MARK = items > 0 ? *SP : &PL_sv_undef; + SP = MARK; + } + RETURN; +} + /* Smart dereferencing for keys, values and each */ PP(pp_rkeys) { @@ -4372,7 +4493,9 @@ PP(pp_rkeys) return (SvTYPE(sv) == SVt_PVHV) ? Perl_do_kv(aTHX) : Perl_pp_akeys(aTHX); } else { - return (SvTYPE(sv) == SVt_PVHV) ? Perl_pp_each(aTHX) : Perl_pp_aeach(aTHX); + return (SvTYPE(sv) == SVt_PVHV) + ? Perl_pp_each(aTHX) + : Perl_pp_aeach(aTHX); } } @@ -4385,7 +4508,7 @@ PP(pp_aeach) IV *iterp = Perl_av_iter_p(aTHX_ array); const IV current = (*iterp)++; - if (current > av_len(array)) { + if (current > av_tindex(array)) { *iterp = 0; if (gimme == G_SCALAR) RETPUSHUNDEF; @@ -4413,7 +4536,7 @@ PP(pp_akeys) if (gimme == G_SCALAR) { dTARGET; - PUSHi(av_len(array) + 1); + PUSHi(av_tindex(array) + 1); } else if (gimme == G_ARRAY) { IV n = Perl_av_len(aTHX_ array); @@ -4479,15 +4602,15 @@ S_do_delete_local(pTHX) const MAGIC *mg; HV *stash; const bool sliced = !!(PL_op->op_private & OPpSLICE); - SV *unsliced_keysv = sliced ? NULL : POPs; + SV **unsliced_keysv = sliced ? NULL : sp--; SV * const osv = POPs; - SV **mark = sliced ? PL_stack_base + POPMARK : &unsliced_keysv-1; + SV **mark = sliced ? PL_stack_base + POPMARK : unsliced_keysv-1; dORIGMARK; const bool tied = SvRMAGICAL(osv) && mg_find((const SV *)osv, PERL_MAGIC_tied); const bool can_preserve = SvCANEXISTDELETE(osv); const U32 type = SvTYPE(osv); - SV ** const end = sliced ? SP : &unsliced_keysv; + SV ** const end = sliced ? SP : unsliced_keysv; if (type == SVt_PVHV) { /* hash element */ HV * const hv = MUTABLE_HV(osv); @@ -4506,7 +4629,8 @@ S_do_delete_local(pTHX) } else { sv = hv_delete_ent(hv, keysv, 0, 0); - SvREFCNT_inc_simple_void(sv); /* De-mortalize */ + if (preeminent) + SvREFCNT_inc_simple_void(sv); /* De-mortalize */ } if (preeminent) { if (!sv) DIE(aTHX_ PL_no_helem_sv, SVfARG(keysv)); @@ -4527,7 +4651,7 @@ S_do_delete_local(pTHX) if (PL_op->op_flags & OPf_SPECIAL) { AV * const av = MUTABLE_AV(osv); while (++MARK <= end) { - I32 idx = SvIV(*MARK); + SSize_t idx = SvIV(*MARK); SV *sv = NULL; bool preeminent = TRUE; if (can_preserve) @@ -4541,7 +4665,8 @@ S_do_delete_local(pTHX) } else { sv = av_delete(av, idx, 0); - SvREFCNT_inc_simple_void(sv); /* De-mortalize */ + if (preeminent) + SvREFCNT_inc_simple_void(sv); /* De-mortalize */ } if (preeminent) { save_aelem_flags(av, idx, &sv, SAVEf_KEEPOLDELEM); @@ -4575,7 +4700,7 @@ S_do_delete_local(pTHX) } } else if (gimme != G_VOID) - PUSHs(unsliced_keysv); + PUSHs(*unsliced_keysv); RETURN; } @@ -4653,7 +4778,7 @@ PP(pp_exists) SV *tmpsv; HV *hv; - if (PL_op->op_private & OPpEXISTS_SUB) { + if (UNLIKELY( PL_op->op_private & OPpEXISTS_SUB )) { GV *gv; SV * const sv = POPs; CV * const cv = sv_2cv(sv, &hv, &gv, 0); @@ -4665,7 +4790,7 @@ PP(pp_exists) } tmpsv = POPs; hv = MUTABLE_HV(POPs); - if (SvTYPE(hv) == SVt_PVHV) { + if (LIKELY( SvTYPE(hv) == SVt_PVHV )) { if (hv_exists_ent(hv, tmpsv, 0)) RETPUSHYES; } @@ -4738,19 +4863,72 @@ PP(pp_hslice) RETURN; } +PP(pp_kvhslice) +{ + dVAR; dSP; dMARK; + HV * const hv = MUTABLE_HV(POPs); + I32 lval = (PL_op->op_flags & OPf_MOD); + SSize_t items = SP - MARK; + + if (PL_op->op_private & OPpMAYBE_LVSUB) { + const I32 flags = is_lvalue_sub(); + if (flags) { + if (!(flags & OPpENTERSUB_INARGS)) + /* diag_listed_as: Can't modify %s in %s */ + Perl_croak(aTHX_ "Can't modify key/value hash slice in list assignment"); + lval = flags; + } + } + + MEXTEND(SP,items); + while (items > 1) { + *(MARK+items*2-1) = *(MARK+items); + items--; + } + items = SP-MARK; + SP += items; + + while (++MARK <= SP) { + SV * const keysv = *MARK; + SV **svp; + HE *he; + + he = hv_fetch_ent(hv, keysv, lval, 0); + svp = he ? &HeVAL(he) : NULL; + + if (lval) { + if (!svp || !*svp || *svp == &PL_sv_undef) { + DIE(aTHX_ PL_no_helem_sv, SVfARG(keysv)); + } + *MARK = sv_mortalcopy(*MARK); + } + *++MARK = svp && *svp ? *svp : &PL_sv_undef; + } + if (GIMME != G_ARRAY) { + MARK = SP - items*2; + *++MARK = items > 0 ? *SP : &PL_sv_undef; + SP = MARK; + } + RETURN; +} + /* List operators. */ PP(pp_list) { - dVAR; dSP; dMARK; + dVAR; + I32 markidx = POPMARK; if (GIMME != G_ARRAY) { + SV **mark = PL_stack_base + markidx; + dSP; if (++MARK <= SP) *MARK = *SP; /* unwanted list, return last item */ else *MARK = &PL_sv_undef; SP = MARK; + PUTBACK; } - RETURN; + return NORMAL; } PP(pp_lslice) @@ -4762,6 +4940,7 @@ PP(pp_lslice) SV ** const firstlelem = PL_stack_base + POPMARK + 1; SV ** const firstrelem = lastlelem + 1; I32 is_something_there = FALSE; + const U8 mod = PL_op->op_flags & OPf_MOD; const I32 max = lastrelem - lastlelem; SV **lelem; @@ -4793,6 +4972,10 @@ PP(pp_lslice) is_something_there = TRUE; if (!(*lelem = firstrelem[ix])) *lelem = &PL_sv_undef; + else if (mod && SvPADTMP(*lelem)) { + assert(!IS_PADGV(*lelem)); + *lelem = firstrelem[ix] = sv_mortalcopy(*lelem); + } } } if (is_something_there) @@ -4804,10 +4987,10 @@ PP(pp_lslice) PP(pp_anonlist) { - dVAR; dSP; dMARK; dORIGMARK; + dVAR; dSP; dMARK; const I32 items = SP - MARK; SV * const av = MUTABLE_SV(av_make(items, MARK+1)); - SP = ORIGMARK; /* av_make() might realloc stack_sp */ + SP = MARK; mXPUSHs((PL_op->op_flags & OPf_SPECIAL) ? newRV_noinc(av) : av); RETURN; @@ -4816,7 +4999,10 @@ PP(pp_anonlist) PP(pp_anonhash) { dVAR; dSP; dMARK; dORIGMARK; - HV* const hv = (HV *)sv_2mortal((SV *)newHV()); + HV* const hv = newHV(); + SV* const retval = sv_2mortal( PL_op->op_flags & OPf_SPECIAL + ? newRV_noinc(MUTABLE_SV(hv)) + : MUTABLE_SV(hv) ); while (MARK < SP) { SV * const key = @@ -4837,9 +5023,7 @@ PP(pp_anonhash) (void)hv_store_ent(hv,key,val,0); } SP = ORIGMARK; - if (PL_op->op_flags & OPf_SPECIAL) - mXPUSHs(newRV_inc(MUTABLE_SV(hv))); - else XPUSHs(MUTABLE_SV(hv)); + XPUSHs(retval); RETURN; } @@ -4880,16 +5064,16 @@ PP(pp_splice) AV *ary = DEREF_PLAIN_ARRAY(MUTABLE_AV(*++MARK)); SV **src; SV **dst; - I32 i; - I32 offset; - I32 length; - I32 newlen; - I32 after; - I32 diff; + SSize_t i; + SSize_t offset; + SSize_t length; + SSize_t newlen; + SSize_t after; + SSize_t diff; const MAGIC * const mg = SvTIED_mg((const SV *)ary, PERL_MAGIC_tied); if (mg) { - return Perl_tied_method(aTHX_ "SPLICE", mark - 1, MUTABLE_SV(ary), mg, + return Perl_tied_method(aTHX_ SV_CONST(SPLICE), mark - 1, MUTABLE_SV(ary), mg, GIMME_V | TIED_METHOD_ARGUMENTS_ON_STACK, sp - mark); } @@ -4952,14 +5136,18 @@ PP(pp_splice) MARK = ORIGMARK + 1; if (GIMME == G_ARRAY) { /* copy return vals to stack */ + const bool real = cBOOL(AvREAL(ary)); MEXTEND(MARK, length); - Copy(AvARRAY(ary)+offset, MARK, length, SV*); - if (AvREAL(ary)) { + if (real) EXTEND_MORTAL(length); - for (i = length, dst = MARK; i; i--) { + for (i = 0, dst = MARK; i < length; i++) { + if ((*dst = AvARRAY(ary)[i+offset])) { + if (real) sv_2mortal(*dst); /* free them eventually */ - dst++; } + else + *dst = &PL_sv_undef; + dst++; } MARK += length - 1; } @@ -4997,7 +5185,7 @@ PP(pp_splice) } i = -diff; while (i) - dst[--i] = &PL_sv_undef; + dst[--i] = NULL; if (newlen) { Copy( tmparyval, AvARRAY(ary) + offset, newlen, SV* ); @@ -5045,13 +5233,16 @@ PP(pp_splice) MARK = ORIGMARK + 1; if (GIMME == G_ARRAY) { /* copy return vals to stack */ if (length) { - Copy(tmparyval, MARK, length, SV*); - if (AvREAL(ary)) { + const bool real = cBOOL(AvREAL(ary)); + if (real) EXTEND_MORTAL(length); - for (i = length, dst = MARK; i; i--) { + for (i = 0, dst = MARK; i < length; i++) { + if ((*dst = tmparyval[i])) { + if (real) sv_2mortal(*dst); /* free them eventually */ - dst++; } + else *dst = &PL_sv_undef; + dst++; } } MARK += length - 1; @@ -5087,7 +5278,7 @@ PP(pp_push) PUSHMARK(MARK); PUTBACK; ENTER_with_name("call_PUSH"); - call_method("PUSH",G_SCALAR|G_DISCARD); + call_sv(SV_CONST(PUSH),G_SCALAR|G_DISCARD|G_METHOD_NAMED); LEAVE_with_name("call_PUSH"); SPAGAIN; } @@ -5140,12 +5331,12 @@ PP(pp_unshift) PUSHMARK(MARK); PUTBACK; ENTER_with_name("call_UNSHIFT"); - call_method("UNSHIFT",G_SCALAR|G_DISCARD); + call_sv(SV_CONST(UNSHIFT),G_SCALAR|G_DISCARD|G_METHOD_NAMED); LEAVE_with_name("call_UNSHIFT"); SPAGAIN; } else { - I32 i = 0; + SSize_t i = 0; av_unshift(ary, SP - MARK); while (MARK < SP) { SV * const sv = newSVsv(*++MARK); @@ -5176,14 +5367,14 @@ PP(pp_reverse) SP = MARK; if (SvMAGICAL(av)) { - I32 i, j; + SSize_t i, j; SV *tmp = sv_newmortal(); /* For SvCANEXISTDELETE */ HV *stash; const MAGIC *mg; bool can_preserve = SvCANEXISTDELETE(av); - for (i = 0, j = av_len(av); i < j; ++i, --j) { + for (i = 0, j = av_tindex(av); i < j; ++i, --j) { SV *begin, *end; if (can_preserve) { @@ -5248,8 +5439,6 @@ PP(pp_reverse) do_join(TARG, &PL_sv_no, MARK, SP); else { sv_setsv(TARG, SP > MARK ? *SP : find_rundefsv()); - if (! SvOK(TARG) && ckWARN(WARN_UNINITIALIZED)) - report_uninit(TARG); } up = SvPV_force(TARG, len); @@ -5301,15 +5490,16 @@ PP(pp_split) STRLEN len; const char *s = SvPV_const(sv, len); const bool do_utf8 = DO_UTF8(sv); - const bool skipwhite = PL_op->op_flags & OPf_SPECIAL; const char *strend = s + len; PMOP *pm; REGEXP *rx; SV *dstr; const char *m; - I32 iters = 0; - const STRLEN slen = do_utf8 ? utf8_length((U8*)s, (U8*)strend) : (STRLEN)(strend - s); - I32 maxiters = slen + 10; + SSize_t iters = 0; + const STRLEN slen = do_utf8 + ? utf8_length((U8*)s, (U8*)strend) + : (STRLEN)(strend - s); + SSize_t maxiters = slen + 10; I32 trailing_empty = 0; const char *orig; const I32 origlimit = limit; @@ -5332,9 +5522,7 @@ PP(pp_split) rx = PM_GETRE(pm); TAINT_IF(get_regex_charset(RX_EXTFLAGS(rx)) == REGEX_LOCALE_CHARSET && - (RX_EXTFLAGS(rx) & RXf_WHITE || skipwhite)); - - RX_MATCH_UTF8_set(rx, do_utf8); + (RX_EXTFLAGS(rx) & (RXf_WHITE | RXf_SKIPWHITE))); #ifdef USE_ITHREADS if (pm->op_pmreplrootu.op_pmtargetoff) { @@ -5347,7 +5535,7 @@ PP(pp_split) #endif else ary = NULL; - if (ary && (gimme != G_ARRAY || (pm->op_pmflags & PMf_ONCE))) { + if (ary) { realarray = 1; PUTBACK; av_extend(ary,0); @@ -5363,7 +5551,7 @@ PP(pp_split) AvREAL_on(ary); AvREIFY_off(ary); for (i = AvFILLp(ary); i >= 0; i--) - AvARRAY(ary)[i] = &PL_sv_undef; /* don't free mere refs */ + AvARRAY(ary)[i] = &PL_sv_undef; /* don't free mere refs */ } /* temporarily switch stacks */ SAVESWITCHSTACK(PL_curstack, ary); @@ -5372,9 +5560,9 @@ PP(pp_split) } base = SP - PL_stack_base; orig = s; - if (skipwhite) { + if (RX_EXTFLAGS(rx) & RXf_SKIPWHITE) { if (do_utf8) { - while (*s == ' ' || is_utf8_space((U8*)s)) + while (isSPACE_utf8(s)) s += UTF8SKIP(s); } else if (get_regex_charset(RX_EXTFLAGS(rx)) == REGEX_LOCALE_CHARSET) { @@ -5394,21 +5582,22 @@ PP(pp_split) if (!limit) limit = maxiters + 2; - if (RX_EXTFLAGS(rx) & RXf_WHITE || skipwhite) { + if (RX_EXTFLAGS(rx) & RXf_WHITE) { while (--limit) { m = s; /* this one uses 'm' and is a negative test */ if (do_utf8) { - while (m < strend && !( *m == ' ' || is_utf8_space((U8*)m) )) { + while (m < strend && ! isSPACE_utf8(m) ) { const int t = UTF8SKIP(m); - /* is_utf8_space returns FALSE for malform utf8 */ + /* isSPACE_utf8 returns FALSE for malform utf8 */ if (strend - m < t) m = strend; else m += t; } } - else if (get_regex_charset(RX_EXTFLAGS(rx)) == REGEX_LOCALE_CHARSET) { + else if (get_regex_charset(RX_EXTFLAGS(rx)) == REGEX_LOCALE_CHARSET) + { while (m < strend && !isSPACE_LC(*m)) ++m; } else { @@ -5438,10 +5627,11 @@ PP(pp_split) /* this one uses 's' and is a positive test */ if (do_utf8) { - while (s < strend && ( *s == ' ' || is_utf8_space((U8*)s) )) + while (s < strend && isSPACE_utf8(s) ) s += UTF8SKIP(s); } - else if (get_regex_charset(RX_EXTFLAGS(rx)) == REGEX_LOCALE_CHARSET) { + else if (get_regex_charset(RX_EXTFLAGS(rx)) == REGEX_LOCALE_CHARSET) + { while (s < strend && isSPACE_LC(*s)) ++s; } else { @@ -5533,7 +5723,7 @@ PP(pp_split) else if (do_utf8 == (RX_UTF8(rx) != 0) && (RX_EXTFLAGS(rx) & RXf_USE_INTUIT) && !RX_NPARENS(rx) && (RX_EXTFLAGS(rx) & RXf_CHECK_ALL) - && !(RX_EXTFLAGS(rx) & RXf_ANCH)) { + && !(RX_EXTFLAGS(rx) & RXf_IS_ANCHORED)) { const int tail = (RX_EXTFLAGS(rx) & RXf_INTUIT_TAIL); SV * const csv = CALLREG_INTUIT_STRING(rx); @@ -5553,7 +5743,7 @@ PP(pp_split) trailing_empty = 0; } else { dstr = newSVpvn_flags(s, m-s, - (do_utf8 ? SVf_UTF8 : 0) | make_mortal); + (do_utf8 ? SVf_UTF8 : 0) | make_mortal); XPUSHs(dstr); } /* The rx->minlen is in characters but we want to step @@ -5577,7 +5767,7 @@ PP(pp_split) trailing_empty = 0; } else { dstr = newSVpvn_flags(s, m-s, - (do_utf8 ? SVf_UTF8 : 0) | make_mortal); + (do_utf8 ? SVf_UTF8 : 0) | make_mortal); XPUSHs(dstr); } /* The rx->minlen is in characters but we want to step @@ -5595,7 +5785,7 @@ PP(pp_split) { I32 rex_return; PUTBACK; - rex_return = CALLREGEXEC(rx, (char*)s, (char*)strend, (char*)orig, 1 , + rex_return = CALLREGEXEC(rx, (char*)s, (char*)strend, (char*)orig, 1, sv, NULL, 0); SPAGAIN; if (rex_return == 0) @@ -5697,11 +5887,11 @@ PP(pp_split) else { PUTBACK; ENTER_with_name("call_PUSH"); - call_method("PUSH",G_SCALAR|G_DISCARD); + call_sv(SV_CONST(PUSH),G_SCALAR|G_DISCARD|G_METHOD_NAMED); LEAVE_with_name("call_PUSH"); SPAGAIN; if (gimme == G_ARRAY) { - I32 i; + SSize_t i; /* EXTEND should not be needed - we just popped them */ EXTEND(SP, iters); for (i=0; i < iters; i++) { @@ -5863,7 +6053,7 @@ PP(pp_coreargs) const bool constr = PL_op->op_private & whicharg; PUSHs(S_rv2gv(aTHX_ svp && *svp ? *svp : &PL_sv_undef, - constr, CopHINTS_get(PL_curcop) & HINT_STRICT_REFS, + constr, cBOOL(CopHINTS_get(PL_curcop) & HINT_STRICT_REFS), !constr )); } @@ -5885,7 +6075,6 @@ PP(pp_coreargs) ) ) DIE(aTHX_ - /* diag_listed_as: Type of arg %d to &CORE::%s must be %s*/ "Type of arg %d to &CORE::%s must be %s", whicharg, PL_op_name[opnum], wantscalar