X-Git-Url: https://perl5.git.perl.org/perl5.git/blobdiff_plain/cab190d4d715e2c6ec26ee0ebfd0bc6fb0f03bd3..59b085e1644b14873900cf449944e1ed94f075d3:/pp.c

diff --git a/pp.c b/pp.c
index 99f0b06..ba01223 100644
--- a/pp.c
+++ b/pp.c
@@ -61,7 +61,8 @@ PP(pp_padav)
     dVAR; dSP; dTARGET;
     I32 gimme;
     if (PL_op->op_private & OPpLVAL_INTRO)
-	SAVECLEARSV(PAD_SVl(PL_op->op_targ));
+	if (!(PL_op->op_private & OPpPAD_STATE))
+	    SAVECLEARSV(PAD_SVl(PL_op->op_targ));
     EXTEND(SP, 1);
     if (PL_op->op_flags & OPf_REF) {
 	PUSHs(TARG);
@@ -104,7 +105,8 @@ PP(pp_padhv)
 
     XPUSHs(TARG);
     if (PL_op->op_private & OPpLVAL_INTRO)
-	SAVECLEARSV(PAD_SVl(PL_op->op_targ));
+	if (!(PL_op->op_private & OPpPAD_STATE))
+	    SAVECLEARSV(PAD_SVl(PL_op->op_targ));
     if (PL_op->op_flags & OPf_REF)
 	RETURN;
     else if (LVRET) {
@@ -237,6 +239,7 @@ PP(pp_rv2sv)
 	case SVt_PVFM:
 	case SVt_PVIO:
 	    DIE(aTHX_ "Not a SCALAR reference");
+	default: NOOP;
 	}
     }
     else {
@@ -334,7 +337,7 @@ PP(pp_pos)
 		I32 i = mg->mg_len;
 		if (DO_UTF8(sv))
 		    sv_pos_b2u(sv, &i);
-		PUSHi(i + PL_curcop->cop_arybase);
+		PUSHi(i + CopARYBASE_get(PL_curcop));
 		RETURN;
 	    }
 	}
@@ -346,7 +349,7 @@ PP(pp_rv2cv)
 {
     dVAR; dSP;
     GV *gv;
-    HV *stash;
+    HV *stash_unused;
     const I32 flags = (PL_op->op_flags & OPf_SPECIAL)
 	? 0
 	: ((PL_op->op_private & (OPpLVAL_INTRO|OPpMAY_RETURN_CONSTANT)) == OPpMAY_RETURN_CONSTANT)
@@ -355,7 +358,7 @@ PP(pp_rv2cv)
     /* We usually try to add a non-existent subroutine in case of AUTOLOAD. */
     /* (But not in defined().) */
 
-    CV *cv = sv_2cv(TOPs, &stash, &gv, flags);
+    CV *cv = sv_2cv(TOPs, &stash_unused, &gv, flags);
     if (cv) {
 	if (CvCLONE(cv))
 	    cv = (CV*)sv_2mortal((SV*)cv_clone(cv));
@@ -386,15 +389,15 @@ PP(pp_prototype)
     if (SvPOK(TOPs) && SvCUR(TOPs) >= 7) {
 	const char * const s = SvPVX_const(TOPs);
 	if (strnEQ(s, "CORE::", 6)) {
-	    const int code = keyword(s + 6, SvCUR(TOPs) - 6);
+	    const int code = keyword(s + 6, SvCUR(TOPs) - 6, 1);
 	    if (code < 0) {	/* Overridable. */
 #define MAX_ARGS_OP ((sizeof(I32) - 1) * 2)
-		int i = 0, n = 0, seen_question = 0;
+		int i = 0, n = 0, seen_question = 0, defgv = 0;
 		I32 oa;
 		char str[ MAX_ARGS_OP * 2 + 2 ]; /* One ';', one '\0' */
 
 		if (code == -KEY_chop || code == -KEY_chomp
-			|| code == -KEY_exec || code == -KEY_system)
+			|| code == -KEY_exec || code == -KEY_system || code == -KEY_err)
 		    goto set;
 		while (i < MAXO) {	/* The slow way. */
 		    if (strEQ(s + 6, PL_op_name[i])
@@ -406,9 +409,10 @@ PP(pp_prototype)
 		}
 		goto nonesuch;		/* Should not happen... */
 	      found:
+		defgv = PL_opargs[i] & OA_DEFGV;
 		oa = PL_opargs[i] >> OASHIFT;
 		while (oa) {
-		    if (oa & OA_OPTIONAL && !seen_question) {
+		    if (oa & OA_OPTIONAL && !seen_question && !defgv) {
 			seen_question = 1;
 			str[n++] = ';';
 		    }
@@ -422,6 +426,8 @@ PP(pp_prototype)
 		    str[n++] = ("?$@@%&*$")[oa & (OA_OPTIONAL - 1)];
 		    oa = oa >> 4;
 		}
+		if (defgv && str[n - 1] == '$')
+		    str[n - 1] = '_';
 		str[n++] = '\0';
 		ret = sv_2mortal(newSVpvn(str, n - 1));
 	    }
@@ -641,7 +647,7 @@ PP(pp_study)
     }
     s = (unsigned char*)(SvPV(sv, len));
     pos = len;
-    if (pos <= 0 || !SvPOK(sv)) {
+    if (pos <= 0 || !SvPOK(sv) || SvUTF8(sv)) {
 	/* No point in studying a zero length string, and not safe to study
 	   anything that doesn't appear to be a simple scalar (and hence might
 	   change between now and when the regexp engine runs without our set
@@ -998,7 +1004,47 @@ PP(pp_pow)
 #endif    
     {
 	dPOPTOPnnrl;
+
+#if defined(USE_LONG_DOUBLE) && defined(HAS_AIX_POWL_NEG_BASE_BUG)
+    /*
+    We are building perl with long double support and are on an AIX OS
+    afflicted with a powl() function that wrongly returns NaNQ for any
+    negative base.  This was reported to IBM as PMR #23047-379 on
+    03/06/2006.  The problem exists in at least the following versions
+    of AIX and the libm fileset, and no doubt others as well:
+
+	AIX 4.3.3-ML10      bos.adt.libm 4.3.3.50
+	AIX 5.1.0-ML04      bos.adt.libm 5.1.0.29
+	AIX 5.2.0           bos.adt.libm 5.2.0.85
+
+    So, until IBM fixes powl(), we provide the following workaround to
+    handle the problem ourselves.  Our logic is as follows: for
+    negative bases (left), we use fmod(right, 2) to check if the
+    exponent is an odd or even integer:
+
+	- if odd,  powl(left, right) == -powl(-left, right)
+	- if even, powl(left, right) ==  powl(-left, right)
+
+    If the exponent is not an integer, the result is rightly NaNQ, so
+    we just return that (as NV_NAN).
+    */
+
+	if (left < 0.0) {
+	    NV mod2 = Perl_fmod( right, 2.0 );
+	    if (mod2 == 1.0 || mod2 == -1.0) {	/* odd integer */
+		SETn( -Perl_pow( -left, right) );
+	    } else if (mod2 == 0.0) {		/* even integer */
+		SETn( Perl_pow( -left, right) );
+	    } else {				/* fractional power */
+		SETn( NV_NAN );
+	    }
+	} else {
+	    SETn( Perl_pow( left, right) );
+	}
+#else
 	SETn( Perl_pow( left, right) );
+#endif  /* HAS_AIX_POWL_NEG_BASE_BUG */
+
 #ifdef PERL_PRESERVE_IVUV
 	if (is_int)
 	    SvIV_please(TOPs);
@@ -1057,7 +1103,7 @@ PP(pp_multiply)
 	    bhigh = blow >> (4 * sizeof (UV));
 	    blow &= botmask;
 	    if (ahigh && bhigh) {
-		/*EMPTY*/;
+		NOOP;
 		/* eg 32 bit is at least 0x10000 * 0x10000 == 0x100000000
 		   which is overflow. Drop to NVs below.  */
 	    } else if (!ahigh && !bhigh) {
@@ -2389,16 +2435,16 @@ PP(pp_complement)
 	if (SvUTF8(TARG)) {
 	  /* Calculate exact length, let's not estimate. */
 	  STRLEN targlen = 0;
-	  U8 *result;
-	  U8 *send;
 	  STRLEN l;
 	  UV nchar = 0;
 	  UV nwide = 0;
+	  U8 * const send = tmps + len;
+	  U8 * const origtmps = tmps;
+	  const UV utf8flags = UTF8_ALLOW_ANYUV;
 
-	  send = tmps + len;
 	  while (tmps < send) {
-	    const UV c = utf8n_to_uvchr(tmps, send-tmps, &l, UTF8_ALLOW_ANYUV);
-	    tmps += UTF8SKIP(tmps);
+	    const UV c = utf8n_to_uvchr(tmps, send-tmps, &l, utf8flags);
+	    tmps += l;
 	    targlen += UNISKIP(~c);
 	    nchar++;
 	    if (c > 0xff)
@@ -2406,33 +2452,39 @@ PP(pp_complement)
 	  }
 
 	  /* Now rewind strings and write them. */
-	  tmps -= len;
+	  tmps = origtmps;
 
 	  if (nwide) {
-	      Newxz(result, targlen + 1, U8);
+	      U8 *result;
+	      U8 *p;
+
+	      Newx(result, targlen + 1, U8);
+	      p = result;
 	      while (tmps < send) {
-		  const UV c = utf8n_to_uvchr(tmps, send-tmps, &l, UTF8_ALLOW_ANYUV);
-		  tmps += UTF8SKIP(tmps);
-		  result = uvchr_to_utf8_flags(result, ~c, UNICODE_ALLOW_ANY);
+		  const UV c = utf8n_to_uvchr(tmps, send-tmps, &l, utf8flags);
+		  tmps += l;
+		  p = uvchr_to_utf8_flags(p, ~c, UNICODE_ALLOW_ANY);
 	      }
-	      *result = '\0';
-	      result -= targlen;
-	      sv_setpvn(TARG, (char*)result, targlen);
+	      *p = '\0';
+	      sv_usepvn_flags(TARG, (char*)result, targlen,
+			      SV_HAS_TRAILING_NUL);
 	      SvUTF8_on(TARG);
 	  }
 	  else {
-	      Newxz(result, nchar + 1, U8);
+	      U8 *result;
+	      U8 *p;
+
+	      Newx(result, nchar + 1, U8);
+	      p = result;
 	      while (tmps < send) {
-		  const U8 c = (U8)utf8n_to_uvchr(tmps, 0, &l, UTF8_ALLOW_ANY);
-		  tmps += UTF8SKIP(tmps);
-		  *result++ = ~c;
+		  const U8 c = (U8)utf8n_to_uvchr(tmps, send-tmps, &l, utf8flags);
+		  tmps += l;
+		  *p++ = ~c;
 	      }
-	      *result = '\0';
-	      result -= nchar;
-	      sv_setpvn(TARG, (char*)result, nchar);
+	      *p = '\0';
+	      sv_usepvn_flags(TARG, (char*)result, nchar, SV_HAS_TRAILING_NUL);
 	      SvUTF8_off(TARG);
 	  }
-	  Safefree(result);
 	  SETs(TARG);
 	  RETURN;
 	}
@@ -2442,7 +2494,7 @@ PP(pp_complement)
 	    for ( ; anum && (unsigned long)tmps % sizeof(long); anum--, tmps++)
 		*tmps = ~*tmps;
 	    tmpl = (long*)tmps;
-	    for ( ; anum >= sizeof(long); anum -= sizeof(long), tmpl++)
+	    for ( ; anum >= (I32)sizeof(long); anum -= (I32)sizeof(long), tmpl++)
 		*tmpl = ~*tmpl;
 	    tmps = (U8*)tmpl;
 	}
@@ -2904,7 +2956,22 @@ PP(pp_length)
     dVAR; dSP; dTARGET;
     SV * const sv = TOPs;
 
-    if (DO_UTF8(sv))
+    if (SvAMAGIC(sv)) {
+	/* For an overloaded scalar, we can't know in advance if it's going to
+	   be UTF-8 or not. Also, we can't call sv_len_utf8 as it likes to
+	   cache the length. Maybe that should be a documented feature of it.
+	*/
+	STRLEN len;
+	const char *const p = SvPV_const(sv, len);
+
+	if (DO_UTF8(sv)) {
+	    SETi(utf8_length((U8*)p, (U8*)p + len));
+	}
+	else
+	    SETi(len);
+
+    }
+    else if (DO_UTF8(sv))
 	SETi(sv_len_utf8(sv));
     else
 	SETi(sv_len(sv));
@@ -2923,7 +2990,7 @@ PP(pp_substr)
     I32 fail;
     const I32 lvalue = PL_op->op_flags & OPf_MOD || LVRET;
     const char *tmps;
-    const I32 arybase = PL_curcop->cop_arybase;
+    const I32 arybase = CopARYBASE_get(PL_curcop);
     SV *repl_sv = NULL;
     const char *repl = NULL;
     STRLEN repl_len;
@@ -3063,8 +3130,6 @@ PP(pp_substr)
 		sv_upgrade(TARG, SVt_PVLV);
 		sv_magic(TARG, NULL, PERL_MAGIC_substr, NULL, 0);
 	    }
-	    else
-		SvOK_off(TARG);
 
 	    LvTYPE(TARG) = 'x';
 	    if (LvTARG(TARG) != sv) {
@@ -3122,9 +3187,9 @@ PP(pp_index)
     STRLEN llen = 0;
     I32 offset;
     I32 retval;
-    const char *tmps;
-    const char *tmps2;
-    const I32 arybase = PL_curcop->cop_arybase;
+    const char *big_p;
+    const char *little_p;
+    const I32 arybase = CopARYBASE_get(PL_curcop);
     bool big_utf8;
     bool little_utf8;
     const bool is_index = PL_op->op_type == OP_INDEX;
@@ -3136,6 +3201,9 @@ PP(pp_index)
     }
     little = POPs;
     big = POPs;
+    big_p = SvPV_const(big, biglen);
+    little_p = SvPV_const(little, llen);
+
     big_utf8 = DO_UTF8(big);
     little_utf8 = DO_UTF8(little);
     if (big_utf8 ^ little_utf8) {
@@ -3143,9 +3211,7 @@ PP(pp_index)
 	if (little_utf8 && !PL_encoding) {
 	    /* Well, maybe instead we might be able to downgrade the small
 	       string?  */
-	    STRLEN little_len;
-	    const U8 * const little_pv = (U8*) SvPV_const(little, little_len);
-	    char * const pv = (char*)bytes_from_utf8(little_pv, &little_len,
+	    char * const pv = (char*)bytes_from_utf8((U8 *)little_p, &llen,
 						     &little_utf8);
 	    if (little_utf8) {
 		/* If the large string is ISO-8859-1, and it's not possible to
@@ -3158,13 +3224,11 @@ PP(pp_index)
 	    /* At this point, pv is a malloc()ed string. So donate it to temp
 	       to ensure it will get free()d  */
 	    little = temp = newSV(0);
-	    sv_usepvn(temp, pv, little_len);
+	    sv_usepvn(temp, pv, llen);
+	    little_p = SvPVX(little);
 	} else {
-	    SV * const bytes = little_utf8 ? big : little;
-	    STRLEN len;
-	    const char * const p = SvPV_const(bytes, len);
-
-	    temp = newSVpvn(p, len);
+	    temp = little_utf8
+		? newSVpvn(big_p, biglen) : newSVpvn(little_p, llen);
 
 	    if (PL_encoding) {
 		sv_recode_to_utf8(temp, PL_encoding);
@@ -3174,34 +3238,58 @@ PP(pp_index)
 	    if (little_utf8) {
 		big = temp;
 		big_utf8 = TRUE;
+		big_p = SvPV_const(big, biglen);
 	    } else {
 		little = temp;
+		little_p = SvPV_const(little, llen);
 	    }
 	}
     }
-    /* Don't actually need the NULL initialisation, but it keeps gcc quiet.  */
-    tmps2 = is_index ? NULL : SvPV_const(little, llen);
-    tmps = SvPV_const(big, biglen);
+    if (SvGAMAGIC(big)) {
+	/* Life just becomes a lot easier if I use a temporary here.
+	   Otherwise I need to avoid calls to sv_pos_u2b(), which (dangerously)
+	   will trigger magic and overloading again, as will fbm_instr()
+	*/
+	big = sv_2mortal(newSVpvn(big_p, biglen));
+	if (big_utf8)
+	    SvUTF8_on(big);
+	big_p = SvPVX(big);
+    }
+    if (SvGAMAGIC(little) || (is_index && !SvOK(little))) {
+	/* is_index && !SvOK() is a hack. fbm_instr() calls SvPV_const, which will
+	   warn on undef, and we've already triggered a warning with the
+	   SvPV_const some lines above. We can't remove that, as we need to
+	   call some SvPV to trigger overloading early and find out if the
+	   string is UTF-8.
+	   This is all getting too messy. The API isn't quite clean enough,
+	   because data access has side effects.
+	*/
+	little = sv_2mortal(newSVpvn(little_p, llen));
+	if (little_utf8)
+	    SvUTF8_on(little);
+	little_p = SvPVX(little);
+    }
 
     if (MAXARG < 3)
 	offset = is_index ? 0 : biglen;
     else {
 	if (big_utf8 && offset > 0)
 	    sv_pos_u2b(big, &offset, 0);
-	offset += llen;
+	if (!is_index)
+	    offset += llen;
     }
     if (offset < 0)
 	offset = 0;
     else if (offset > (I32)biglen)
 	offset = biglen;
-    if (!(tmps2 = is_index
-	  ? fbm_instr((unsigned char*)tmps + offset,
-		      (unsigned char*)tmps + biglen, little, 0)
-	  : rninstr(tmps,  tmps  + offset,
-		    tmps2, tmps2 + llen)))
+    if (!(little_p = is_index
+	  ? fbm_instr((unsigned char*)big_p + offset,
+		      (unsigned char*)big_p + biglen, little, 0)
+	  : rninstr(big_p,  big_p  + offset,
+		    little_p, little_p + llen)))
 	retval = -1;
     else {
-	retval = tmps2 - tmps;
+	retval = little_p - big_p;
 	if (retval > 0 && big_utf8)
 	    sv_pos_b2u(big, &retval);
     }
@@ -3225,13 +3313,13 @@ PP(pp_sprintf)
 PP(pp_ord)
 {
     dVAR; dSP; dTARGET;
+
     SV *argsv = POPs;
     STRLEN len;
     const U8 *s = (U8*)SvPV_const(argsv, len);
-    SV *tmpsv;
 
     if (PL_encoding && SvPOK(argsv) && !DO_UTF8(argsv)) {
-        tmpsv = sv_2mortal(newSVsv(argsv));
+        SV * const tmpsv = sv_2mortal(newSVsv(argsv));
         s = (U8*)sv_recode_to_utf8(tmpsv, PL_encoding);
         argsv = tmpsv;
     }
@@ -3281,20 +3369,21 @@ PP(pp_chr)
     *tmps++ = (char)value;
     *tmps = '\0';
     (void)SvPOK_only(TARG);
+
     if (PL_encoding && !IN_BYTES) {
         sv_recode_to_utf8(TARG, PL_encoding);
 	tmps = SvPVX(TARG);
 	if (SvCUR(TARG) == 0 || !is_utf8_string((U8*)tmps, SvCUR(TARG)) ||
-	    memEQ(tmps, "\xef\xbf\xbd\0", 4)) {
-	    SvGROW(TARG, 3);
+	    UNICODE_IS_REPLACEMENT(utf8_to_uvchr((U8*)tmps, NULL))) {
+	    SvGROW(TARG, 2);
 	    tmps = SvPVX(TARG);
-	    SvCUR_set(TARG, 2);
-	    *tmps++ = (U8)UTF8_EIGHT_BIT_HI(value);
-	    *tmps++ = (U8)UTF8_EIGHT_BIT_LO(value);
+	    SvCUR_set(TARG, 1);
+	    *tmps++ = (char)value;
 	    *tmps = '\0';
-	    SvUTF8_on(TARG);
+	    SvUTF8_off(TARG);
 	}
     }
+
     XPUSHs(TARG);
     RETURN;
 }
@@ -3352,28 +3441,64 @@ PP(pp_ucfirst)
 {
     dVAR;
     dSP;
-    SV *sv = TOPs;
-    const U8 *s;
+    SV *source = TOPs;
     STRLEN slen;
+    STRLEN need;
+    SV *dest;
+    bool inplace = TRUE;
+    bool doing_utf8;
     const int op_type = PL_op->op_type;
+    const U8 *s;
+    U8 *d;
+    U8 tmpbuf[UTF8_MAXBYTES_CASE+1];
+    STRLEN ulen;
+    STRLEN tculen;
+
+    SvGETMAGIC(source);
+    if (SvOK(source)) {
+	s = (const U8*)SvPV_nomg_const(source, slen);
+    } else {
+	s = (const U8*)"";
+	slen = 0;
+    }
 
-    SvGETMAGIC(sv);
-    if (DO_UTF8(sv) &&
-	(s = (const U8*)SvPV_nomg_const(sv, slen)) && slen &&
-	UTF8_IS_START(*s)) {
-	U8 tmpbuf[UTF8_MAXBYTES_CASE+1];
-	STRLEN ulen;
-	STRLEN tculen;
-
+    if (slen && DO_UTF8(source) && UTF8_IS_START(*s)) {
+	doing_utf8 = TRUE;
 	utf8_to_uvchr(s, &ulen);
 	if (op_type == OP_UCFIRST) {
 	    toTITLE_utf8(s, tmpbuf, &tculen);
 	} else {
 	    toLOWER_utf8(s, tmpbuf, &tculen);
 	}
+	/* If the two differ, we definitely cannot do inplace.  */
+	inplace = (ulen == tculen);
+	need = slen + 1 - ulen + tculen;
+    } else {
+	doing_utf8 = FALSE;
+	need = slen + 1;
+    }
+
+    if (SvPADTMP(source) && !SvREADONLY(source) && inplace) {
+	/* We can convert in place.  */
+
+	dest = source;
+	s = d = (U8*)SvPV_force_nomg(source, slen);
+    } else {
+	dTARGET;
+
+	dest = TARG;
+
+	SvUPGRADE(dest, SVt_PV);
+	d = (U8*)SvGROW(dest, need);
+	(void)SvPOK_only(dest);
+
+	SETs(dest);
+
+	inplace = FALSE;
+    }
 
-	if (!SvPADTMP(sv) || SvREADONLY(sv) || ulen != tculen) {
-	    dTARGET;
+    if (doing_utf8) {
+	if(!inplace) {
 	    /* slen is the byte length of the whole SV.
 	     * ulen is the byte length of the original Unicode character
 	     * stored as UTF-8 at s.
@@ -3381,127 +3506,144 @@ PP(pp_ucfirst)
 	     * lowercased) Unicode character stored as UTF-8 at tmpbuf.
 	     * We first set the result to be the titlecased (/lowercased)
 	     * character, and then append the rest of the SV data. */
-	    sv_setpvn(TARG, (char*)tmpbuf, tculen);
+	    sv_setpvn(dest, (char*)tmpbuf, tculen);
 	    if (slen > ulen)
-	        sv_catpvn(TARG, (char*)(s + ulen), slen - ulen);
-	    SvUTF8_on(TARG);
-	    sv = TARG;
-	    SETs(sv);
+	        sv_catpvn(dest, (char*)(s + ulen), slen - ulen);
+	    SvUTF8_on(dest);
 	}
 	else {
-	    s = (U8*)SvPV_force_nomg(sv, slen);
-	    Copy(tmpbuf, s, tculen, U8);
+	    Copy(tmpbuf, d, tculen, U8);
+	    SvCUR_set(dest, need - 1);
 	}
     }
     else {
-	U8 *s1;
-	if (!SvPADTMP(sv) || SvREADONLY(sv)) {
-	    dTARGET;
-	    SvUTF8_off(TARG);				/* decontaminate */
-	    sv_setsv_nomg(TARG, sv);
-	    sv = TARG;
-	    SETs(sv);
-	}
-	s1 = (U8*)SvPV_force_nomg(sv, slen);
-	if (*s1) {
+	if (*s) {
 	    if (IN_LOCALE_RUNTIME) {
 		TAINT;
-		SvTAINTED_on(sv);
-		*s1 = (op_type == OP_UCFIRST)
-		    ? toUPPER_LC(*s1) : toLOWER_LC(*s1);
+		SvTAINTED_on(dest);
+		*d = (op_type == OP_UCFIRST)
+		    ? toUPPER_LC(*s) : toLOWER_LC(*s);
 	    }
 	    else
-		*s1 = (op_type == OP_UCFIRST) ? toUPPER(*s1) : toLOWER(*s1);
+		*d = (op_type == OP_UCFIRST) ? toUPPER(*s) : toLOWER(*s);
+	} else {
+	    /* See bug #39028  */
+	    *d = *s;
+	}
+
+	if (SvUTF8(source))
+	    SvUTF8_on(dest);
+
+	if (!inplace) {
+	    /* This will copy the trailing NUL  */
+	    Copy(s + 1, d + 1, slen, U8);
+	    SvCUR_set(dest, need - 1);
 	}
     }
-    SvSETMAGIC(sv);
+    SvSETMAGIC(dest);
     RETURN;
 }
 
+/* There's so much setup/teardown code common between uc and lc, I wonder if
+   it would be worth merging the two, and just having a switch outside each
+   of the three tight loops.  */
 PP(pp_uc)
 {
     dVAR;
     dSP;
-    SV *sv = TOPs;
+    SV *source = TOPs;
     STRLEN len;
+    STRLEN min;
+    SV *dest;
+    const U8 *s;
+    U8 *d;
 
-    SvGETMAGIC(sv);
-    if (DO_UTF8(sv)) {
+    SvGETMAGIC(source);
+
+    if (SvPADTMP(source) && !SvREADONLY(source) && !SvAMAGIC(source)
+	&& !DO_UTF8(source)) {
+	/* We can convert in place.  */
+
+	dest = source;
+	s = d = (U8*)SvPV_force_nomg(source, len);
+	min = len + 1;
+    } else {
 	dTARGET;
-	STRLEN ulen;
-	register U8 *d;
-	const U8 *s;
-	const U8 *send;
-	U8 tmpbuf[UTF8_MAXBYTES+1];
 
-	s = (const U8*)SvPV_nomg_const(sv,len);
-	if (!len) {
-	    SvUTF8_off(TARG);				/* decontaminate */
-	    sv_setpvn(TARG, "", 0);
-	    sv = TARG;
-	    SETs(sv);
-	}
-	else {
-	    STRLEN min = len + 1;
+	dest = TARG;
 
-	    SvUPGRADE(TARG, SVt_PV);
-	    SvGROW(TARG, min);
-	    (void)SvPOK_only(TARG);
-	    d = (U8*)SvPVX(TARG);
-	    send = s + len;
-	    while (s < send) {
-		STRLEN u = UTF8SKIP(s);
-
-		toUPPER_utf8(s, tmpbuf, &ulen);
-		if (ulen > u && (SvLEN(TARG) < (min += ulen - u))) {
-		    /* If the eventually required minimum size outgrows
-		     * the available space, we need to grow. */
-		    const UV o = d - (U8*)SvPVX_const(TARG);
-
-		    /* If someone uppercases one million U+03B0s we
-		     * SvGROW() one million times.  Or we could try
-		     * guessing how much to allocate without allocating
-		     * too much. Such is life. */
-		    SvGROW(TARG, min);
-		    d = (U8*)SvPVX(TARG) + o;
-		}
-		Copy(tmpbuf, d, ulen, U8);
-		d += ulen;
-		s += u;
-	    }
-	    *d = '\0';
-	    SvUTF8_on(TARG);
-	    SvCUR_set(TARG, d - (U8*)SvPVX_const(TARG));
-	    sv = TARG;
-	    SETs(sv);
+	/* The old implementation would copy source into TARG at this point.
+	   This had the side effect that if source was undef, TARG was now
+	   an undefined SV with PADTMP set, and they don't warn inside
+	   sv_2pv_flags(). However, we're now getting the PV direct from
+	   source, which doesn't have PADTMP set, so it would warn. Hence the
+	   little games.  */
+
+	if (SvOK(source)) {
+	    s = (const U8*)SvPV_nomg_const(source, len);
+	} else {
+	    s = (const U8*)"";
+	    len = 0;
 	}
+	min = len + 1;
+
+	SvUPGRADE(dest, SVt_PV);
+	d = (U8*)SvGROW(dest, min);
+	(void)SvPOK_only(dest);
+
+	SETs(dest);
     }
-    else {
-	U8 *s;
-	if (!SvPADTMP(sv) || SvREADONLY(sv)) {
-	    dTARGET;
-	    SvUTF8_off(TARG);				/* decontaminate */
-	    sv_setsv_nomg(TARG, sv);
-	    sv = TARG;
-	    SETs(sv);
+
+    /* Overloaded values may have toggled the UTF-8 flag on source, so we need
+       to check DO_UTF8 again here.  */
+
+    if (DO_UTF8(source)) {
+	const U8 *const send = s + len;
+	U8 tmpbuf[UTF8_MAXBYTES+1];
+
+	while (s < send) {
+	    const STRLEN u = UTF8SKIP(s);
+	    STRLEN ulen;
+
+	    toUPPER_utf8(s, tmpbuf, &ulen);
+	    if (ulen > u && (SvLEN(dest) < (min += ulen - u))) {
+		/* If the eventually required minimum size outgrows
+		 * the available space, we need to grow. */
+		const UV o = d - (U8*)SvPVX_const(dest);
+
+		/* If someone uppercases one million U+03B0s we SvGROW() one
+		 * million times.  Or we could try guessing how much to
+		 allocate without allocating too much.  Such is life. */
+		SvGROW(dest, min);
+		d = (U8*)SvPVX(dest) + o;
+	    }
+	    Copy(tmpbuf, d, ulen, U8);
+	    d += ulen;
+	    s += u;
 	}
-	s = (U8*)SvPV_force_nomg(sv, len);
+	SvUTF8_on(dest);
+	*d = '\0';
+	SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
+    } else {
 	if (len) {
-	    register const U8 *send = s + len;
-
+	    const U8 *const send = s + len;
 	    if (IN_LOCALE_RUNTIME) {
 		TAINT;
-		SvTAINTED_on(sv);
-		for (; s < send; s++)
-		    *s = toUPPER_LC(*s);
+		SvTAINTED_on(dest);
+		for (; s < send; d++, s++)
+		    *d = toUPPER_LC(*s);
 	    }
 	    else {
-		for (; s < send; s++)
-		    *s = toUPPER(*s);
+		for (; s < send; d++, s++)
+		    *d = toUPPER(*s);
 	    }
 	}
+	if (source != dest) {
+	    *d = '\0';
+	    SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
+	}
     }
-    SvSETMAGIC(sv);
+    SvSETMAGIC(dest);
     RETURN;
 }
 
@@ -3509,107 +3651,115 @@ PP(pp_lc)
 {
     dVAR;
     dSP;
-    SV *sv = TOPs;
+    SV *source = TOPs;
     STRLEN len;
+    STRLEN min;
+    SV *dest;
+    const U8 *s;
+    U8 *d;
 
-    SvGETMAGIC(sv);
-    if (DO_UTF8(sv)) {
+    SvGETMAGIC(source);
+
+    if (SvPADTMP(source) && !SvREADONLY(source) && !SvAMAGIC(source)
+	&& !DO_UTF8(source)) {
+	/* We can convert in place.  */
+
+	dest = source;
+	s = d = (U8*)SvPV_force_nomg(source, len);
+	min = len + 1;
+    } else {
 	dTARGET;
-	const U8 *s;
-	STRLEN ulen;
-	register U8 *d;
-	const U8 *send;
-	U8 tmpbuf[UTF8_MAXBYTES_CASE+1];
 
-	s = (const U8*)SvPV_nomg_const(sv,len);
-	if (!len) {
-	    SvUTF8_off(TARG);				/* decontaminate */
-	    sv_setpvn(TARG, "", 0);
-	    sv = TARG;
-	    SETs(sv);
+	dest = TARG;
+
+	/* The old implementation would copy source into TARG at this point.
+	   This had the side effect that if source was undef, TARG was now
+	   an undefined SV with PADTMP set, and they don't warn inside
+	   sv_2pv_flags(). However, we're now getting the PV direct from
+	   source, which doesn't have PADTMP set, so it would warn. Hence the
+	   little games.  */
+
+	if (SvOK(source)) {
+	    s = (const U8*)SvPV_nomg_const(source, len);
+	} else {
+	    s = (const U8*)"";
+	    len = 0;
 	}
-	else {
-	    STRLEN min = len + 1;
+	min = len + 1;
 
-	    SvUPGRADE(TARG, SVt_PV);
-	    SvGROW(TARG, min);
-	    (void)SvPOK_only(TARG);
-	    d = (U8*)SvPVX(TARG);
-	    send = s + len;
-	    while (s < send) {
-		const STRLEN u = UTF8SKIP(s);
-		const UV uv = toLOWER_utf8(s, tmpbuf, &ulen);
+	SvUPGRADE(dest, SVt_PV);
+	d = (U8*)SvGROW(dest, min);
+	(void)SvPOK_only(dest);
+
+	SETs(dest);
+    }
+
+    /* Overloaded values may have toggled the UTF-8 flag on source, so we need
+       to check DO_UTF8 again here.  */
+
+    if (DO_UTF8(source)) {
+	const U8 *const send = s + len;
+	U8 tmpbuf[UTF8_MAXBYTES_CASE+1];
+
+	while (s < send) {
+	    const STRLEN u = UTF8SKIP(s);
+	    STRLEN ulen;
+	    const UV uv = toLOWER_utf8(s, tmpbuf, &ulen);
 
 #define GREEK_CAPITAL_LETTER_SIGMA 0x03A3 /* Unicode U+03A3 */
-		if (uv == GREEK_CAPITAL_LETTER_SIGMA) {
-		    /*EMPTY*/
-		     /*
-		      * Now if the sigma is NOT followed by
-		      * /$ignorable_sequence$cased_letter/;
-		      * and it IS preceded by
-		      * /$cased_letter$ignorable_sequence/;
-		      * where $ignorable_sequence is
-		      * [\x{2010}\x{AD}\p{Mn}]*
-		      * and $cased_letter is
-		      * [\p{Ll}\p{Lo}\p{Lt}]
-		      * then it should be mapped to 0x03C2,
-		      * (GREEK SMALL LETTER FINAL SIGMA),
-		      * instead of staying 0x03A3.
-		      * "should be": in other words,
-		      * this is not implemented yet.
-		      * See lib/unicore/SpecialCasing.txt.
-		      */
-		}
-		if (ulen > u && (SvLEN(TARG) < (min += ulen - u))) {
-		    /* If the eventually required minimum size outgrows
-		     * the available space, we need to grow. */
-		    const UV o = d - (U8*)SvPVX_const(TARG);
-
-		    /* If someone lowercases one million U+0130s we
-		     * SvGROW() one million times.  Or we could try
-		     * guessing how much to allocate without allocating.
-		     * too much.  Such is life. */
-		    SvGROW(TARG, min);
-		    d = (U8*)SvPVX(TARG) + o;
-		}
-		Copy(tmpbuf, d, ulen, U8);
-		d += ulen;
-		s += u;
+	    if (uv == GREEK_CAPITAL_LETTER_SIGMA) {
+		NOOP;
+		/*
+		 * Now if the sigma is NOT followed by
+		 * /$ignorable_sequence$cased_letter/;
+		 * and it IS preceded by /$cased_letter$ignorable_sequence/;
+		 * where $ignorable_sequence is [\x{2010}\x{AD}\p{Mn}]*
+		 * and $cased_letter is [\p{Ll}\p{Lo}\p{Lt}]
+		 * then it should be mapped to 0x03C2,
+		 * (GREEK SMALL LETTER FINAL SIGMA),
+		 * instead of staying 0x03A3.
+		 * "should be": in other words, this is not implemented yet.
+		 * See lib/unicore/SpecialCasing.txt.
+		 */
 	    }
-	    *d = '\0';
-	    SvUTF8_on(TARG);
-	    SvCUR_set(TARG, d - (U8*)SvPVX_const(TARG));
-	    sv = TARG;
-	    SETs(sv);
-	}
-    }
-    else {
-	U8 *s;
-	if (!SvPADTMP(sv) || SvREADONLY(sv)) {
-	    dTARGET;
-	    SvUTF8_off(TARG);				/* decontaminate */
-	    sv_setsv_nomg(TARG, sv);
-	    sv = TARG;
-	    SETs(sv);
+	    if (ulen > u && (SvLEN(dest) < (min += ulen - u))) {
+		/* If the eventually required minimum size outgrows
+		 * the available space, we need to grow. */
+		const UV o = d - (U8*)SvPVX_const(dest);
+
+		/* If someone lowercases one million U+0130s we SvGROW() one
+		 * million times.  Or we could try guessing how much to
+		 allocate without allocating too much.  Such is life. */
+		SvGROW(dest, min);
+		d = (U8*)SvPVX(dest) + o;
+	    }
+	    Copy(tmpbuf, d, ulen, U8);
+	    d += ulen;
+	    s += u;
 	}
-
-	s = (U8*)SvPV_force_nomg(sv, len);
+	SvUTF8_on(dest);
+	*d = '\0';
+	SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
+    } else {
 	if (len) {
-	    register const U8 * const send = s + len;
-
+	    const U8 *const send = s + len;
 	    if (IN_LOCALE_RUNTIME) {
 		TAINT;
-		SvTAINTED_on(sv);
-		for (; s < send; s++)
-		    *s = toLOWER_LC(*s);
+		SvTAINTED_on(dest);
+		for (; s < send; d++, s++)
+		    *d = toLOWER_LC(*s);
 	    }
 	    else {
-		for (; s < send; s++)
-		    *s = toLOWER(*s);
+		for (; s < send; d++, s++)
+		    *d = toLOWER(*s);
 	    }
 	}
+	if (source != dest) {
+	    *d = '\0';
+	    SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
+	}
     }
-    SvSETMAGIC(sv);
+    SvSETMAGIC(dest);
     RETURN;
 }
 
@@ -3673,7 +3823,7 @@ PP(pp_aslice)
     register const I32 lval = (PL_op->op_flags & OPf_MOD || LVRET);
 
     if (SvTYPE(av) == SVt_PVAV) {
-	const I32 arybase = PL_curcop->cop_arybase;
+	const I32 arybase = CopARYBASE_get(PL_curcop);
 	if (lval && PL_op->op_private & OPpLVAL_INTRO) {
 	    register SV **svp;
 	    I32 max = -1;
@@ -3715,7 +3865,7 @@ PP(pp_each)
 {
     dVAR;
     dSP;
-    HV * const hash = (HV*)POPs;
+    HV * hash = (HV*)POPs;
     HE *entry;
     const I32 gimme = GIMME_V;
 
@@ -3886,8 +4036,9 @@ PP(pp_hslice)
 			save_helem(hv, keysv, svp);
 		    else {
 			STRLEN keylen;
-			const char *key = SvPV_const(keysv, keylen);
-			SAVEDELETE(hv, savepvn(key,keylen), keylen);
+			const char * const key = SvPV_const(keysv, keylen);
+			SAVEDELETE(hv, savepvn(key,keylen),
+				   SvUTF8(keysv) ? -(I32)keylen : (I32)keylen);
 		    }
 		}
             }
@@ -3925,8 +4076,8 @@ PP(pp_lslice)
     SV ** const lastlelem = PL_stack_base + POPMARK;
     SV ** const firstlelem = PL_stack_base + POPMARK + 1;
     register SV ** const firstrelem = lastlelem + 1;
-    const I32 arybase = PL_curcop->cop_arybase;
-    I32 is_something_there = PL_op->op_flags & OPf_MOD;
+    const I32 arybase = CopARYBASE_get(PL_curcop);
+    I32 is_something_there = FALSE;
 
     register const I32 max = lastrelem - lastlelem;
     register SV **lelem;
@@ -3975,16 +4126,17 @@ PP(pp_anonlist)
 {
     dVAR; dSP; dMARK; dORIGMARK;
     const I32 items = SP - MARK;
-    SV * const av = sv_2mortal((SV*)av_make(items, MARK+1));
+    SV * const av = (SV *) av_make(items, MARK+1);
     SP = ORIGMARK;		/* av_make() might realloc stack_sp */
-    XPUSHs(av);
+    XPUSHs(sv_2mortal((PL_op->op_flags & OPf_SPECIAL)
+		      ? newRV_noinc(av) : av));
     RETURN;
 }
 
 PP(pp_anonhash)
 {
     dVAR; dSP; dMARK; dORIGMARK;
-    HV* const hv = (HV*)sv_2mortal((SV*)newHV());
+    HV* const hv = newHV();
 
     while (MARK < SP) {
 	SV * const key = *++MARK;
@@ -3996,7 +4148,8 @@ PP(pp_anonhash)
 	(void)hv_store_ent(hv,key,val,0);
     }
     SP = ORIGMARK;
-    XPUSHs((SV*)hv);
+    XPUSHs(sv_2mortal((PL_op->op_flags & OPf_SPECIAL)
+		      ? newRV_noinc((SV*) hv) : (SV*)hv));
     RETURN;
 }
 
@@ -4012,7 +4165,6 @@ PP(pp_splice)
     I32 newlen;
     I32 after;
     I32 diff;
-    SV **tmparyval = NULL;
     const MAGIC * const mg = SvTIED_mg((SV*)ary, PERL_MAGIC_tied);
 
     if (mg) {
@@ -4033,7 +4185,7 @@ PP(pp_splice)
 	if (offset < 0)
 	    offset += AvFILLp(ary) + 1;
 	else
-	    offset -= PL_curcop->cop_arybase;
+	    offset -= CopARYBASE_get(PL_curcop);
 	if (offset < 0)
 	    DIE(aTHX_ PL_no_aelem, i);
 	if (++MARK < SP) {
@@ -4078,6 +4230,7 @@ PP(pp_splice)
     }
 
     if (diff < 0) {				/* shrinking the area */
+	SV **tmparyval = NULL;
 	if (newlen) {
 	    Newx(tmparyval, newlen, SV*);	/* so remember insertion */
 	    Copy(MARK, tmparyval, newlen, SV*);
@@ -4138,15 +4291,14 @@ PP(pp_splice)
 	}
     }
     else {					/* no, expanding (or same) */
+	SV** tmparyval = NULL;
 	if (length) {
 	    Newx(tmparyval, length, SV*);	/* so remember deletion */
 	    Copy(AvARRAY(ary)+offset, tmparyval, length, SV*);
 	}
 
 	if (diff > 0) {				/* expanding */
-
 	    /* push up or down? */
-
 	    if (offset < after && diff <= AvARRAY(ary) - AvALLOC(ary)) {
 		if (offset) {
 		    src = AvARRAY(ary);
@@ -4187,7 +4339,6 @@ PP(pp_splice)
 			dst++;
 		    }
 		}
-		Safefree(tmparyval);
 	    }
 	    MARK += length - 1;
 	}
@@ -4198,10 +4349,10 @@ PP(pp_splice)
 		while (length-- > 0)
 		    SvREFCNT_dec(tmparyval[length]);
 	    }
-	    Safefree(tmparyval);
 	}
 	else
 	    *MARK = &PL_sv_undef;
+	Safefree(tmparyval);
     }
     SP = MARK;
     RETURN;
@@ -4210,7 +4361,7 @@ PP(pp_splice)
 PP(pp_push)
 {
     dVAR; dSP; dMARK; dORIGMARK; dTARGET;
-    register AV *ary = (AV*)*++MARK;
+    register AV * const ary = (AV*)*++MARK;
     const MAGIC * const mg = SvTIED_mg((SV*)ary, PERL_MAGIC_tied);
 
     if (mg) {
@@ -4300,7 +4451,7 @@ PP(pp_reverse)
 	register I32 tmp;
 	dTARGET;
 	STRLEN len;
-	I32 padoff_du;
+	PADOFFSET padoff_du;
 
 	SvUTF8_off(TARG);				/* decontaminate */
 	if (SP - MARK > 1)
@@ -4367,7 +4518,7 @@ PP(pp_split)
     register SV *dstr;
     register const char *m;
     I32 iters = 0;
-    const STRLEN slen = do_utf8 ? utf8_length((U8*)s, (U8*)strend) : (strend - s);
+    const STRLEN slen = do_utf8 ? utf8_length((U8*)s, (U8*)strend) : (STRLEN)(strend - s);
     I32 maxiters = slen + 10;
     const char *orig;
     const I32 origlimit = limit;
@@ -4490,7 +4641,7 @@ PP(pp_split)
 	     && (rx->reganch & ROPT_CHECK_ALL)
 	     && !(rx->reganch & ROPT_ANCH)) {
 	const int tail = (rx->reganch & RE_INTUIT_TAIL);
-	SV * const csv = CALLREG_INTUIT_STRING(aTHX_ rx);
+	SV * const csv = CALLREG_INTUIT_STRING(rx);
 
 	len = rx->minlen;
 	if (len == 1 && !(rx->reganch & ROPT_UTF8) && !tail) {
@@ -4540,7 +4691,7 @@ PP(pp_split)
 	{
 	    I32 rex_return;
 	    PUTBACK;
-	    rex_return = CALLREGEXEC(aTHX_ rx, (char*)s, (char*)strend, (char*)orig, 1 ,
+	    rex_return = CALLREGEXEC(rx, (char*)s, (char*)strend, (char*)orig, 1 ,
 			    sv, NULL, 0);
 	    SPAGAIN;
 	    if (rex_return == 0)