X-Git-Url: https://perl5.git.perl.org/perl5.git/blobdiff_plain/78ed8e3629d58d11345e4367dbe14b9603e8c84b..6987f4434e4dfee71506125954ee1ae41c46f1cb:/pp.c

diff --git a/pp.c b/pp.c
index 01b979e..5b5e163 100644
--- a/pp.c
+++ b/pp.c
@@ -28,6 +28,7 @@
 #include "perl.h"
 #include "keywords.h"
 
+#include "invlist_inline.h"
 #include "reentr.h"
 #include "regcharclass.h"
 
@@ -1267,16 +1268,10 @@ PP(pp_multiply)
             NV nr = SvNVX(svr);
             NV result;
 
-            if (
-#if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan)
-                !Perl_isnan(nl) && nl == (NV)(il = (IV)nl)
-                && !Perl_isnan(nr) && nr == (NV)(ir = (IV)nr)
-#else
-                nl == (NV)(il = (IV)nl) && nr == (NV)(ir = (IV)nr)
-#endif
-                )
+            if (lossless_NV_to_IV(nl, &il) && lossless_NV_to_IV(nr, &ir)) {
                 /* nothing was lost by converting to IVs */
                 goto do_iv;
+            }
             SP--;
             result = nl * nr;
 #  if defined(__sgi) && defined(USE_LONG_DOUBLE) && LONG_DOUBLEKIND == LONG_DOUBLE_IS_DOUBLEDOUBLE_128_BIT_BE_BE && NVSIZE == 16
@@ -1315,8 +1310,9 @@ PP(pp_multiply)
 		    alow = aiv;
 		    auvok = TRUE; /* effectively it's a UV now */
 		} else {
-                    /* abs, auvok == false records sign */
-		    alow = -(UV)aiv;
+                    /* abs, auvok == false records sign; Using 0- here and
+                     * later to silence bogus warning from MS VC */
+		    alow = (UV) (0 - (UV) aiv);
 		}
 	    }
 	    if (buvok) {
@@ -1328,7 +1324,7 @@ PP(pp_multiply)
 		    buvok = TRUE; /* effectively it's a UV now */
 		} else {
                     /* abs, buvok == false records sign */
-		    blow = -(UV)biv;
+		    blow = (UV) (0 - (UV) biv);
 		}
 	    }
 
@@ -1563,7 +1559,7 @@ PP(pp_modulo)
                     right = biv;
                     right_neg = FALSE; /* effectively it's a UV now */
                 } else {
-                    right = -(UV)biv;
+		    right = (UV) (0 - (UV) biv);
                 }
             }
         }
@@ -1593,7 +1589,7 @@ PP(pp_modulo)
                         left = aiv;
                         left_neg = FALSE; /* effectively it's a UV now */
                     } else {
-                        left = -(UV)aiv;
+                        left = (UV) (0 - (UV) aiv);
                     }
                 }
         }
@@ -1693,7 +1689,8 @@ PP(pp_repeat)
 	    else {
 		dTOPss;
 		ASSUME(MARK + 1 == SP);
-		XPUSHs(sv);
+                MEXTEND(SP, 1);
+                PUSHs(sv);
 		MARK[1] = &PL_sv_undef;
 	    }
 	    SP = MARK + 2;
@@ -1846,16 +1843,10 @@ PP(pp_subtract)
             NV nl = SvNVX(svl);
             NV nr = SvNVX(svr);
 
-            if (
-#if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan)
-                !Perl_isnan(nl) && nl == (NV)(il = (IV)nl)
-                && !Perl_isnan(nr) && nr == (NV)(ir = (IV)nr)
-#else
-                nl == (NV)(il = (IV)nl) && nr == (NV)(ir = (IV)nr)
-#endif
-                )
+            if (lossless_NV_to_IV(nl, &il) && lossless_NV_to_IV(nr, &ir)) {
                 /* nothing was lost by converting to IVs */
                 goto do_iv;
+            }
             SP--;
             TARGn(nl - nr, 0); /* args not GMG, so can't be tainted */
             SETs(TARG);
@@ -1891,7 +1882,7 @@ PP(pp_subtract)
 			auv = aiv;
 			auvok = 1;	/* Now acting as a sign flag.  */
 		    } else {
-			auv = -(UV)aiv;
+                        auv = (UV) (0 - (UV) aiv);
 		    }
 		}
 		a_valid = 1;
@@ -1911,7 +1902,7 @@ PP(pp_subtract)
 		    buv = biv;
 		    buvok = 1;
 		} else
-                    buv = -(UV)biv;
+                    buv = (UV) (0 - (UV) biv);
 	    }
 	    /* ?uvok if value is >= 0. basically, flagged as UV if it's +ve,
 	       else "IV" now, independent of how it came in.
@@ -1988,7 +1979,7 @@ static UV S_uv_shift(UV uv, int shift, bool left)
        shift = -shift;
        left = !left;
    }
-   if (shift >= IV_BITS) {
+   if (UNLIKELY(shift >= IV_BITS)) {
        return 0;
    }
    return left ? uv << shift : uv >> shift;
@@ -1996,14 +1987,34 @@ static UV S_uv_shift(UV uv, int shift, bool left)
 
 static IV S_iv_shift(IV iv, int shift, bool left)
 {
-   if (shift < 0) {
-       shift = -shift;
-       left = !left;
-   }
-   if (shift >= IV_BITS) {
-       return iv < 0 && !left ? -1 : 0;
-   }
-   return left ? iv << shift : iv >> shift;
+    if (shift < 0) {
+        shift = -shift;
+        left = !left;
+    }
+
+    if (UNLIKELY(shift >= IV_BITS)) {
+        return iv < 0 && !left ? -1 : 0;
+    }
+
+    /* For left shifts, perl 5 has chosen to treat the value as unsigned for
+     * the * purposes of shifting, then cast back to signed.  This is very
+     * different from Raku:
+     *
+     * $ raku -e 'say -2 +< 5'
+     * -64
+     *
+     * $ ./perl -le 'print -2 << 5'
+     * 18446744073709551552
+     * */
+    if (left) {
+        if (iv == IV_MIN) { /* Casting this to a UV is undefined behavior */
+            return 0;
+        }
+        return (IV) (((UV) iv) << shift);
+    }
+
+    /* Here is right shift */
+    return iv >> shift;
 }
 
 #define UV_LEFT_SHIFT(uv, shift) S_uv_shift(uv, shift, TRUE)
@@ -2051,14 +2062,20 @@ PP(pp_lt)
 {
     dSP;
     SV *left, *right;
+    U32 flags_and, flags_or;
 
-    tryAMAGICbin_MG(lt_amg, AMGf_set|AMGf_numeric);
+    tryAMAGICbin_MG(lt_amg, AMGf_numeric);
     right = POPs;
     left  = TOPs;
+    flags_and = SvFLAGS(left) & SvFLAGS(right);
+    flags_or  = SvFLAGS(left) | SvFLAGS(right);
+
     SETs(boolSV(
-	(SvIOK_notUV(left) && SvIOK_notUV(right))
-	? (SvIVX(left) < SvIVX(right))
-	: (do_ncmp(left, right) == -1)
+        ( (flags_and & SVf_IOK) && ((flags_or & SVf_IVisUV) ==0 ) )
+        ?    (SvIVX(left) < SvIVX(right))
+        : (flags_and & SVf_NOK)
+        ?    (SvNVX(left) < SvNVX(right))
+        : (do_ncmp(left, right) == -1)
     ));
     RETURN;
 }
@@ -2067,14 +2084,20 @@ PP(pp_gt)
 {
     dSP;
     SV *left, *right;
+    U32 flags_and, flags_or;
 
-    tryAMAGICbin_MG(gt_amg, AMGf_set|AMGf_numeric);
+    tryAMAGICbin_MG(gt_amg, AMGf_numeric);
     right = POPs;
     left  = TOPs;
+    flags_and = SvFLAGS(left) & SvFLAGS(right);
+    flags_or  = SvFLAGS(left) | SvFLAGS(right);
+
     SETs(boolSV(
-	(SvIOK_notUV(left) && SvIOK_notUV(right))
-	? (SvIVX(left) > SvIVX(right))
-	: (do_ncmp(left, right) == 1)
+        ( (flags_and & SVf_IOK) && ((flags_or & SVf_IVisUV) ==0 ) )
+        ?    (SvIVX(left) > SvIVX(right))
+        : (flags_and & SVf_NOK)
+        ?    (SvNVX(left) > SvNVX(right))
+        : (do_ncmp(left, right) == 1)
     ));
     RETURN;
 }
@@ -2083,14 +2106,20 @@ PP(pp_le)
 {
     dSP;
     SV *left, *right;
+    U32 flags_and, flags_or;
 
-    tryAMAGICbin_MG(le_amg, AMGf_set|AMGf_numeric);
+    tryAMAGICbin_MG(le_amg, AMGf_numeric);
     right = POPs;
     left  = TOPs;
+    flags_and = SvFLAGS(left) & SvFLAGS(right);
+    flags_or  = SvFLAGS(left) | SvFLAGS(right);
+
     SETs(boolSV(
-	(SvIOK_notUV(left) && SvIOK_notUV(right))
-	? (SvIVX(left) <= SvIVX(right))
-	: (do_ncmp(left, right) <= 0)
+        ( (flags_and & SVf_IOK) && ((flags_or & SVf_IVisUV) ==0 ) )
+        ?    (SvIVX(left) <= SvIVX(right))
+        : (flags_and & SVf_NOK)
+        ?    (SvNVX(left) <= SvNVX(right))
+        : (do_ncmp(left, right) <= 0)
     ));
     RETURN;
 }
@@ -2099,14 +2128,20 @@ PP(pp_ge)
 {
     dSP;
     SV *left, *right;
+    U32 flags_and, flags_or;
 
-    tryAMAGICbin_MG(ge_amg, AMGf_set|AMGf_numeric);
+    tryAMAGICbin_MG(ge_amg, AMGf_numeric);
     right = POPs;
     left  = TOPs;
+    flags_and = SvFLAGS(left) & SvFLAGS(right);
+    flags_or  = SvFLAGS(left) | SvFLAGS(right);
+
     SETs(boolSV(
-	(SvIOK_notUV(left) && SvIOK_notUV(right))
-	? (SvIVX(left) >= SvIVX(right))
-	: ( (do_ncmp(left, right) & 2) == 0)
+        ( (flags_and & SVf_IOK) && ((flags_or & SVf_IVisUV) ==0 ) )
+        ?    (SvIVX(left) >= SvIVX(right))
+        : (flags_and & SVf_NOK)
+        ?    (SvNVX(left) >= SvNVX(right))
+        : ( (do_ncmp(left, right) & 2) == 0)
     ));
     RETURN;
 }
@@ -2115,14 +2150,20 @@ PP(pp_ne)
 {
     dSP;
     SV *left, *right;
+    U32 flags_and, flags_or;
 
-    tryAMAGICbin_MG(ne_amg, AMGf_set|AMGf_numeric);
+    tryAMAGICbin_MG(ne_amg, AMGf_numeric);
     right = POPs;
     left  = TOPs;
+    flags_and = SvFLAGS(left) & SvFLAGS(right);
+    flags_or  = SvFLAGS(left) | SvFLAGS(right);
+
     SETs(boolSV(
-	(SvIOK_notUV(left) && SvIOK_notUV(right))
-	? (SvIVX(left) != SvIVX(right))
-	: (do_ncmp(left, right) != 0)
+        ( (flags_and & SVf_IOK) && ((flags_or & SVf_IVisUV) ==0 ) )
+        ?    (SvIVX(left) != SvIVX(right))
+        : (flags_and & SVf_NOK)
+        ?    (SvNVX(left) != SvNVX(right))
+        : (do_ncmp(left, right) != 0)
     ));
     RETURN;
 }
@@ -2248,7 +2289,7 @@ PP(pp_sle)
 	break;
     }
 
-    tryAMAGICbin_MG(amg_type, AMGf_set);
+    tryAMAGICbin_MG(amg_type, 0);
     {
       dPOPTOPssrl;
       const int cmp =
@@ -2266,7 +2307,7 @@ PP(pp_sle)
 PP(pp_seq)
 {
     dSP;
-    tryAMAGICbin_MG(seq_amg, AMGf_set);
+    tryAMAGICbin_MG(seq_amg, 0);
     {
       dPOPTOPssrl;
       SETs(boolSV(sv_eq_flags(left, right, 0)));
@@ -2277,7 +2318,7 @@ PP(pp_seq)
 PP(pp_sne)
 {
     dSP;
-    tryAMAGICbin_MG(sne_amg, AMGf_set);
+    tryAMAGICbin_MG(sne_amg, 0);
     {
       dPOPTOPssrl;
       SETs(boolSV(!sv_eq_flags(left, right, 0)));
@@ -2512,7 +2553,7 @@ PP(pp_not)
     dSP;
     SV *sv;
 
-    tryAMAGICun_MG(not_amg, AMGf_set);
+    tryAMAGICun_MG(not_amg, 0);
     sv = *PL_stack_sp;
     *PL_stack_sp = boolSV(!SvTRUE_nomg_NN(sv));
     return NORMAL;
@@ -2532,23 +2573,22 @@ S_scomplement(pTHX_ SV *targ, SV *sv)
             if (len && ! utf8_to_bytes(tmps, &len)) {
                 Perl_croak(aTHX_ FATAL_ABOVE_FF_MSG, PL_op_desc[PL_op->op_type]);
             }
-            SvCUR(TARG) = len;
+            SvCUR_set(TARG, len);
             SvUTF8_off(TARG);
         }
 
 	anum = len;
 
-#ifdef LIBERAL
 	{
 	    long *tmpl;
-	    for ( ; anum && (unsigned long)tmps % sizeof(long); anum--, tmps++)
+	    for ( ; anum && PTR2nat(tmps) % sizeof(long); anum--, tmps++)
 		*tmps = ~*tmps;
 	    tmpl = (long*)tmps;
 	    for ( ; anum >= (I32)sizeof(long); anum -= (I32)sizeof(long), tmpl++)
 		*tmpl = ~*tmpl;
 	    tmps = (U8*)tmpl;
 	}
-#endif
+
 	for ( ; anum > 0; anum--, tmps++)
 	    *tmps = ~*tmps;
 }
@@ -2644,7 +2684,6 @@ PP(pp_i_divide)
 
 PP(pp_i_modulo)
 {
-     /* This is the vanilla old i_modulo. */
      dSP; dATARGET;
      tryAMAGICbin_MG(modulo_amg, AMGf_assign);
      {
@@ -2660,30 +2699,6 @@ PP(pp_i_modulo)
      }
 }
 
-#if defined(__GLIBC__) && IVSIZE == 8 \
-    && ( __GLIBC__ < 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ < 8))
-
-PP(pp_i_modulo_glibc_bugfix)
-{
-     /* This is the i_modulo with the workaround for the _moddi3 bug
-      * in (at least) glibc 2.2.5 (the PERL_ABS() the workaround).
-      * See below for pp_i_modulo. */
-     dSP; dATARGET;
-     tryAMAGICbin_MG(modulo_amg, AMGf_assign);
-     {
-	  dPOPTOPiirl_nomg;
-	  if (!right)
-	       DIE(aTHX_ "Illegal modulus zero");
-	  /* avoid FPE_INTOVF on some platforms when left is IV_MIN */
-	  if (right == -1)
-	      SETi( 0 );
-	  else
-	      SETi( left % PERL_ABS(right) );
-	  RETURN;
-     }
-}
-#endif
-
 PP(pp_i_add)
 {
     dSP; dATARGET;
@@ -2709,7 +2724,7 @@ PP(pp_i_subtract)
 PP(pp_i_lt)
 {
     dSP;
-    tryAMAGICbin_MG(lt_amg, AMGf_set);
+    tryAMAGICbin_MG(lt_amg, 0);
     {
       dPOPTOPiirl_nomg;
       SETs(boolSV(left < right));
@@ -2720,7 +2735,7 @@ PP(pp_i_lt)
 PP(pp_i_gt)
 {
     dSP;
-    tryAMAGICbin_MG(gt_amg, AMGf_set);
+    tryAMAGICbin_MG(gt_amg, 0);
     {
       dPOPTOPiirl_nomg;
       SETs(boolSV(left > right));
@@ -2731,7 +2746,7 @@ PP(pp_i_gt)
 PP(pp_i_le)
 {
     dSP;
-    tryAMAGICbin_MG(le_amg, AMGf_set);
+    tryAMAGICbin_MG(le_amg, 0);
     {
       dPOPTOPiirl_nomg;
       SETs(boolSV(left <= right));
@@ -2742,7 +2757,7 @@ PP(pp_i_le)
 PP(pp_i_ge)
 {
     dSP;
-    tryAMAGICbin_MG(ge_amg, AMGf_set);
+    tryAMAGICbin_MG(ge_amg, 0);
     {
       dPOPTOPiirl_nomg;
       SETs(boolSV(left >= right));
@@ -2753,7 +2768,7 @@ PP(pp_i_ge)
 PP(pp_i_eq)
 {
     dSP;
-    tryAMAGICbin_MG(eq_amg, AMGf_set);
+    tryAMAGICbin_MG(eq_amg, 0);
     {
       dPOPTOPiirl_nomg;
       SETs(boolSV(left == right));
@@ -2764,7 +2779,7 @@ PP(pp_i_eq)
 PP(pp_i_ne)
 {
     dSP;
-    tryAMAGICbin_MG(ne_amg, AMGf_set);
+    tryAMAGICbin_MG(ne_amg, 0);
     {
       dPOPTOPiirl_nomg;
       SETs(boolSV(left != right));
@@ -3074,11 +3089,16 @@ PP(pp_oct)
     if (*tmps == '0')
         tmps++, len--;
     if (isALPHA_FOLD_EQ(*tmps, 'x')) {
+        tmps++, len--;
+        flags |= PERL_SCAN_DISALLOW_PREFIX;
     hex:
         result_uv = grok_hex (tmps, &len, &flags, &result_nv);
     }
-    else if (isALPHA_FOLD_EQ(*tmps, 'b'))
+    else if (isALPHA_FOLD_EQ(*tmps, 'b')) {
+        tmps++, len--;
+        flags |= PERL_SCAN_DISALLOW_PREFIX;
         result_uv = grok_bin (tmps, &len, &flags, &result_nv);
+    }
     else
         result_uv = grok_oct (tmps, &len, &flags, &result_nv);
 
@@ -3530,11 +3550,16 @@ PP(pp_index)
   push_result:
     /* OPpTRUEBOOL indicates an '== -1' has been optimised away */
     if (PL_op->op_private & OPpTRUEBOOL) {
-        PUSHs( ((retval != -1) ^ cBOOL(PL_op->op_private & OPpINDEX_BOOLNEG))
-                    ? &PL_sv_yes : &PL_sv_no);
-        if (PL_op->op_private & OPpTARGET_MY)
+        SV *result = ((retval != -1) ^ cBOOL(PL_op->op_private & OPpINDEX_BOOLNEG))
+            ? &PL_sv_yes : &PL_sv_no;
+        if (PL_op->op_private & OPpTARGET_MY) {
             /* $lex = (index() == -1) */
-            sv_setsv(TARG, TOPs);
+            sv_setsv_mg(TARG, result);
+            PUSHs(TARG);
+        }
+        else {
+            PUSHs(result);
+        }
     }
     else
         PUSHi(retval);
@@ -3642,33 +3667,25 @@ PP(pp_crypt)
 	 sv_utf8_downgrade(tsv, FALSE);
 	 tmps = SvPV_const(tsv, len);
     }
-#   ifdef USE_ITHREADS
-#     ifdef HAS_CRYPT_R
+#  ifdef USE_ITHREADS
+#    ifdef HAS_CRYPT_R
     if (!PL_reentrant_buffer->_crypt_struct_buffer) {
       /* This should be threadsafe because in ithreads there is only
        * one thread per interpreter.  If this would not be true,
        * we would need a mutex to protect this malloc. */
         PL_reentrant_buffer->_crypt_struct_buffer =
 	  (struct crypt_data *)safemalloc(sizeof(struct crypt_data));
-#if defined(__GLIBC__) || defined(__EMX__)
+#      if defined(__GLIBC__) || defined(__EMX__)
 	if (PL_reentrant_buffer->_crypt_struct_buffer) {
 	    PL_reentrant_buffer->_crypt_struct_buffer->initialized = 0;
-#if (defined(__GLIBC__) && __GLIBC__ == 2) && \
-    (defined(__GLIBC_MINOR__) && __GLIBC_MINOR__ >= 2 && __GLIBC_MINOR__ < 4)
-	    /* work around glibc-2.2.5 bug, has been fixed at some
-	     * time in glibc-2.3.X */
-	    PL_reentrant_buffer->_crypt_struct_buffer->current_saltbits = 0;
-#endif
 	}
-#endif
+#      endif
     }
-#     endif /* HAS_CRYPT_R */
-#   endif /* USE_ITHREADS */
-#   ifdef FCRYPT
-    sv_setpv(TARG, fcrypt(tmps, SvPV_nolen_const(right)));
-#   else
+#    endif /* HAS_CRYPT_R */
+#  endif /* USE_ITHREADS */
+
     sv_setpv(TARG, PerlProc_crypt(tmps, SvPV_nolen_const(right)));
-#   endif
+
     SvUTF8_off(TARG);
     SETTARG;
     RETURN;
@@ -3708,6 +3725,7 @@ PP(pp_ucfirst)
     STRLEN tculen;  /* tculen is the byte length of the freshly titlecased (or
 		     * lowercased) character stored in tmpbuf.  May be either
 		     * UTF-8 or not, but in either case is the number of bytes */
+    bool remove_dot_above = FALSE;
 
     s = (const U8*)SvPV_const(source, slen);
 
@@ -3736,6 +3754,7 @@ PP(pp_ucfirst)
     else if (DO_UTF8(source)) {	/* Is the source utf8? */
 	doing_utf8 = TRUE;
         ulen = UTF8SKIP(s);
+
         if (op_type == OP_UCFIRST) {
 #ifdef USE_LOCALE_CTYPE
 	    _toTITLE_utf8_flags(s, s +slen, tmpbuf, &tculen, IN_LC_RUNTIME(LC_CTYPE));
@@ -3748,7 +3767,37 @@ PP(pp_ucfirst)
 #ifdef USE_LOCALE_CTYPE
 
 	    _toLOWER_utf8_flags(s, s + slen, tmpbuf, &tculen, IN_LC_RUNTIME(LC_CTYPE));
+
+            /* In turkic locales, lower casing an 'I' normally yields U+0131,
+             * LATIN SMALL LETTER DOTLESS I, but not if the grapheme also
+             * contains a COMBINING DOT ABOVE.  Instead it is treated like
+             * LATIN CAPITAL LETTER I WITH DOT ABOVE lowercased to 'i'.  The
+             * call to lowercase above has handled this.  But SpecialCasing.txt
+             * says we are supposed to remove the COMBINING DOT ABOVE.  We can
+             * tell if we have this situation if I ==> i in a turkic locale. */
+            if (   UNLIKELY(PL_in_utf8_turkic_locale)
+                && IN_LC_RUNTIME(LC_CTYPE)
+                && (UNLIKELY(*s == 'I' && tmpbuf[0] == 'i')))
+            {
+                /* Here, we know there was a COMBINING DOT ABOVE.  We won't be
+                 * able to handle this in-place. */
+                inplace = FALSE;
+
+                /* It seems likely that the DOT will immediately follow the
+                 * 'I'.  If so, we can remove it simply by indicating to the
+                 * code below to start copying the source just beyond the DOT.
+                 * We know its length is 2 */
+                if (LIKELY(memBEGINs(s + 1, s + slen, COMBINING_DOT_ABOVE_UTF8))) {
+                    ulen += 2;
+                }
+                else {  /* But if it doesn't follow immediately, set a flag for
+                           the code below */
+                    remove_dot_above = TRUE;
+                }
+            }
 #else
+            PERL_UNUSED_VAR(remove_dot_above);
+
 	    _toLOWER_utf8_flags(s, s + slen, tmpbuf, &tculen, 0);
 #endif
 
@@ -3766,41 +3815,63 @@ PP(pp_ucfirst)
 			 * need to be overridden for the tricky ones */
 	need = slen + 1;
 
-	if (op_type == OP_LCFIRST) {
 
-	    /* lower case the first letter: no trickiness for any character */
 #ifdef USE_LOCALE_CTYPE
-            if (IN_LC_RUNTIME(LC_CTYPE)) {
-                *tmpbuf = toLOWER_LC(*s);
-            }
-            else
-#endif
+
+        if (IN_LC_RUNTIME(LC_CTYPE)) {
+            if (    UNLIKELY(PL_in_utf8_turkic_locale)
+                && (   (op_type == OP_LCFIRST && UNLIKELY(*s == 'I'))
+                    || (op_type == OP_UCFIRST && UNLIKELY(*s == 'i'))))
             {
-                *tmpbuf = (IN_UNI_8_BIT)
-                          ? toLOWER_LATIN1(*s)
-                          : toLOWER(*s);
+                if (*s == 'I') { /* lcfirst('I') */
+                    tmpbuf[0] = UTF8_TWO_BYTE_HI(LATIN_SMALL_LETTER_DOTLESS_I);
+                    tmpbuf[1] = UTF8_TWO_BYTE_LO(LATIN_SMALL_LETTER_DOTLESS_I);
+                }
+                else {  /* ucfirst('i') */
+                    tmpbuf[0] = UTF8_TWO_BYTE_HI(LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE);
+                    tmpbuf[1] = UTF8_TWO_BYTE_LO(LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE);
+                }
+                tculen = 2;
+                inplace = FALSE;
+                doing_utf8 = TRUE;
+                convert_source_to_utf8 = TRUE;
+                need += variant_under_utf8_count(s, s + slen);
             }
-	}
-#ifdef USE_LOCALE_CTYPE
-	/* is ucfirst() */
-	else if (IN_LC_RUNTIME(LC_CTYPE)) {
-            if (IN_UTF8_CTYPE_LOCALE) {
-                goto do_uni_rules;
+            else if (op_type == OP_LCFIRST) {
+
+                /* For lc, there are no gotchas for UTF-8 locales (other than
+                 * the turkish ones already handled above) */
+                *tmpbuf = toLOWER_LC(*s);
             }
+            else { /* ucfirst */
 
-            *tmpbuf = (U8) toUPPER_LC(*s); /* This would be a bug if any
-                                              locales have upper and title case
-                                              different */
-	}
+                /* But for uc, some characters require special handling */
+                if (IN_UTF8_CTYPE_LOCALE) {
+                    goto do_uni_rules;
+                }
+
+                /* This would be a bug if any locales have upper and title case
+                 * different */
+                *tmpbuf = (U8) toUPPER_LC(*s);
+            }
+        }
+        else
 #endif
-	else if (! IN_UNI_8_BIT) {
-	    *tmpbuf = toUPPER(*s);	/* Returns caseless for non-ascii, or
-					 * on EBCDIC machines whatever the
-					 * native function does */
-	}
+        /* Here, not in locale.  If not using Unicode rules, is a simple
+         * lower/upper, depending */
+        if (! IN_UNI_8_BIT) {
+            *tmpbuf = (op_type == OP_LCFIRST)
+                      ? toLOWER(*s)
+                      : toUPPER(*s);
+        }
+        else if (op_type == OP_LCFIRST) {
+            /* lower case the first letter: no trickiness for any character */
+            *tmpbuf = toLOWER_LATIN1(*s);
+        }
         else {
             /* Here, is ucfirst non-UTF-8, not in locale (unless that locale is
-             * UTF-8, which we treat as not in locale), and cased latin1 */
+             * non-turkic UTF-8, which we treat as not in locale), and cased
+             * latin1 */
 	    UV title_ord;
 #ifdef USE_LOCALE_CTYPE
       do_uni_rules:
@@ -3824,16 +3895,19 @@ PP(pp_ucfirst)
 		    inplace = FALSE;
 
                     /* If the result won't fit in a byte, the entire result
-                     * will have to be in UTF-8.  Assume worst case sizing in
-                     * conversion. (all latin1 characters occupy at most two
-                     * bytes in utf8) */
+                     * will have to be in UTF-8.  Allocate enough space for the
+                     * expanded first byte, and if UTF-8, the rest of the input
+                     * string, some or all of which may also expand to two
+                     * bytes, plus the terminating NUL. */
 		    if (title_ord > 255) {
 			doing_utf8 = TRUE;
 			convert_source_to_utf8 = TRUE;
-			need = slen * 2 + 1;
+			need = slen
+                            + variant_under_utf8_count(s, s + slen)
+                            + 1;
 
                         /* The (converted) UTF-8 and UTF-EBCDIC lengths of all
-                         * (both) characters whose title case is above 255 is
+                         * characters whose title case is above 255 is
                          * 2. */
 			ulen = 2;
 		    }
@@ -3877,6 +3951,29 @@ PP(pp_ucfirst)
 		 * of the string. */
 		sv_setpvn(dest, (char*)tmpbuf, tculen);
 		if (slen > ulen) {
+
+                    /* But this boolean being set means we are in a turkic
+                     * locale, and there is a DOT character that needs to be
+                     * removed, and it isn't immediately after the current
+                     * character.  Keep concatenating characters to the output
+                     * one at a time, until we find the DOT, which we simply
+                     * skip */
+                    if (UNLIKELY(remove_dot_above)) {
+                        do {
+                            Size_t this_len = UTF8SKIP(s + ulen);
+
+                            sv_catpvn(dest, (char*)(s + ulen), this_len);
+
+                            ulen += this_len;
+                            if (memBEGINs(s + ulen, s + slen, COMBINING_DOT_ABOVE_UTF8)) {
+                                ulen += 2;
+                                break;
+                            }
+                        } while (s + ulen < s + slen);
+                    }
+
+                    /* The rest of the string can be concatenated unchanged,
+                     * all at once */
 		    sv_catpvn(dest, (char*)(s + ulen), slen - ulen);
 		}
 	    }
@@ -3888,7 +3985,7 @@ PP(pp_ucfirst)
 		 * into tmpbuf.  First put that into dest, and then append the
 		 * rest of the source, converting it to UTF-8 as we go. */
 
-		/* Assert tculen is 2 here because the only two characters that
+		/* Assert tculen is 2 here because the only characters that
 		 * get to this part of the code have 2-byte UTF-8 equivalents */
                 assert(tculen == 2);
 		*d++ = *tmpbuf;
@@ -3931,7 +4028,7 @@ PP(pp_ucfirst)
 
 	/* In a "use bytes" we don't treat the source as UTF-8, but, still want
 	 * the destination to retain that flag */
-	if (SvUTF8(source) && ! IN_BYTES)
+	if (DO_UTF8(source))
 	    SvUTF8_on(dest);
 
 	if (!inplace) {	/* Finish the rest of the string, unchanged */
@@ -4036,12 +4133,16 @@ PP(pp_uc)
 	    STRLEN u;
 	    STRLEN ulen;
 	    UV uv;
-	    if (in_iota_subscript && ! _is_utf8_mark(s)) {
+	    if (UNLIKELY(in_iota_subscript)) {
+                UV cp = utf8_to_uvchr_buf(s, send, NULL);
 
-		/* A non-mark.  Time to output the iota subscript */
-		*d++ = UTF8_TWO_BYTE_HI(GREEK_CAPITAL_LETTER_IOTA);
-		*d++ = UTF8_TWO_BYTE_LO(GREEK_CAPITAL_LETTER_IOTA);
-		in_iota_subscript = FALSE;
+                if (! _invlist_contains_cp(PL_utf8_mark, cp)) {
+
+                    /* A non-mark.  Time to output the iota subscript */
+                    *d++ = UTF8_TWO_BYTE_HI(GREEK_CAPITAL_LETTER_IOTA);
+                    *d++ = UTF8_TWO_BYTE_LO(GREEK_CAPITAL_LETTER_IOTA);
+                    in_iota_subscript = FALSE;
+                }
             }
 
             /* Then handle the current character.  Get the changed case value
@@ -4113,16 +4214,27 @@ PP(pp_uc)
           do_uni_rules:
 #endif
 		for (; s < send; d++, s++) {
+                    Size_t extra;
+
 		    *d = toUPPER_LATIN1_MOD(*s);
-		    if (LIKELY(*d != LATIN_SMALL_LETTER_Y_WITH_DIAERESIS)) {
+		    if (   LIKELY(*d != LATIN_SMALL_LETTER_Y_WITH_DIAERESIS)
+
+#ifdef USE_LOCALE_CTYPE
+
+                        && (LIKELY(   ! PL_in_utf8_turkic_locale
+                                   || ! IN_LC_RUNTIME(LC_CTYPE))
+                                   || *s != 'i')
+#endif
+
+                    ) {
                         continue;
                     }
 
 		    /* The mainstream case is the tight loop above.  To avoid
-		     * extra tests in that, all three characters that require
-		     * special handling are mapped by the MOD to the one tested
-		     * just above.  
-		     * Use the source to distinguish between the three cases */
+                     * extra tests in that, all three characters that always
+                     * require special handling are mapped by the MOD to the
+                     * one tested just above.  Use the source to distinguish
+                     * between those cases */
 
 #if    UNICODE_MAJOR_VERSION > 2                                        \
    || (UNICODE_MAJOR_VERSION == 2 && UNICODE_DOT_VERSION >= 1		\
@@ -4140,7 +4252,7 @@ PP(pp_uc)
 		    }
 #endif
 
-		    /* The other two special handling characters have their
+		    /* The other special handling characters have their
 		     * upper cases outside the latin1 range, hence need to be
 		     * in UTF-8, so the whole result needs to be in UTF-8.
                      *
@@ -4163,35 +4275,82 @@ PP(pp_uc)
                      * come separately to UTF-8, then jump into the loop that
                      * handles UTF-8.  But the most efficient time-wise of the
                      * ones I could think of is what follows, and turned out to
-                     * not require much extra code. */
+                     * not require much extra code.
+                     *
+                     * First, calculate the extra space needed for the
+                     * remainder of the source needing to be in UTF-8.  Except
+                     * for the 'i' in Turkic locales, in UTF-8 strings, the
+                     * uppercase of a character below 256 occupies the same
+                     * number of bytes as the original.  Therefore, the space
+                     * needed is the that number plus the number of characters
+                     * that become two bytes when converted to UTF-8, plus, in
+                     * turkish locales, the number of 'i's. */
+
+                    extra = send - s + variant_under_utf8_count(s, send);
+
+#ifdef USE_LOCALE_CTYPE
+
+                    if (UNLIKELY(*s == 'i')) {  /* We wouldn't get an 'i' here
+                                                   unless are in a Turkic
+                                                   locale */
+                        const U8 * s_peek = s;
+
+                        do {
+                            extra++;
+
+                            s_peek = (U8 *) memchr(s_peek + 1, 'i',
+                                                   send - (s_peek + 1));
+                        } while (s_peek != NULL);
+                    }
+#endif
 
                     /* Convert what we have so far into UTF-8, telling the
 		     * function that we know it should be converted, and to
 		     * allow extra space for what we haven't processed yet.
-		     * Assume the worst case space requirements for converting
-		     * what we haven't processed so far: that it will require
-		     * two bytes for each remaining source character, plus the
-		     * NUL at the end.  This may cause the string pointer to
-		     * move, so re-find it. */
+                     *
+                     * This may cause the string pointer to move, so need to
+                     * save and re-find it. */
 
 		    len = d - (U8*)SvPVX_const(dest);
 		    SvCUR_set(dest, len);
 		    len = sv_utf8_upgrade_flags_grow(dest,
 						SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
-						(send -s) * 2 + 1);
+                                                extra
+                                              + 1 /* trailing NUL */ );
 		    d = (U8*)SvPVX(dest) + len;
 
                     /* Now process the remainder of the source, simultaneously
-                     * converting to upper and UTF-8. */
-		    for (; s < send; s++) {
-			(void) _to_upper_title_latin1(*s, d, &len, 'S');
-			d += len;
-		    }
-
-		    /* Here have processed the whole source; no need to continue
-		     * with the outer loop.  Each character has been converted
-		     * to upper case and converted to UTF-8 */
+                     * converting to upper and UTF-8.
+                     *
+                     * To avoid extra tests in the loop body, and since the
+                     * loop is so simple, split out the rare Turkic case into
+                     * its own loop */
 
+#ifdef USE_LOCALE_CTYPE
+                    if (   UNLIKELY(PL_in_utf8_turkic_locale)
+                        && UNLIKELY(IN_LC_RUNTIME(LC_CTYPE)))
+                    {
+                        for (; s < send; s++) {
+                            if (*s == 'i') {
+                                *d++ = UTF8_TWO_BYTE_HI(LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE);
+                                *d++ = UTF8_TWO_BYTE_LO(LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE);
+                            }
+                            else {
+                                (void) _to_upper_title_latin1(*s, d, &len, 'S');
+                                d += len;
+                            }
+                        }
+                    }
+                    else
+#endif
+                        for (; s < send; s++) {
+                            (void) _to_upper_title_latin1(*s, d, &len, 'S');
+                            d += len;
+                        }
+
+                    /* Here have processed the whole source; no need to
+                     * continue with the outer loop.  Each character has been
+                     * converted to upper case and converted to UTF-8. */
 		    break;
 		} /* End of processing all latin1-style chars */
 	    } /* End of processing all chars */
@@ -4223,15 +4382,26 @@ PP(pp_lc)
     SV *dest;
     const U8 *s;
     U8 *d;
+    bool has_turkic_I = FALSE;
 
     SvGETMAGIC(source);
 
     if (   SvPADTMP(source)
 	&& !SvREADONLY(source) && SvPOK(source)
-	&& !DO_UTF8(source)) {
+	&& !DO_UTF8(source)
+
+#ifdef USE_LOCALE_CTYPE
+
+        && (   LIKELY(! IN_LC_RUNTIME(LC_CTYPE))
+            || LIKELY(! PL_in_utf8_turkic_locale))
 
-	/* We can convert in place, as lowercasing anything in the latin1 range
-	 * (or else DO_UTF8 would have been on) doesn't lengthen it */
+#endif
+
+    ) {
+
+        /* We can convert in place, as, outside of Turkic UTF-8 locales,
+         * lowercasing anything in the latin1 range (or else DO_UTF8 would have
+         * been on) doesn't lengthen it. */
 	dest = source;
 	s = d = (U8*)SvPV_force_nomg(source, len);
 	min = len + 1;
@@ -4253,7 +4423,38 @@ PP(pp_lc)
 #ifdef USE_LOCALE_CTYPE
 
     if (IN_LC_RUNTIME(LC_CTYPE)) {
+        const U8 * next_I;
+
         _CHECK_AND_WARN_PROBLEMATIC_LOCALE;
+
+        /* Lowercasing in a Turkic locale can cause non-UTF-8 to need to become
+         * UTF-8 for the single case of the character 'I' */
+        if (     UNLIKELY(PL_in_utf8_turkic_locale)
+            && ! DO_UTF8(source)
+            &&   (next_I = (U8 *) memchr(s, 'I', len)))
+        {
+            Size_t I_count = 0;
+            const U8 *const send = s + len;
+
+            do {
+                I_count++;
+
+                next_I = (U8 *) memchr(next_I + 1, 'I',
+                                        send - (next_I + 1));
+            } while (next_I != NULL);
+
+            /* Except for the 'I', in UTF-8 strings, the lower case of a
+             * character below 256 occupies the same number of bytes as the
+             * original.  Therefore, the space needed is the original length
+             * plus I_count plus the number of characters that become two bytes
+             * when converted to UTF-8 */
+            sv_utf8_upgrade_flags_grow(dest, 0, len
+                                              + I_count
+                                              + variant_under_utf8_count(s, send)
+                                              + 1 /* Trailing NUL */ );
+            d = (U8*)SvPVX(dest);
+            has_turkic_I = TRUE;
+        }
     }
 
 #endif
@@ -4264,6 +4465,7 @@ PP(pp_lc)
     if (DO_UTF8(source)) {
 	const U8 *const send = s + len;
 	U8 tmpbuf[UTF8_MAXBYTES_CASE+1];
+        bool remove_dot_above = FALSE;
 
 	while (s < send) {
 	    const STRLEN u = UTF8SKIP(s);
@@ -4272,7 +4474,33 @@ PP(pp_lc)
 #ifdef USE_LOCALE_CTYPE
 
 	    _toLOWER_utf8_flags(s, send, tmpbuf, &ulen, IN_LC_RUNTIME(LC_CTYPE));
+
+            /* If we are in a Turkic locale, we have to do more work.  As noted
+             * in the comments for lcfirst, there is a special case if a 'I'
+             * is in a grapheme with COMBINING DOT ABOVE UTF8.  It turns into a
+             * 'i', and the DOT must be removed.  We check for that situation,
+             * and set a flag if the DOT is there.  Then each time through the
+             * loop, we have to see if we need to remove the next DOT above,
+             * and if so, do it.  We know that there is a DOT because
+             * _toLOWER_utf8_flags() wouldn't have returned 'i' unless there
+             * was one in a proper position. */
+            if (   UNLIKELY(PL_in_utf8_turkic_locale)
+                && IN_LC_RUNTIME(LC_CTYPE))
+            {
+                if (   UNLIKELY(remove_dot_above)
+                    && memBEGINs(tmpbuf, sizeof(tmpbuf), COMBINING_DOT_ABOVE_UTF8))
+                {
+                    s += u;
+                    remove_dot_above = FALSE;
+                    continue;
+                }
+                else if (UNLIKELY(*s == 'I' && tmpbuf[0] == 'i')) {
+                    remove_dot_above = TRUE;
+                }
+            }
 #else
+            PERL_UNUSED_VAR(remove_dot_above);
+
 	    _toLOWER_utf8_flags(s, send, tmpbuf, &ulen, 0);
 #endif
 
@@ -4304,7 +4532,7 @@ PP(pp_lc)
 	SvUTF8_on(dest);
 	*d = '\0';
 	SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
-    } else {	/* Not utf8 */
+    } else {	/* 'source' not utf8 */
 	if (len) {
 	    const U8 *const send = s + len;
 
@@ -4313,8 +4541,22 @@ PP(pp_lc)
 	     * whole thing in a tight loop, for speed, */
 #ifdef USE_LOCALE_CTYPE
             if (IN_LC_RUNTIME(LC_CTYPE)) {
-		for (; s < send; d++, s++)
-		    *d = toLOWER_LC(*s);
+                if (LIKELY( ! has_turkic_I)) {
+                    for (; s < send; d++, s++)
+                        *d = toLOWER_LC(*s);
+                }
+                else {  /* This is the only case where lc() converts 'dest'
+                           into UTF-8 from a non-UTF-8 'source' */
+                    for (; s < send; s++) {
+                        if (*s == 'I') {
+                            *d++ = UTF8_TWO_BYTE_HI(LATIN_SMALL_LETTER_DOTLESS_I);
+                            *d++ = UTF8_TWO_BYTE_LO(LATIN_SMALL_LETTER_DOTLESS_I);
+                        }
+                        else {
+                            append_utf8_from_native_byte(toLOWER_LATIN1(*s), &d);
+                        }
+                    }
+                }
             }
 	    else
 #endif
@@ -4522,28 +4764,64 @@ PP(pp_fc)
 #ifdef USE_LOCALE_CTYPE
       do_uni_folding:
 #endif
-            /* For ASCII and the Latin-1 range, there's two
+            /* For ASCII and the Latin-1 range, there's potentially three
              * troublesome folds:
              *      \x{DF} (\N{LATIN SMALL LETTER SHARP S}), which under full
              *             casefolding becomes 'ss';
              *      \x{B5} (\N{MICRO SIGN}), which under any fold becomes
              *             \x{3BC} (\N{GREEK SMALL LETTER MU})
+             *      I      only in Turkic locales, this folds to \x{131}
+             *             \N{LATIN SMALL LETTER DOTLESS I}
              * For the rest, the casefold is their lowercase.  */
             for (; s < send; d++, s++) {
-                if (*s == MICRO_SIGN) {
+                if (    UNLIKELY(*s == MICRO_SIGN)
+#ifdef USE_LOCALE_CTYPE
+                    || (   UNLIKELY(PL_in_utf8_turkic_locale)
+                        && UNLIKELY(IN_LC_RUNTIME(LC_CTYPE))
+                        && UNLIKELY(*s == 'I'))
+#endif
+                ) {
+                    Size_t extra = send - s
+                                 + variant_under_utf8_count(s, send);
+
                     /* \N{MICRO SIGN}'s casefold is \N{GREEK SMALL LETTER MU},
-                     * which is outside of the latin-1 range. There's a couple
-                     * of ways to deal with this -- khw discusses them in
-                     * pp_lc/uc, so go there :) What we do here is upgrade what
-                     * we had already casefolded, then enter an inner loop that
-                     * appends the rest of the characters as UTF-8. */
+                     * and 'I' in Turkic locales is \N{LATIN SMALL LETTER
+                     * DOTLESS I} both of which are outside of the latin-1
+                     * range. There's a couple of ways to deal with this -- khw
+                     * discusses them in pp_lc/uc, so go there :) What we do
+                     * here is upgrade what we had already casefolded, then
+                     * enter an inner loop that appends the rest of the
+                     * characters as UTF-8.
+                     *
+                     * First we calculate the needed size of the upgraded dest
+                     * beyond what's been processed already (the upgrade
+                     * function figures that out).  Except for the 'I' in
+                     * Turkic locales, in UTF-8 strings, the fold case of a
+                     * character below 256 occupies the same number of bytes as
+                     * the original (even the Sharp S).  Therefore, the space
+                     * needed is the number of bytes remaining plus the number
+                     * of characters that become two bytes when converted to
+                     * UTF-8 plus, in turkish locales, the number of 'I's */
+
+                    if (UNLIKELY(*s == 'I')) {
+                        const U8 * s_peek = s;
+
+                        do {
+                            extra++;
+
+                            s_peek = (U8 *) memchr(s_peek + 1, 'i',
+                                                   send - (s_peek + 1));
+                        } while (s_peek != NULL);
+                    }
+
+                    /* Growing may move things, so have to save and recalculate
+                     * 'd' */
                     len = d - (U8*)SvPVX_const(dest);
                     SvCUR_set(dest, len);
                     len = sv_utf8_upgrade_flags_grow(dest,
                                                 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
-						/* The max expansion for latin1
-						 * chars is 1 byte becomes 2 */
-                                                (send -s) * 2 + 1);
+                                                extra
+                                              + 1 /* Trailing NUL */ );
                     d = (U8*)SvPVX(dest) + len;
 
                     *d++ = UTF8_TWO_BYTE_HI(GREEK_SMALL_LETTER_MU);
@@ -4710,7 +4988,7 @@ PP(pp_aeach)
     IV *iterp = Perl_av_iter_p(aTHX_ array);
     const IV current = (*iterp)++;
 
-    if (current > av_tindex(array)) {
+    if (current > av_top_index(array)) {
 	*iterp = 0;
 	if (gimme == G_SCALAR)
 	    RETPUSHUNDEF;
@@ -4738,7 +5016,7 @@ PP(pp_akeys)
 
     if (gimme == G_SCALAR) {
 	dTARGET;
-	PUSHi(av_tindex(array) + 1);
+	PUSHi(av_count(array));
     }
     else if (gimme == G_ARRAY) {
       if (UNLIKELY(PL_op->op_private & OPpMAYBE_LVSUB)) {
@@ -4749,7 +5027,7 @@ PP(pp_akeys)
                       "Can't modify keys on array in list assignment");
       }
       {
-        IV n = Perl_av_len(aTHX_ array);
+        IV n = av_top_index(array);
         IV i;
 
         EXTEND(SP, n + 1);
@@ -5581,7 +5859,7 @@ PP(pp_reverse)
 		const MAGIC *mg;
 		bool can_preserve = SvCANEXISTDELETE(av);
 
-		for (i = 0, j = av_tindex(av); i < j; ++i, --j) {
+		for (i = 0, j = av_top_index(av); i < j; ++i, --j) {
 		    SV *begin, *end;
 
 		    if (can_preserve) {
@@ -5651,6 +5929,7 @@ PP(pp_reverse)
 	    sv_setsv(TARG, DEFSV);
 	    XPUSHs(TARG);
 	}
+        SvSETMAGIC(TARG); /* remove any utf8 length magic */
 
 	up = SvPV_force(TARG, len);
 	if (len > 1) {
@@ -5732,6 +6011,7 @@ PP(pp_split)
 
     /* handle @ary = split(...) optimisation */
     if (PL_op->op_private & OPpSPLIT_ASSIGN) {
+	realarray = 1;
         if (!(PL_op->op_flags & OPf_STACKED)) {
             if (PL_op->op_private & OPpSPLIT_LEX) {
                 if (PL_op->op_private & OPpLVAL_INTRO)
@@ -5754,26 +6034,13 @@ PP(pp_split)
             oldsave = PL_savestack_ix;
         }
 
-	realarray = 1;
-	PUTBACK;
-	av_extend(ary,0);
-	(void)sv_2mortal(SvREFCNT_inc_simple_NN(sv));
-	av_clear(ary);
-	SPAGAIN;
+	/* Some defence against stack-not-refcounted bugs */
+	(void)sv_2mortal(SvREFCNT_inc_simple_NN(ary));
+
 	if ((mg = SvTIED_mg((const SV *)ary, PERL_MAGIC_tied))) {
 	    PUSHMARK(SP);
 	    XPUSHs(SvTIED_obj(MUTABLE_SV(ary), mg));
-	}
-	else {
-	    if (!AvREAL(ary)) {
-		I32 i;
-		AvREAL_on(ary);
-		AvREIFY_off(ary);
-		for (i = AvFILLp(ary); i >= 0; i--)
-		    AvARRAY(ary)[i] = &PL_sv_undef; /* don't free mere refs */
-	    }
-	    /* temporarily switch stacks */
-	    SAVESWITCHSTACK(PL_curstack, ary);
+	} else {
 	    make_mortal = 0;
 	}
     }
@@ -5895,62 +6162,52 @@ PP(pp_split)
 	}
     }
     else if (RX_EXTFLAGS(rx) & RXf_NULL && !(s >= strend)) {
-        /*
-          Pre-extend the stack, either the number of bytes or
-          characters in the string or a limited amount, triggered by:
-
-          my ($x, $y) = split //, $str;
-            or
-          split //, $str, $i;
-        */
-	if (!gimme_scalar) {
-	    const IV items = limit - 1;
-            /* setting it to -1 will trigger a panic in EXTEND() */
-            const SSize_t sslen = slen > SSize_t_MAX ?  -1 : (SSize_t)slen;
-	    if (items >=0 && items < sslen)
-		EXTEND(SP, items);
-	    else
-		EXTEND(SP, sslen);
-	}
-
-        if (do_utf8) {
-            while (--limit) {
-                /* keep track of how many bytes we skip over */
-                m = s;
-                s += UTF8SKIP(s);
-		if (gimme_scalar) {
-		    iters++;
-		    if (s-m == 0)
-			trailing_empty++;
-		    else
-			trailing_empty = 0;
-		} else {
-		    dstr = newSVpvn_flags(m, s-m, SVf_UTF8 | make_mortal);
-
-		    PUSHs(dstr);
-		}
-
-                if (s >= strend)
-                    break;
+        /* This case boils down to deciding which is the smaller of:
+         * limit - effectively a number of characters
+         * slen - which already contains the number of characters in s
+         *
+         * The resulting number is the number of iters (for gimme_scalar)
+         * or the number of SVs to create (!gimme_scalar). */
+
+        /* setting it to -1 will trigger a panic in EXTEND() */
+        const SSize_t sslen = slen > SSize_t_MAX ?  -1 : (SSize_t)slen;
+        const IV items = limit - 1;
+        if (sslen < items || items < 0) {
+            iters = slen -1;
+            limit = slen + 1;
+            /* Note: The same result is returned if the following block
+             * is removed, because of the "keep field after final delim?"
+             * adjustment, but having the following makes the "correct"
+             * behaviour more apparent. */
+            if (gimme_scalar) {
+                s = strend;
+                iters++;
             }
         } else {
-            while (--limit) {
-	        if (gimme_scalar) {
-		    iters++;
-		} else {
-		    dstr = newSVpvn(s, 1);
-
-
-		    if (make_mortal)
-			sv_2mortal(dstr);
-
-		    PUSHs(dstr);
-		}
-
-                s++;
-
-                if (s >= strend)
-                    break;
+            iters = items;
+        }
+        if (!gimme_scalar) {
+            /*
+              Pre-extend the stack, either the number of bytes or
+              characters in the string or a limited amount, triggered by:
+              my ($x, $y) = split //, $str;
+                or
+              split //, $str, $i;
+            */
+            EXTEND(SP, limit);
+            if (do_utf8) {
+                while (--limit) {
+                    m = s;
+                    s += UTF8SKIP(s);
+                    dstr = newSVpvn_flags(m, s-m, SVf_UTF8 | make_mortal);
+                    PUSHs(dstr);
+                }
+            } else {
+                while (--limit) {
+                    dstr = newSVpvn_flags(s, 1, make_mortal);
+                    PUSHs(dstr);
+                    s++;
+                }
             }
         }
     }
@@ -5983,7 +6240,7 @@ PP(pp_split)
 		/* The rx->minlen is in characters but we want to step
 		 * s ahead by bytes. */
  		if (do_utf8)
-		    s = (char*)utf8_hop((U8*)m, len);
+		    s = (char*)utf8_hop_forward((U8*) m, len, (U8*) strend);
  		else
 		    s = m + len; /* Fake \n at the end */
 	    }
@@ -6007,7 +6264,7 @@ PP(pp_split)
 		/* The rx->minlen is in characters but we want to step
 		 * s ahead by bytes. */
  		if (do_utf8)
-		    s = (char*)utf8_hop((U8*)m, len);
+		    s = (char*)utf8_hop_forward((U8*)m, len, (U8 *) strend);
  		else
 		    s = m + len; /* Fake \n at the end */
 	    }
@@ -6102,32 +6359,59 @@ PP(pp_split)
     }
 
     PUTBACK;
-    LEAVE_SCOPE(oldsave); /* may undo an earlier SWITCHSTACK */
+    LEAVE_SCOPE(oldsave);
     SPAGAIN;
     if (realarray) {
-	if (!mg) {
-	    if (SvSMAGICAL(ary)) {
-		PUTBACK;
+        if (!mg) {
+            PUTBACK;
+            if(AvREAL(ary)) {
+                if (av_count(ary) > 0)
+                    av_clear(ary);
+            } else {
+                AvREAL_on(ary);
+                AvREIFY_off(ary);
+
+                if (AvMAX(ary) > -1) {
+                    /* don't free mere refs */
+                    Zero(AvARRAY(ary), AvMAX(ary), SV*);
+                }
+            }
+            if(AvMAX(ary) < iters)
+                av_extend(ary,iters);
+            SPAGAIN;
+
+            /* Need to copy the SV*s from the stack into ary */
+            Copy(SP + 1 - iters, AvARRAY(ary), iters, SV*);
+            AvFILLp(ary) = iters - 1;
+
+            if (SvSMAGICAL(ary)) {
+                PUTBACK;
 		mg_set(MUTABLE_SV(ary));
 		SPAGAIN;
-	    }
-	    if (gimme == G_ARRAY) {
-		EXTEND(SP, iters);
-		Copy(AvARRAY(ary), SP + 1, iters, SV*);
-		SP += iters;
-		RETURN;
-	    }
+            }
+
+            if (gimme != G_ARRAY) {
+                /* SP points to the final SV* pushed to the stack. But the SV*  */
+                /* are not going to be used from the stack. Point SP to below   */
+                /* the first of these SV*.                                      */
+                SP -= iters;
+                PUTBACK;
+            }
 	}
 	else {
-	    PUTBACK;
-	    ENTER_with_name("call_PUSH");
-	    call_sv(SV_CONST(PUSH),G_SCALAR|G_DISCARD|G_METHOD_NAMED);
-	    LEAVE_with_name("call_PUSH");
-	    SPAGAIN;
+            PUTBACK;
+            av_extend(ary,iters);
+            av_clear(ary);
+
+            ENTER_with_name("call_PUSH");
+            call_sv(SV_CONST(PUSH),G_SCALAR|G_DISCARD|G_METHOD_NAMED);
+            LEAVE_with_name("call_PUSH");
+            SPAGAIN;
+
 	    if (gimme == G_ARRAY) {
 		SSize_t i;
 		/* EXTEND should not be needed - we just popped them */
-		EXTEND(SP, iters);
+		EXTEND_SKIP(SP, iters);
 		for (i=0; i < iters; i++) {
 		    SV **svp = av_fetch(ary, i, FALSE);
 		    PUSHs((svp) ? *svp : &PL_sv_undef);
@@ -6136,13 +6420,12 @@ PP(pp_split)
 	    }
 	}
     }
-    else {
-	if (gimme == G_ARRAY)
-	    RETURN;
-    }
 
-    GETTARGET;
-    XPUSHi(iters);
+    if (gimme != G_ARRAY) {
+        GETTARGET;
+        XPUSHi(iters);
+     }
+
     RETURN;
 }
 
@@ -6185,11 +6468,11 @@ PP(unimplemented_op)
        Secondly, as the three ops we "panic" on are padmy, mapstart and custom,
        if we get here for a custom op then that means that the custom op didn't
        have an implementation. Given that OP_NAME() looks up the custom op
-       by its pp_addr, likely it will return NULL, unless someone (unhelpfully)
-       registers &PL_unimplemented_op as the address of their custom op.
+       by its op_ppaddr, likely it will return NULL, unless someone (unhelpfully)
+       registers &Perl_unimplemented_op as the address of their custom op.
        NULL doesn't generate a useful error message. "custom" does. */
     const char *const name = op_type >= OP_max
-	? "[out of range]" : PL_op_name[PL_op->op_type];
+	? "[out of range]" : PL_op_name[op_type];
     if(OP_IS_SOCKET(op_type))
 	DIE(aTHX_ PL_no_sock_func, name);
     DIE(aTHX_ "panic: unimplemented op %s (#%d) called", name,	op_type);
@@ -6385,7 +6668,7 @@ PP(pp_coreargs)
 
 PP(pp_avhvswitch)
 {
-    dVAR; dSP;
+    dSP;
     return PL_ppaddr[
 		(SvTYPE(TOPs) == SVt_PVAV ? OP_AEACH : OP_EACH)
 		    + (PL_op->op_private & OPpAVHVSWITCH_MASK)
@@ -6837,7 +7120,7 @@ S_find_runcv_name(void)
     return sv;
 }
 
-/* Check a  a subs arguments - i.e. that it has the correct number of args
+/* Check a sub's arguments - i.e. that it has the correct number of args
  * (and anything else we might think of in future). Typically used with
  * signatured subs.
  */
@@ -6845,16 +7128,16 @@ S_find_runcv_name(void)
 PP(pp_argcheck)
 {
     OP * const o       = PL_op;
-    UNOP_AUX_item *aux = cUNOP_AUXo->op_aux;
-    IV   params        = aux[0].iv;
-    IV   opt_params    = aux[1].iv;
-    char slurpy        = (char)(aux[2].iv);
+    struct op_argcheck_aux *aux = (struct op_argcheck_aux *)cUNOP_AUXo->op_aux;
+    UV   params        = aux->params;
+    UV   opt_params    = aux->opt_params;
+    char slurpy        = aux->slurpy;
     AV  *defav         = GvAV(PL_defgv); /* @_ */
-    IV   argc;
+    UV   argc;
     bool too_few;
 
     assert(!SvMAGICAL(defav));
-    argc = (AvFILLp(defav) + 1);
+    argc = (UV)(AvFILLp(defav) + 1);
     too_few = (argc < (params - opt_params));
 
     if (UNLIKELY(too_few || (!slurpy && argc > params)))
@@ -6871,6 +7154,42 @@ PP(pp_argcheck)
     return NORMAL;
 }
 
+PP(pp_isa)
+{
+    dSP;
+    SV *left, *right;
+
+    right = POPs;
+    left  = TOPs;
+
+    SETs(boolSV(sv_isa_sv(left, right)));
+    RETURN;
+}
+
+PP(pp_cmpchain_and)
+{
+    dSP;
+    SV *result = POPs;
+    PUTBACK;
+    if (SvTRUE_NN(result)) {
+	return cLOGOP->op_other;
+    } else {
+	TOPs = result;
+	return NORMAL;
+    }
+}
+
+PP(pp_cmpchain_dup)
+{
+    dSP;
+    SV *right = TOPs;
+    SV *left = TOPm1s;
+    TOPm1s = right;
+    TOPs = left;
+    XPUSHs(right);
+    RETURN;
+}
+
 /*
  * ex: set ts=8 sts=4 sw=4 et:
  */