Various updates and fixes to some of the SysV IPC ops and their tests

[perl5.git] / pp.c
diff --git a/pp.c b/pp.c

index e4072ea..5b5e163 100644 (file)
--- a/pp.c
+++ b/pp.c
@@ -28,28 +28,10 @@
  #include "perl.h"
  #include "keywords.h"
  
+#include "invlist_inline.h"
  #include "reentr.h"
  #include "regcharclass.h"
  
-/* XXX I can't imagine anyone who doesn't have this actually _needs_
-   it, since pid_t is an integral type.
-   --AD  2/20/1998
-*/
-#ifdef NEED_GETPID_PROTO
-extern Pid_t getpid (void);
-#endif
-
-/*
- * Some BSDs and Cygwin default to POSIX math instead of IEEE.
- * This switches them over to IEEE.
- */
-#if defined(LIBM_LIB_VERSION)
-    _LIB_VERSION_TYPE _LIB_VERSION = _IEEE_;
-#endif
-
-static const STRLEN small_mu_len = sizeof(GREEK_SMALL_LETTER_MU_UTF8) - 1;
-static const STRLEN capital_iota_len = sizeof(GREEK_CAPITAL_LETTER_IOTA_UTF8) - 1;
-
  /* variations on pp_null */
  
  PP(pp_stub)
@@ -62,101 +44,7 @@ PP(pp_stub)
  
  /* Pushy stuff. */
  
-/* This is also called directly by pp_lvavref.  */
-PP(pp_padav)
-{
-    dSP; dTARGET;
-    U8 gimme;
-    assert(SvTYPE(TARG) == SVt_PVAV);
-    if (UNLIKELY( PL_op->op_private & OPpLVAL_INTRO ))
-       if (LIKELY( !(PL_op->op_private & OPpPAD_STATE) ))
-           SAVECLEARSV(PAD_SVl(PL_op->op_targ));
-    EXTEND(SP, 1);
-
-    if (PL_op->op_flags & OPf_REF) {
-       PUSHs(TARG);
-       RETURN;
-    }
-    else if (PL_op->op_private & OPpMAYBE_LVSUB) {
-        const I32 flags = is_lvalue_sub();
-        if (flags && !(flags & OPpENTERSUB_INARGS)) {
-           if (GIMME_V == G_SCALAR)
-                /* diag_listed_as: Can't return %s to lvalue scalar context */
-                Perl_croak(aTHX_ "Can't return array to lvalue scalar context");
-            PUSHs(TARG);
-            RETURN;
-       }
-    }
-
-    gimme = GIMME_V;
-    if (gimme == G_ARRAY) {
-        /* XXX see also S_pushav in pp_hot.c */
-       const SSize_t maxarg = AvFILL(MUTABLE_AV(TARG)) + 1;
-       EXTEND(SP, maxarg);
-       if (SvMAGICAL(TARG)) {
-           SSize_t i;
-           for (i=0; i < maxarg; i++) {
-               SV * const * const svp = av_fetch(MUTABLE_AV(TARG), i, FALSE);
-               SP[i+1] = (svp) ? *svp : &PL_sv_undef;
-           }
-       }
-       else {
-           SSize_t i;
-           for (i=0; i < maxarg; i++) {
-               SV * const sv = AvARRAY((const AV *)TARG)[i];
-               SP[i+1] = sv ? sv : &PL_sv_undef;
-           }
-       }
-       SP += maxarg;
-    }
-    else if (gimme == G_SCALAR) {
-       SV* const sv = sv_newmortal();
-       const SSize_t maxarg = AvFILL(MUTABLE_AV(TARG)) + 1;
-       sv_setiv(sv, maxarg);
-       PUSHs(sv);
-    }
-    RETURN;
-}
-
-PP(pp_padhv)
-{
-    dSP; dTARGET;
-    U8 gimme;
  
-    assert(SvTYPE(TARG) == SVt_PVHV);
-    XPUSHs(TARG);
-    if (UNLIKELY( PL_op->op_private & OPpLVAL_INTRO ))
-       if (LIKELY( !(PL_op->op_private & OPpPAD_STATE) ))
-           SAVECLEARSV(PAD_SVl(PL_op->op_targ));
-
-    if (PL_op->op_flags & OPf_REF)
-       RETURN;
-    else if (PL_op->op_private & OPpMAYBE_LVSUB) {
-        const I32 flags = is_lvalue_sub();
-        if (flags && !(flags & OPpENTERSUB_INARGS)) {
-            if (GIMME_V == G_SCALAR)
-                /* diag_listed_as: Can't return %s to lvalue scalar context */
-                Perl_croak(aTHX_ "Can't return hash to lvalue scalar context");
-            RETURN;
-        }
-    }
-
-    gimme = GIMME_V;
-    if (gimme == G_ARRAY) {
-       RETURNOP(Perl_do_kv(aTHX));
-    }
-    else if ((PL_op->op_private & OPpTRUEBOOL
-         || (  PL_op->op_private & OPpMAYBE_TRUEBOOL
-            && block_gimme() == G_VOID  ))
-         && (!SvRMAGICAL(TARG) || !mg_find(TARG, PERL_MAGIC_tied))
-    )
-       SETs(HvUSEDKEYS(TARG) ? &PL_sv_yes : &PL_sv_no);
-    else if (gimme == G_SCALAR) {
-       SV* const sv = Perl_hv_scalar(aTHX_ MUTABLE_HV(TARG));
-       SETs(sv);
-    }
-    RETURN;
-}
  
  PP(pp_padcv)
  {
@@ -239,20 +127,18 @@ S_rv2gv(pTHX_ SV *sv, const bool vivify_sv, const bool strict,
                  */
                 if (vivify_sv && sv != &PL_sv_undef) {
                     GV *gv;
+                   HV *stash;
                     if (SvREADONLY(sv))
                         Perl_croak_no_modify();
+                   gv = MUTABLE_GV(newSV(0));
+                   stash = CopSTASH(PL_curcop);
+                   if (SvTYPE(stash) != SVt_PVHV) stash = NULL;
                     if (cUNOP->op_targ) {
                         SV * const namesv = PAD_SV(cUNOP->op_targ);
-                       HV *stash = CopSTASH(PL_curcop);
-                       if (SvTYPE(stash) != SVt_PVHV) stash = NULL;
-                       gv = MUTABLE_GV(newSV(0));
                         gv_init_sv(gv, stash, namesv, 0);
                     }
                     else {
-                       const char * const name = CopSTASHPV(PL_curcop);
-                       gv = newGVgen_flags(name,
-                                HvNAMEUTF8(CopSTASH(PL_curcop)) ? SVf_UTF8 : 0 );
-                       SvREFCNT_inc_simple_void_NN(gv);
+                       gv_init_pv(gv, stash, "__ANONIO__", 0);
                     }
                     prepare_SV_for_RV(sv);
                     SvRV_set(sv, MUTABLE_SV(gv));
@@ -440,11 +326,15 @@ PP(pp_pos)
      else {
             const MAGIC * const mg = mg_find_mglob(sv);
             if (mg && mg->mg_len != -1) {
-               dTARGET;
                 STRLEN i = mg->mg_len;
-               if (mg->mg_flags & MGf_BYTES && DO_UTF8(sv))
-                   i = sv_pos_b2u_flags(sv, i, SV_GMAGIC|SV_CONST_RETURN);
-               SETu(i);
+                if (PL_op->op_private & OPpTRUEBOOL)
+                    SETs(i ? &PL_sv_yes : &PL_sv_zero);
+                else {
+                    dTARGET;
+                    if (mg->mg_flags & MGf_BYTES && DO_UTF8(sv))
+                        i = sv_pos_b2u_flags(sv, i, SV_GMAGIC|SV_CONST_RETURN);
+                    SETu(i);
+                }
                 return NORMAL;
             }
             SETs(&PL_sv_undef);
@@ -472,7 +362,7 @@ PP(pp_rv2cv)
         cv = SvTYPE(SvRV(gv)) == SVt_PVCV
             ? MUTABLE_CV(SvRV(gv))
             : MUTABLE_CV(gv);
-    }    
+    }
      else
         cv = MUTABLE_CV(&PL_sv_undef);
      SETs(MUTABLE_SV(cv));
@@ -490,7 +380,7 @@ PP(pp_prototype)
      if (SvGMAGICAL(TOPs)) SETs(sv_mortalcopy(TOPs));
      if (SvPOK(TOPs) && SvCUR(TOPs) >= 7) {
         const char * s = SvPVX_const(TOPs);
-       if (strnEQ(s, "CORE::", 6)) {
+        if (memBEGINs(s, SvCUR(TOPs), "CORE::")) {
             const int code = keyword(s + 6, SvCUR(TOPs) - 6, 1);
             if (!code)
                 DIE(aTHX_ "Can't find an opnumber for \"%" UTF8f "\"",
@@ -575,6 +465,8 @@ S_refto(pTHX_ SV *sv)
      else if (SvPADTMP(sv)) {
          sv = newSVsv(sv);
      }
+    else if (UNLIKELY(SvSMAGICAL(sv) && mg_find(sv, PERL_MAGIC_nonelem)))
+        sv_unmagic(SvREFCNT_inc_simple_NN(sv), PERL_MAGIC_nonelem);
      else {
         SvTEMP_off(sv);
         SvREFCNT_inc_void_NN(sv);
@@ -776,7 +668,7 @@ PP(pp_study)
  
  PP(pp_trans)
  {
-    dSP; 
+    dSP;
      SV *sv;
  
      if (PL_op->op_flags & OPf_STACKED)
@@ -797,8 +689,8 @@ PP(pp_trans)
         PUSHs(newsv);
      }
      else {
-       I32 i = do_trans(sv);
-       mPUSHi(i);
+       Size_t i = do_trans(sv);
+       mPUSHi((UV)i);
      }
      RETURN;
  }
@@ -1267,18 +1159,18 @@ PP(pp_pow)
                         else if (result <= (UV)IV_MAX)
                             /* answer negative, fits in IV */
                             SETi( -(IV)result );
-                       else if (result == (UV)IV_MIN) 
+                       else if (result == (UV)IV_MIN)
                             /* 2's complement assumption: special case IV_MIN */
                             SETi( IV_MIN );
                         else
                             /* answer negative, doesn't fit */
                             SETn( -(NV)result );
                         RETURN;
-                   } 
+                   }
                 }
      }
    float_it:
-#endif    
+#endif
      {
         NV right = SvNV_nomg(svr);
         NV left  = SvNV_nomg(svl);
@@ -1376,16 +1268,10 @@ PP(pp_multiply)
              NV nr = SvNVX(svr);
              NV result;
  
-            if (
-#if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan)
-                !Perl_isnan(nl) && nl == (NV)(il = (IV)nl)
-                && !Perl_isnan(nr) && nr == (NV)(ir = (IV)nr)
-#else
-                nl == (NV)(il = (IV)nl) && nr == (NV)(ir = (IV)nr)
-#endif
-                )
+            if (lossless_NV_to_IV(nl, &il) && lossless_NV_to_IV(nr, &ir)) {
                  /* nothing was lost by converting to IVs */
                  goto do_iv;
+            }
              SP--;
              result = nl * nr;
  #  if defined(__sgi) && defined(USE_LONG_DOUBLE) && LONG_DOUBLEKIND == LONG_DOUBLE_IS_DOUBLEDOUBLE_128_BIT_BE_BE && NVSIZE == 16
@@ -1424,8 +1310,9 @@ PP(pp_multiply)
                     alow = aiv;
                     auvok = TRUE; /* effectively it's a UV now */
                 } else {
-                    /* abs, auvok == false records sign */
-                   alow = (aiv == IV_MIN) ? (UV)aiv : (UV)(-aiv);
+                    /* abs, auvok == false records sign; Using 0- here and
+                     * later to silence bogus warning from MS VC */
+                   alow = (UV) (0 - (UV) aiv);
                 }
             }
             if (buvok) {
@@ -1437,7 +1324,7 @@ PP(pp_multiply)
                     buvok = TRUE; /* effectively it's a UV now */
                 } else {
                      /* abs, buvok == false records sign */
-                   blow = (biv == IV_MIN) ? (UV)biv : (UV)(-biv);
+                   blow = (UV) (0 - (UV) biv);
                 }
             }
  
@@ -1546,15 +1433,9 @@ PP(pp_divide)
         can be too large to preserve, so don't need to compile the code to
         test the size of UVs.  */
  
-#ifdef SLOPPYDIVIDE
+#if defined(SLOPPYDIVIDE) || (defined(PERL_PRESERVE_IVUV) && !defined(NV_PRESERVES_UV))
  #  define PERL_TRY_UV_DIVIDE
      /* ensure that 20./5. == 4. */
-#else
-#  ifdef PERL_PRESERVE_IVUV
-#    ifndef NV_PRESERVES_UV
-#      define PERL_TRY_UV_DIVIDE
-#    endif
-#  endif
  #endif
  
  #ifdef PERL_TRY_UV_DIVIDE
@@ -1574,7 +1455,7 @@ PP(pp_divide)
                      right_non_neg = TRUE; /* effectively it's a UV now */
                  }
                 else {
-                    right = (biv == IV_MIN) ? (UV)biv : (UV)(-biv);
+                    right = -(UV)biv;
                  }
              }
              /* historically undef()/0 gives a "Use of uninitialized value"
@@ -1595,7 +1476,7 @@ PP(pp_divide)
                      left_non_neg = TRUE; /* effectively it's a UV now */
                  }
                 else {
-                    left = (aiv == IV_MIN) ? (UV)aiv : (UV)(-aiv);
+                    left = -(UV)aiv;
                  }
              }
  
@@ -1615,8 +1496,11 @@ PP(pp_divide)
  #endif
                  ) {
                  /* Integer division can't overflow, but it can be imprecise.  */
+
+                /* Modern compilers optimize division followed by
+                 * modulo into a single div instruction */
                 const UV result = left / right;
-                if (result * right == left) {
+                if (left % right == 0) {
                      SP--; /* result is valid */
                      if (left_non_neg == right_non_neg) {
                          /* signs identical, result is positive.  */
@@ -1675,7 +1559,7 @@ PP(pp_modulo)
                      right = biv;
                      right_neg = FALSE; /* effectively it's a UV now */
                  } else {
-                    right = (biv == IV_MIN) ? (UV)biv : (UV)(-biv);
+                   right = (UV) (0 - (UV) biv);
                  }
              }
          }
@@ -1705,7 +1589,7 @@ PP(pp_modulo)
                          left = aiv;
                          left_neg = FALSE; /* effectively it's a UV now */
                      } else {
-                        left = (aiv == IV_MIN) ? (UV)aiv : (UV)(-aiv);
+                        left = (UV) (0 - (UV) aiv);
                      }
                  }
          }
@@ -1783,8 +1667,9 @@ PP(pp_repeat)
      IV count;
      SV *sv;
      bool infnan = FALSE;
+    const U8 gimme = GIMME_V;
  
-    if (GIMME_V == G_ARRAY && PL_op->op_private & OPpREPEAT_DOLIST) {
+    if (gimme == G_ARRAY && PL_op->op_private & OPpREPEAT_DOLIST) {
         /* TODO: think of some way of doing list-repeat overloading ??? */
         sv = POPs;
         SvGETMAGIC(sv);
@@ -1804,7 +1689,8 @@ PP(pp_repeat)
             else {
                 dTOPss;
                 ASSUME(MARK + 1 == SP);
-               XPUSHs(sv);
+                MEXTEND(SP, 1);
+                PUSHs(sv);
                 MARK[1] = &PL_sv_undef;
             }
             SP = MARK + 2;
@@ -1848,7 +1734,7 @@ PP(pp_repeat)
                         "Negative repeat count does nothing");
      }
  
-    if (GIMME_V == G_ARRAY && PL_op->op_private & OPpREPEAT_DOLIST) {
+    if (gimme == G_ARRAY && PL_op->op_private & OPpREPEAT_DOLIST) {
         dMARK;
         const SSize_t items = SP - MARK;
         const U8 mod = PL_op->op_flags & OPf_MOD;
@@ -1957,16 +1843,10 @@ PP(pp_subtract)
              NV nl = SvNVX(svl);
              NV nr = SvNVX(svr);
  
-            if (
-#if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan)
-                !Perl_isnan(nl) && nl == (NV)(il = (IV)nl)
-                && !Perl_isnan(nr) && nr == (NV)(ir = (IV)nr)
-#else
-                nl == (NV)(il = (IV)nl) && nr == (NV)(ir = (IV)nr)
-#endif
-                )
+            if (lossless_NV_to_IV(nl, &il) && lossless_NV_to_IV(nr, &ir)) {
                  /* nothing was lost by converting to IVs */
                  goto do_iv;
+            }
              SP--;
              TARGn(nl - nr, 0); /* args not GMG, so can't be tainted */
              SETs(TARG);
@@ -2001,8 +1881,8 @@ PP(pp_subtract)
                     if (aiv >= 0) {
                         auv = aiv;
                         auvok = 1;      /* Now acting as a sign flag.  */
-                   } else { /* 2s complement assumption for IV_MIN */
-                       auv = (aiv == IV_MIN) ? (UV)aiv : (UV)-aiv;
+                   } else {
+                        auv = (UV) (0 - (UV) aiv);
                     }
                 }
                 a_valid = 1;
@@ -2013,7 +1893,7 @@ PP(pp_subtract)
             UV result;
             UV buv;
             bool buvok = SvUOK(svr);
-       
+
             if (buvok)
                 buv = SvUVX(svr);
             else {
@@ -2022,7 +1902,7 @@ PP(pp_subtract)
                     buv = biv;
                     buvok = 1;
                 } else
-                    buv = (biv == IV_MIN) ? (UV)biv : (UV)-biv;
+                    buv = (UV) (0 - (UV) biv);
             }
             /* ?uvok if value is >= 0. basically, flagged as UV if it's +ve,
                else "IV" now, independent of how it came in.
@@ -2099,7 +1979,7 @@ static UV S_uv_shift(UV uv, int shift, bool left)
         shift = -shift;
         left = !left;
     }
-   if (shift >= IV_BITS) {
+   if (UNLIKELY(shift >= IV_BITS)) {
         return 0;
     }
     return left ? uv << shift : uv >> shift;
@@ -2107,14 +1987,34 @@ static UV S_uv_shift(UV uv, int shift, bool left)
  
  static IV S_iv_shift(IV iv, int shift, bool left)
  {
-   if (shift < 0) {
-       shift = -shift;
-       left = !left;
-   }
-   if (shift >= IV_BITS) {
-       return iv < 0 && !left ? -1 : 0;
-   }
-   return left ? iv << shift : iv >> shift;
+    if (shift < 0) {
+        shift = -shift;
+        left = !left;
+    }
+
+    if (UNLIKELY(shift >= IV_BITS)) {
+        return iv < 0 && !left ? -1 : 0;
+    }
+
+    /* For left shifts, perl 5 has chosen to treat the value as unsigned for
+     * the * purposes of shifting, then cast back to signed.  This is very
+     * different from Raku:
+     *
+     * $ raku -e 'say -2 +< 5'
+     * -64
+     *
+     * $ ./perl -le 'print -2 << 5'
+     * 18446744073709551552
+     * */
+    if (left) {
+        if (iv == IV_MIN) { /* Casting this to a UV is undefined behavior */
+            return 0;
+        }
+        return (IV) (((UV) iv) << shift);
+    }
+
+    /* Here is right shift */
+    return iv >> shift;
  }
  
  #define UV_LEFT_SHIFT(uv, shift) S_uv_shift(uv, shift, TRUE)
@@ -2162,14 +2062,20 @@ PP(pp_lt)
  {
      dSP;
      SV *left, *right;
+    U32 flags_and, flags_or;
  
-    tryAMAGICbin_MG(lt_amg, AMGf_set|AMGf_numeric);
+    tryAMAGICbin_MG(lt_amg, AMGf_numeric);
      right = POPs;
      left  = TOPs;
+    flags_and = SvFLAGS(left) & SvFLAGS(right);
+    flags_or  = SvFLAGS(left) | SvFLAGS(right);
+
      SETs(boolSV(
-       (SvIOK_notUV(left) && SvIOK_notUV(right))
-       ? (SvIVX(left) < SvIVX(right))
-       : (do_ncmp(left, right) == -1)
+        ( (flags_and & SVf_IOK) && ((flags_or & SVf_IVisUV) ==0 ) )
+        ?    (SvIVX(left) < SvIVX(right))
+        : (flags_and & SVf_NOK)
+        ?    (SvNVX(left) < SvNVX(right))
+        : (do_ncmp(left, right) == -1)
      ));
      RETURN;
  }
@@ -2178,14 +2084,20 @@ PP(pp_gt)
  {
      dSP;
      SV *left, *right;
+    U32 flags_and, flags_or;
  
-    tryAMAGICbin_MG(gt_amg, AMGf_set|AMGf_numeric);
+    tryAMAGICbin_MG(gt_amg, AMGf_numeric);
      right = POPs;
      left  = TOPs;
+    flags_and = SvFLAGS(left) & SvFLAGS(right);
+    flags_or  = SvFLAGS(left) | SvFLAGS(right);
+
      SETs(boolSV(
-       (SvIOK_notUV(left) && SvIOK_notUV(right))
-       ? (SvIVX(left) > SvIVX(right))
-       : (do_ncmp(left, right) == 1)
+        ( (flags_and & SVf_IOK) && ((flags_or & SVf_IVisUV) ==0 ) )
+        ?    (SvIVX(left) > SvIVX(right))
+        : (flags_and & SVf_NOK)
+        ?    (SvNVX(left) > SvNVX(right))
+        : (do_ncmp(left, right) == 1)
      ));
      RETURN;
  }
@@ -2194,14 +2106,20 @@ PP(pp_le)
  {
      dSP;
      SV *left, *right;
+    U32 flags_and, flags_or;
  
-    tryAMAGICbin_MG(le_amg, AMGf_set|AMGf_numeric);
+    tryAMAGICbin_MG(le_amg, AMGf_numeric);
      right = POPs;
      left  = TOPs;
+    flags_and = SvFLAGS(left) & SvFLAGS(right);
+    flags_or  = SvFLAGS(left) | SvFLAGS(right);
+
      SETs(boolSV(
-       (SvIOK_notUV(left) && SvIOK_notUV(right))
-       ? (SvIVX(left) <= SvIVX(right))
-       : (do_ncmp(left, right) <= 0)
+        ( (flags_and & SVf_IOK) && ((flags_or & SVf_IVisUV) ==0 ) )
+        ?    (SvIVX(left) <= SvIVX(right))
+        : (flags_and & SVf_NOK)
+        ?    (SvNVX(left) <= SvNVX(right))
+        : (do_ncmp(left, right) <= 0)
      ));
      RETURN;
  }
@@ -2210,14 +2128,20 @@ PP(pp_ge)
  {
      dSP;
      SV *left, *right;
+    U32 flags_and, flags_or;
  
-    tryAMAGICbin_MG(ge_amg, AMGf_set|AMGf_numeric);
+    tryAMAGICbin_MG(ge_amg, AMGf_numeric);
      right = POPs;
      left  = TOPs;
+    flags_and = SvFLAGS(left) & SvFLAGS(right);
+    flags_or  = SvFLAGS(left) | SvFLAGS(right);
+
      SETs(boolSV(
-       (SvIOK_notUV(left) && SvIOK_notUV(right))
-       ? (SvIVX(left) >= SvIVX(right))
-       : ( (do_ncmp(left, right) & 2) == 0)
+        ( (flags_and & SVf_IOK) && ((flags_or & SVf_IVisUV) ==0 ) )
+        ?    (SvIVX(left) >= SvIVX(right))
+        : (flags_and & SVf_NOK)
+        ?    (SvNVX(left) >= SvNVX(right))
+        : ( (do_ncmp(left, right) & 2) == 0)
      ));
      RETURN;
  }
@@ -2226,14 +2150,20 @@ PP(pp_ne)
  {
      dSP;
      SV *left, *right;
+    U32 flags_and, flags_or;
  
-    tryAMAGICbin_MG(ne_amg, AMGf_set|AMGf_numeric);
+    tryAMAGICbin_MG(ne_amg, AMGf_numeric);
      right = POPs;
      left  = TOPs;
+    flags_and = SvFLAGS(left) & SvFLAGS(right);
+    flags_or  = SvFLAGS(left) | SvFLAGS(right);
+
      SETs(boolSV(
-       (SvIOK_notUV(left) && SvIOK_notUV(right))
-       ? (SvIVX(left) != SvIVX(right))
-       : (do_ncmp(left, right) != 0)
+        ( (flags_and & SVf_IOK) && ((flags_or & SVf_IVisUV) ==0 ) )
+        ?    (SvIVX(left) != SvIVX(right))
+        : (flags_and & SVf_NOK)
+        ?    (SvNVX(left) != SvNVX(right))
+        : (do_ncmp(left, right) != 0)
      ));
      RETURN;
  }
@@ -2359,7 +2289,7 @@ PP(pp_sle)
         break;
      }
  
-    tryAMAGICbin_MG(amg_type, AMGf_set);
+    tryAMAGICbin_MG(amg_type, 0);
      {
        dPOPTOPssrl;
        const int cmp =
@@ -2377,7 +2307,7 @@ PP(pp_sle)
  PP(pp_seq)
  {
      dSP;
-    tryAMAGICbin_MG(seq_amg, AMGf_set);
+    tryAMAGICbin_MG(seq_amg, 0);
      {
        dPOPTOPssrl;
        SETs(boolSV(sv_eq_flags(left, right, 0)));
@@ -2388,7 +2318,7 @@ PP(pp_seq)
  PP(pp_sne)
  {
      dSP;
-    tryAMAGICbin_MG(sne_amg, AMGf_set);
+    tryAMAGICbin_MG(sne_amg, 0);
      {
        dPOPTOPssrl;
        SETs(boolSV(!sv_eq_flags(left, right, 0)));
@@ -2621,8 +2551,11 @@ PP(pp_negate)
  PP(pp_not)
  {
      dSP;
-    tryAMAGICun_MG(not_amg, AMGf_set);
-    *PL_stack_sp = boolSV(!SvTRUE_nomg(*PL_stack_sp));
+    SV *sv;
+
+    tryAMAGICun_MG(not_amg, 0);
+    sv = *PL_stack_sp;
+    *PL_stack_sp = boolSV(!SvTRUE_nomg_NN(sv));
      return NORMAL;
  }
  
@@ -2638,25 +2571,24 @@ S_scomplement(pTHX_ SV *targ, SV *sv)
  
         if (SvUTF8(TARG)) {
              if (len && ! utf8_to_bytes(tmps, &len)) {
-                Perl_croak(aTHX_ fatal_above_ff_msg, PL_op_desc[PL_op->op_type]);
+                Perl_croak(aTHX_ FATAL_ABOVE_FF_MSG, PL_op_desc[PL_op->op_type]);
              }
-            SvCUR(TARG) = len;
+            SvCUR_set(TARG, len);
              SvUTF8_off(TARG);
          }
  
         anum = len;
  
-#ifdef LIBERAL
         {
             long *tmpl;
-           for ( ; anum && (unsigned long)tmps % sizeof(long); anum--, tmps++)
+           for ( ; anum && PTR2nat(tmps) % sizeof(long); anum--, tmps++)
                 *tmps = ~*tmps;
             tmpl = (long*)tmps;
             for ( ; anum >= (I32)sizeof(long); anum -= (I32)sizeof(long), tmpl++)
                 *tmpl = ~*tmpl;
             tmps = (U8*)tmpl;
         }
-#endif
+
         for ( ; anum > 0; anum--, tmps++)
             *tmps = ~*tmps;
  }
@@ -2752,7 +2684,6 @@ PP(pp_i_divide)
  
  PP(pp_i_modulo)
  {
-     /* This is the vanilla old i_modulo. */
       dSP; dATARGET;
       tryAMAGICbin_MG(modulo_amg, AMGf_assign);
       {
@@ -2768,30 +2699,6 @@ PP(pp_i_modulo)
       }
  }
  
-#if defined(__GLIBC__) && IVSIZE == 8 \
-    && ( __GLIBC__ < 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ < 8))
-
-PP(pp_i_modulo_glibc_bugfix)
-{
-     /* This is the i_modulo with the workaround for the _moddi3 bug
-      * in (at least) glibc 2.2.5 (the PERL_ABS() the workaround).
-      * See below for pp_i_modulo. */
-     dSP; dATARGET;
-     tryAMAGICbin_MG(modulo_amg, AMGf_assign);
-     {
-         dPOPTOPiirl_nomg;
-         if (!right)
-              DIE(aTHX_ "Illegal modulus zero");
-         /* avoid FPE_INTOVF on some platforms when left is IV_MIN */
-         if (right == -1)
-             SETi( 0 );
-         else
-             SETi( left % PERL_ABS(right) );
-         RETURN;
-     }
-}
-#endif
-
  PP(pp_i_add)
  {
      dSP; dATARGET;
@@ -2817,7 +2724,7 @@ PP(pp_i_subtract)
  PP(pp_i_lt)
  {
      dSP;
-    tryAMAGICbin_MG(lt_amg, AMGf_set);
+    tryAMAGICbin_MG(lt_amg, 0);
      {
        dPOPTOPiirl_nomg;
        SETs(boolSV(left < right));
@@ -2828,7 +2735,7 @@ PP(pp_i_lt)
  PP(pp_i_gt)
  {
      dSP;
-    tryAMAGICbin_MG(gt_amg, AMGf_set);
+    tryAMAGICbin_MG(gt_amg, 0);
      {
        dPOPTOPiirl_nomg;
        SETs(boolSV(left > right));
@@ -2839,7 +2746,7 @@ PP(pp_i_gt)
  PP(pp_i_le)
  {
      dSP;
-    tryAMAGICbin_MG(le_amg, AMGf_set);
+    tryAMAGICbin_MG(le_amg, 0);
      {
        dPOPTOPiirl_nomg;
        SETs(boolSV(left <= right));
@@ -2850,7 +2757,7 @@ PP(pp_i_le)
  PP(pp_i_ge)
  {
      dSP;
-    tryAMAGICbin_MG(ge_amg, AMGf_set);
+    tryAMAGICbin_MG(ge_amg, 0);
      {
        dPOPTOPiirl_nomg;
        SETs(boolSV(left >= right));
@@ -2861,7 +2768,7 @@ PP(pp_i_ge)
  PP(pp_i_eq)
  {
      dSP;
-    tryAMAGICbin_MG(eq_amg, AMGf_set);
+    tryAMAGICbin_MG(eq_amg, 0);
      {
        dPOPTOPiirl_nomg;
        SETs(boolSV(left == right));
@@ -2872,7 +2779,7 @@ PP(pp_i_eq)
  PP(pp_i_ne)
  {
      dSP;
-    tryAMAGICbin_MG(ne_amg, AMGf_set);
+    tryAMAGICbin_MG(ne_amg, 0);
      {
        dPOPTOPiirl_nomg;
        SETs(boolSV(left != right));
@@ -2998,7 +2905,7 @@ PP(pp_rand)
      {
         dSP;
         NV value;
-    
+
         if (MAXARG < 1)
         {
             EXTEND(SP, 1);
@@ -3169,7 +3076,7 @@ PP(pp_oct)
          /* If Unicode, try to downgrade
           * If not possible, croak. */
          SV* const tsv = sv_2mortal(newSVsv(sv));
-       
+
          SvUTF8_on(tsv);
          sv_utf8_downgrade(tsv, FALSE);
          tmps = SvPV_const(tsv, len);
@@ -3182,11 +3089,16 @@ PP(pp_oct)
      if (*tmps == '0')
          tmps++, len--;
      if (isALPHA_FOLD_EQ(*tmps, 'x')) {
+        tmps++, len--;
+        flags |= PERL_SCAN_DISALLOW_PREFIX;
      hex:
          result_uv = grok_hex (tmps, &len, &flags, &result_nv);
      }
-    else if (isALPHA_FOLD_EQ(*tmps, 'b'))
+    else if (isALPHA_FOLD_EQ(*tmps, 'b')) {
+        tmps++, len--;
+        flags |= PERL_SCAN_DISALLOW_PREFIX;
          result_uv = grok_bin (tmps, &len, &flags, &result_nv);
+    }
      else
          result_uv = grok_oct (tmps, &len, &flags, &result_nv);
  
@@ -3201,52 +3113,74 @@ PP(pp_oct)
  
  /* String stuff. */
  
+
  PP(pp_length)
  {
      dSP; dTARGET;
      SV * const sv = TOPs;
  
      U32 in_bytes = IN_BYTES;
-    /* simplest case shortcut */
-    /* turn off SVf_UTF8 in tmp flags if HINT_BYTES on*/
+    /* Simplest case shortcut:
+     * set svflags to just the SVf_POK|SVs_GMG|SVf_UTF8 from the SV,
+     * with the SVf_UTF8 flag inverted if under 'use bytes' (HINT_BYTES
+     * set)
+     */
      U32 svflags = (SvFLAGS(sv) ^ (in_bytes << 26)) & (SVf_POK|SVs_GMG|SVf_UTF8);
-    STATIC_ASSERT_STMT(HINT_BYTES == 0x00000008 && SVf_UTF8 == 0x20000000 && (SVf_UTF8 == HINT_BYTES << 26));
+
+    STATIC_ASSERT_STMT(SVf_UTF8 == (HINT_BYTES << 26));
      SETs(TARG);
  
-    if(LIKELY(svflags == SVf_POK))
+    if (LIKELY(svflags == SVf_POK))
          goto simple_pv;
-    if(svflags & SVs_GMG)
+
+    if (svflags & SVs_GMG)
          mg_get(sv);
+
      if (SvOK(sv)) {
-       if (!IN_BYTES) /* reread to avoid using an C auto/register */
-           sv_setiv(TARG, (IV)sv_len_utf8_nomg(sv));
-       else
-       {
-           STRLEN len;
+        STRLEN len;
+       if (!IN_BYTES) { /* reread to avoid using an C auto/register */
+            if ((SvFLAGS(sv) & (SVf_POK|SVf_UTF8)) == SVf_POK)
+                goto simple_pv;
+            if ( SvPOK(sv) && (PL_op->op_private & OPpTRUEBOOL)) {
+                /* no need to convert from bytes to chars */
+                len = SvCUR(sv);
+                goto return_bool;
+            }
+           len = sv_len_utf8_nomg(sv);
+        }
+       else {
              /* unrolled SvPV_nomg_const(sv,len) */
-            if(SvPOK_nog(sv)){
-                simple_pv:
+            if (SvPOK_nog(sv)) {
+              simple_pv:
                  len = SvCUR(sv);
-            } else  {
+                if (PL_op->op_private & OPpTRUEBOOL) {
+                  return_bool:
+                    SETs(len ? &PL_sv_yes : &PL_sv_zero);
+                    return NORMAL;
+                }
+            }
+            else {
                  (void)sv_2pv_flags(sv, &len, 0|SV_CONST_RETURN);
              }
-           sv_setiv(TARG, (IV)(len));
         }
-    } else {
+        TARGi((IV)(len), 1);
+    }
+    else {
         if (!SvPADTMP(TARG)) {
+            /* OPpTARGET_MY: targ is var in '$lex = length()' */
              sv_set_undef(TARG);
-       } else { /* TARG is on stack at this point and is overwriten by SETs.
-                   This branch is the odd one out, so put TARG by default on
-                   stack earlier to let local SP go out of liveness sooner */
+            SvSETMAGIC(TARG);
+       }
+        else
+            /* TARG is on stack at this point and is overwriten by SETs.
+             * This branch is the odd one out, so put TARG by default on
+             * stack earlier to let local SP go out of liveness sooner */
              SETs(&PL_sv_undef);
-            goto no_set_magic;
-        }
      }
-    SvSETMAGIC(TARG);
-    no_set_magic:
      return NORMAL; /* no putback, SP didn't move in this opcode */
  }
  
+
  /* Returns false if substring is completely outside original string.
     No length is indicated by len_iv = 0 and len_is_uv = 0.  len_is_uv must
     always be true for an explicit 0.
@@ -3549,7 +3483,7 @@ PP(pp_index)
                    convert the small string to ISO-8859-1, then there is no
                    way that it could be found anywhere by index.  */
                 retval = -1;
-               goto fail;
+               goto push_result;
             }
  
             /* At this point, pv is a malloc()ed string. So donate it to temp
@@ -3612,8 +3546,23 @@ PP(pp_index)
             retval = sv_pos_b2u_flags(big, retval, SV_CONST_RETURN);
      }
      SvREFCNT_dec(temp);
- fail:
-    PUSHi(retval);
+
+  push_result:
+    /* OPpTRUEBOOL indicates an '== -1' has been optimised away */
+    if (PL_op->op_private & OPpTRUEBOOL) {
+        SV *result = ((retval != -1) ^ cBOOL(PL_op->op_private & OPpINDEX_BOOLNEG))
+            ? &PL_sv_yes : &PL_sv_no;
+        if (PL_op->op_private & OPpTARGET_MY) {
+            /* $lex = (index() == -1) */
+            sv_setsv_mg(TARG, result);
+            PUSHs(TARG);
+        }
+        else {
+            PUSHs(result);
+        }
+    }
+    else
+        PUSHi(retval);
      RETURN;
  }
  
@@ -3718,29 +3667,25 @@ PP(pp_crypt)
          sv_utf8_downgrade(tsv, FALSE);
          tmps = SvPV_const(tsv, len);
      }
-#   ifdef USE_ITHREADS
-#     ifdef HAS_CRYPT_R
+#  ifdef USE_ITHREADS
+#    ifdef HAS_CRYPT_R
      if (!PL_reentrant_buffer->_crypt_struct_buffer) {
        /* This should be threadsafe because in ithreads there is only
         * one thread per interpreter.  If this would not be true,
         * we would need a mutex to protect this malloc. */
          PL_reentrant_buffer->_crypt_struct_buffer =
           (struct crypt_data *)safemalloc(sizeof(struct crypt_data));
-#if defined(__GLIBC__) || defined(__EMX__)
+#      if defined(__GLIBC__) || defined(__EMX__)
         if (PL_reentrant_buffer->_crypt_struct_buffer) {
             PL_reentrant_buffer->_crypt_struct_buffer->initialized = 0;
-           /* work around glibc-2.2.5 bug */
-           PL_reentrant_buffer->_crypt_struct_buffer->current_saltbits = 0;
         }
-#endif
+#      endif
      }
-#     endif /* HAS_CRYPT_R */
-#   endif /* USE_ITHREADS */
-#   ifdef FCRYPT
-    sv_setpv(TARG, fcrypt(tmps, SvPV_nolen_const(right)));
-#   else
+#    endif /* HAS_CRYPT_R */
+#  endif /* USE_ITHREADS */
+
      sv_setpv(TARG, PerlProc_crypt(tmps, SvPV_nolen_const(right)));
-#   endif
+
      SvUTF8_off(TARG);
      SETTARG;
      RETURN;
@@ -3750,7 +3695,7 @@ PP(pp_crypt)
  #endif
  }
  
-/* Generally UTF-8 and UTF-EBCDIC are indistinguishable at this level.  So 
+/* Generally UTF-8 and UTF-EBCDIC are indistinguishable at this level.  So
   * most comments below say UTF-8, when in fact they mean UTF-EBCDIC as well */
  
  
@@ -3780,6 +3725,7 @@ PP(pp_ucfirst)
      STRLEN tculen;  /* tculen is the byte length of the freshly titlecased (or
                      * lowercased) character stored in tmpbuf.  May be either
                      * UTF-8 or not, but in either case is the number of bytes */
+    bool remove_dot_above = FALSE;
  
      s = (const U8*)SvPV_const(source, slen);
  
@@ -3788,6 +3734,14 @@ PP(pp_ucfirst)
       * not convert in-place. */
      inplace = !SvREADONLY(source) && SvPADTMP(source);
  
+#ifdef USE_LOCALE_CTYPE
+
+    if (IN_LC_RUNTIME(LC_CTYPE)) {
+        _CHECK_AND_WARN_PROBLEMATIC_LOCALE;
+    }
+
+#endif
+
      /* First calculate what the changed first character should be.  This affects
       * whether we can just swap it out, leaving the rest of the string unchanged,
       * or even if have to convert the dest to UTF-8 when the source isn't */
@@ -3795,10 +3749,12 @@ PP(pp_ucfirst)
      if (! slen) {   /* If empty */
         need = 1; /* still need a trailing NUL */
         ulen = 0;
+        *tmpbuf = '\0';
      }
      else if (DO_UTF8(source)) {        /* Is the source utf8? */
         doing_utf8 = TRUE;
          ulen = UTF8SKIP(s);
+
          if (op_type == OP_UCFIRST) {
  #ifdef USE_LOCALE_CTYPE
             _toTITLE_utf8_flags(s, s +slen, tmpbuf, &tculen, IN_LC_RUNTIME(LC_CTYPE));
@@ -3807,12 +3763,45 @@ PP(pp_ucfirst)
  #endif
         }
          else {
+
  #ifdef USE_LOCALE_CTYPE
+
             _toLOWER_utf8_flags(s, s + slen, tmpbuf, &tculen, IN_LC_RUNTIME(LC_CTYPE));
+
+            /* In turkic locales, lower casing an 'I' normally yields U+0131,
+             * LATIN SMALL LETTER DOTLESS I, but not if the grapheme also
+             * contains a COMBINING DOT ABOVE.  Instead it is treated like
+             * LATIN CAPITAL LETTER I WITH DOT ABOVE lowercased to 'i'.  The
+             * call to lowercase above has handled this.  But SpecialCasing.txt
+             * says we are supposed to remove the COMBINING DOT ABOVE.  We can
+             * tell if we have this situation if I ==> i in a turkic locale. */
+            if (   UNLIKELY(PL_in_utf8_turkic_locale)
+                && IN_LC_RUNTIME(LC_CTYPE)
+                && (UNLIKELY(*s == 'I' && tmpbuf[0] == 'i')))
+            {
+                /* Here, we know there was a COMBINING DOT ABOVE.  We won't be
+                 * able to handle this in-place. */
+                inplace = FALSE;
+
+                /* It seems likely that the DOT will immediately follow the
+                 * 'I'.  If so, we can remove it simply by indicating to the
+                 * code below to start copying the source just beyond the DOT.
+                 * We know its length is 2 */
+                if (LIKELY(memBEGINs(s + 1, s + slen, COMBINING_DOT_ABOVE_UTF8))) {
+                    ulen += 2;
+                }
+                else {  /* But if it doesn't follow immediately, set a flag for
+                           the code below */
+                    remove_dot_above = TRUE;
+                }
+            }
  #else
+            PERL_UNUSED_VAR(remove_dot_above);
+
             _toLOWER_utf8_flags(s, s + slen, tmpbuf, &tculen, 0);
  #endif
-       }
+
+        }
  
          /* we can't do in-place if the length changes.  */
          if (ulen != tculen) inplace = FALSE;
@@ -3820,49 +3809,69 @@ PP(pp_ucfirst)
      }
      else { /* Non-zero length, non-UTF-8,  Need to consider locale and if
             * latin1 is treated as caseless.  Note that a locale takes
-           * precedence */ 
+           * precedence */
         ulen = 1;       /* Original character is 1 byte */
         tculen = 1;     /* Most characters will require one byte, but this will
                          * need to be overridden for the tricky ones */
         need = slen + 1;
  
-       if (op_type == OP_LCFIRST) {
  
-           /* lower case the first letter: no trickiness for any character */
  #ifdef USE_LOCALE_CTYPE
-            if (IN_LC_RUNTIME(LC_CTYPE)) {
-                _CHECK_AND_WARN_PROBLEMATIC_LOCALE;
-                *tmpbuf = toLOWER_LC(*s);
-            }
-            else
-#endif
+
+        if (IN_LC_RUNTIME(LC_CTYPE)) {
+            if (    UNLIKELY(PL_in_utf8_turkic_locale)
+                && (   (op_type == OP_LCFIRST && UNLIKELY(*s == 'I'))
+                    || (op_type == OP_UCFIRST && UNLIKELY(*s == 'i'))))
              {
-                *tmpbuf = (IN_UNI_8_BIT)
-                          ? toLOWER_LATIN1(*s)
-                          : toLOWER(*s);
+                if (*s == 'I') { /* lcfirst('I') */
+                    tmpbuf[0] = UTF8_TWO_BYTE_HI(LATIN_SMALL_LETTER_DOTLESS_I);
+                    tmpbuf[1] = UTF8_TWO_BYTE_LO(LATIN_SMALL_LETTER_DOTLESS_I);
+                }
+                else {  /* ucfirst('i') */
+                    tmpbuf[0] = UTF8_TWO_BYTE_HI(LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE);
+                    tmpbuf[1] = UTF8_TWO_BYTE_LO(LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE);
+                }
+                tculen = 2;
+                inplace = FALSE;
+                doing_utf8 = TRUE;
+                convert_source_to_utf8 = TRUE;
+                need += variant_under_utf8_count(s, s + slen);
              }
-       }
-#ifdef USE_LOCALE_CTYPE
-       /* is ucfirst() */
-       else if (IN_LC_RUNTIME(LC_CTYPE)) {
-            if (IN_UTF8_CTYPE_LOCALE) {
-                goto do_uni_rules;
+            else if (op_type == OP_LCFIRST) {
+
+                /* For lc, there are no gotchas for UTF-8 locales (other than
+                 * the turkish ones already handled above) */
+                *tmpbuf = toLOWER_LC(*s);
              }
+            else { /* ucfirst */
  
-            _CHECK_AND_WARN_PROBLEMATIC_LOCALE;
-            *tmpbuf = (U8) toUPPER_LC(*s); /* This would be a bug if any
-                                              locales have upper and title case
-                                              different */
-       }
+                /* But for uc, some characters require special handling */
+                if (IN_UTF8_CTYPE_LOCALE) {
+                    goto do_uni_rules;
+                }
+
+                /* This would be a bug if any locales have upper and title case
+                 * different */
+                *tmpbuf = (U8) toUPPER_LC(*s);
+            }
+        }
+        else
  #endif
-       else if (! IN_UNI_8_BIT) {
-           *tmpbuf = toUPPER(*s);      /* Returns caseless for non-ascii, or
-                                        * on EBCDIC machines whatever the
-                                        * native function does */
-       }
+        /* Here, not in locale.  If not using Unicode rules, is a simple
+         * lower/upper, depending */
+        if (! IN_UNI_8_BIT) {
+            *tmpbuf = (op_type == OP_LCFIRST)
+                      ? toLOWER(*s)
+                      : toUPPER(*s);
+        }
+        else if (op_type == OP_LCFIRST) {
+            /* lower case the first letter: no trickiness for any character */
+            *tmpbuf = toLOWER_LATIN1(*s);
+        }
          else {
              /* Here, is ucfirst non-UTF-8, not in locale (unless that locale is
-             * UTF-8, which we treat as not in locale), and cased latin1 */
+             * non-turkic UTF-8, which we treat as not in locale), and cased
+             * latin1 */
             UV title_ord;
  #ifdef USE_LOCALE_CTYPE
        do_uni_rules:
@@ -3886,16 +3895,19 @@ PP(pp_ucfirst)
                     inplace = FALSE;
  
                      /* If the result won't fit in a byte, the entire result
-                     * will have to be in UTF-8.  Assume worst case sizing in
-                     * conversion. (all latin1 characters occupy at most two
-                     * bytes in utf8) */
+                     * will have to be in UTF-8.  Allocate enough space for the
+                     * expanded first byte, and if UTF-8, the rest of the input
+                     * string, some or all of which may also expand to two
+                     * bytes, plus the terminating NUL. */
                     if (title_ord > 255) {
                         doing_utf8 = TRUE;
                         convert_source_to_utf8 = TRUE;
-                       need = slen * 2 + 1;
+                       need = slen
+                            + variant_under_utf8_count(s, s + slen)
+                            + 1;
  
                          /* The (converted) UTF-8 and UTF-EBCDIC lengths of all
-                         * (both) characters whose title case is above 255 is
+                         * characters whose title case is above 255 is
                           * 2. */
                         ulen = 2;
                     }
@@ -3939,6 +3951,29 @@ PP(pp_ucfirst)
                  * of the string. */
                 sv_setpvn(dest, (char*)tmpbuf, tculen);
                 if (slen > ulen) {
+
+                    /* But this boolean being set means we are in a turkic
+                     * locale, and there is a DOT character that needs to be
+                     * removed, and it isn't immediately after the current
+                     * character.  Keep concatenating characters to the output
+                     * one at a time, until we find the DOT, which we simply
+                     * skip */
+                    if (UNLIKELY(remove_dot_above)) {
+                        do {
+                            Size_t this_len = UTF8SKIP(s + ulen);
+
+                            sv_catpvn(dest, (char*)(s + ulen), this_len);
+
+                            ulen += this_len;
+                            if (memBEGINs(s + ulen, s + slen, COMBINING_DOT_ABOVE_UTF8)) {
+                                ulen += 2;
+                                break;
+                            }
+                        } while (s + ulen < s + slen);
+                    }
+
+                    /* The rest of the string can be concatenated unchanged,
+                     * all at once */
                     sv_catpvn(dest, (char*)(s + ulen), slen - ulen);
                 }
             }
@@ -3950,15 +3985,18 @@ PP(pp_ucfirst)
                  * into tmpbuf.  First put that into dest, and then append the
                  * rest of the source, converting it to UTF-8 as we go. */
  
-               /* Assert tculen is 2 here because the only two characters that
+               /* Assert tculen is 2 here because the only characters that
                  * get to this part of the code have 2-byte UTF-8 equivalents */
+                assert(tculen == 2);
                 *d++ = *tmpbuf;
                 *d++ = *(tmpbuf + 1);
                 s++;    /* We have just processed the 1st char */
  
-               for (; s < send; s++) {
-                   d = uvchr_to_utf8(d, *s);
-               }
+                while (s < send) {
+                    append_utf8_from_native_byte(*s, &d);
+                    s++;
+                }
+
                 *d = '\0';
                 SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
             }
@@ -3970,7 +4008,7 @@ PP(pp_ucfirst)
         }
  
      }
-    else {  /* Neither source nor dest are in or need to be UTF-8 */
+    else {  /* Neither source nor dest are, nor need to be UTF-8 */
         if (slen) {
             if (inplace) {  /* in-place, only need to change the 1st char */
                 *d = *tmpbuf;
@@ -3990,7 +4028,7 @@ PP(pp_ucfirst)
  
         /* In a "use bytes" we don't treat the source as UTF-8, but, still want
          * the destination to retain that flag */
-       if (SvUTF8(source) && ! IN_BYTES)
+       if (DO_UTF8(source))
             SvUTF8_on(dest);
  
         if (!inplace) { /* Finish the rest of the string, unchanged */
@@ -4011,9 +4049,6 @@ PP(pp_ucfirst)
      return NORMAL;
  }
  
-/* There's so much setup/teardown code common between uc and lc, I wonder if
-   it would be worth merging the two, and just having a switch outside each
-   of the three tight loops.  There is less and less commonality though */
  PP(pp_uc)
  {
      dSP;
@@ -4065,6 +4100,14 @@ PP(pp_uc)
         SETs(dest);
      }
  
+#ifdef USE_LOCALE_CTYPE
+
+    if (IN_LC_RUNTIME(LC_CTYPE)) {
+        _CHECK_AND_WARN_PROBLEMATIC_LOCALE;
+    }
+
+#endif
+
      /* Overloaded values may have toggled the UTF-8 flag on source, so we need
         to check DO_UTF8 again here.  */
  
@@ -4072,6 +4115,8 @@ PP(pp_uc)
         const U8 *const send = s + len;
         U8 tmpbuf[UTF8_MAXBYTES_CASE+1];
  
+#define GREEK_CAPITAL_LETTER_IOTA 0x0399
+#define COMBINING_GREEK_YPOGEGRAMMENI 0x0345
         /* All occurrences of these are to be moved to follow any other marks.
          * This is context-dependent.  We may not be passed enough context to
          * move the iota subscript beyond all of them, but we do the best we can
@@ -4088,12 +4133,16 @@ PP(pp_uc)
             STRLEN u;
             STRLEN ulen;
             UV uv;
-           if (in_iota_subscript && ! _is_utf8_mark(s)) {
+           if (UNLIKELY(in_iota_subscript)) {
+                UV cp = utf8_to_uvchr_buf(s, send, NULL);
+
+                if (! _invlist_contains_cp(PL_utf8_mark, cp)) {
  
-               /* A non-mark.  Time to output the iota subscript */
-               Copy(GREEK_CAPITAL_LETTER_IOTA_UTF8, d, capital_iota_len, U8);
-                d += capital_iota_len;
-               in_iota_subscript = FALSE;
+                    /* A non-mark.  Time to output the iota subscript */
+                    *d++ = UTF8_TWO_BYTE_HI(GREEK_CAPITAL_LETTER_IOTA);
+                    *d++ = UTF8_TWO_BYTE_LO(GREEK_CAPITAL_LETTER_IOTA);
+                    in_iota_subscript = FALSE;
+                }
              }
  
              /* Then handle the current character.  Get the changed case value
@@ -4105,8 +4154,6 @@ PP(pp_uc)
  #else
              uv = _toUPPER_utf8_flags(s, send, tmpbuf, &ulen, 0);
  #endif
-#define GREEK_CAPITAL_LETTER_IOTA 0x0399
-#define COMBINING_GREEK_YPOGEGRAMMENI 0x0345
              if (uv == GREEK_CAPITAL_LETTER_IOTA
                  && utf8_to_uvchr_buf(s, send, 0) == COMBINING_GREEK_YPOGEGRAMMENI)
              {
@@ -4120,9 +4167,10 @@ PP(pp_uc)
  
                      /* If someone uppercases one million U+03B0s we SvGROW()
                       * one million times.  Or we could try guessing how much to
-                     * allocate without allocating too much.  Such is life.
-                     * See corresponding comment in lc code for another option
-                     * */
+                     * allocate without allocating too much.  But we can't
+                     * really guess without examining the rest of the string.
+                     * Such is life.  See corresponding comment in lc code for
+                     * another option */
                      d = o + (U8*) SvGROW(dest, min);
                  }
                  Copy(tmpbuf, d, ulen, U8);
@@ -4131,8 +4179,8 @@ PP(pp_uc)
              s += u;
         }
         if (in_iota_subscript) {
-            Copy(GREEK_CAPITAL_LETTER_IOTA_UTF8, d, capital_iota_len, U8);
-            d += capital_iota_len;
+            *d++ = UTF8_TWO_BYTE_HI(GREEK_CAPITAL_LETTER_IOTA);
+            *d++ = UTF8_TWO_BYTE_LO(GREEK_CAPITAL_LETTER_IOTA);
         }
         SvUTF8_on(dest);
         *d = '\0';
@@ -4151,7 +4199,6 @@ PP(pp_uc)
                  if (IN_UTF8_CTYPE_LOCALE) {
                      goto do_uni_rules;
                  }
-                _CHECK_AND_WARN_PROBLEMATIC_LOCALE;
                 for (; s < send; d++, s++)
                      *d = (U8) toUPPER_LC(*s);
             }
@@ -4167,16 +4214,27 @@ PP(pp_uc)
            do_uni_rules:
  #endif
                 for (; s < send; d++, s++) {
+                    Size_t extra;
+
                     *d = toUPPER_LATIN1_MOD(*s);
-                   if (LIKELY(*d != LATIN_SMALL_LETTER_Y_WITH_DIAERESIS)) {
+                   if (   LIKELY(*d != LATIN_SMALL_LETTER_Y_WITH_DIAERESIS)
+
+#ifdef USE_LOCALE_CTYPE
+
+                        && (LIKELY(   ! PL_in_utf8_turkic_locale
+                                   || ! IN_LC_RUNTIME(LC_CTYPE))
+                                   || *s != 'i')
+#endif
+
+                    ) {
                          continue;
                      }
  
                     /* The mainstream case is the tight loop above.  To avoid
-                    * extra tests in that, all three characters that require
-                    * special handling are mapped by the MOD to the one tested
-                    * just above.  
-                    * Use the source to distinguish between the three cases */
+                     * extra tests in that, all three characters that always
+                     * require special handling are mapped by the MOD to the
+                     * one tested just above.  Use the source to distinguish
+                     * between those cases */
  
  #if    UNICODE_MAJOR_VERSION > 2                                        \
     || (UNICODE_MAJOR_VERSION == 2 && UNICODE_DOT_VERSION >= 1          \
@@ -4185,7 +4243,7 @@ PP(pp_uc)
  
                         /* uc() of this requires 2 characters, but they are
                          * ASCII.  If not enough room, grow the string */
-                       if (SvLEN(dest) < ++min) {      
+                       if (SvLEN(dest) < ++min) {
                             const UV o = d - (U8*)SvPVX_const(dest);
                             d = o + (U8*) SvGROW(dest, min);
                         }
@@ -4194,59 +4252,105 @@ PP(pp_uc)
                     }
  #endif
  
-                   /* The other two special handling characters have their
+                   /* The other special handling characters have their
                      * upper cases outside the latin1 range, hence need to be
-                    * in UTF-8, so the whole result needs to be in UTF-8.  So,
-                    * here we are somewhere in the middle of processing a
-                    * non-UTF-8 string, and realize that we will have to convert
-                    * the whole thing to UTF-8.  What to do?  There are
-                    * several possibilities.  The simplest to code is to
-                    * convert what we have so far, set a flag, and continue on
-                    * in the loop.  The flag would be tested each time through
-                    * the loop, and if set, the next character would be
-                    * converted to UTF-8 and stored.  But, I (khw) didn't want
-                    * to slow down the mainstream case at all for this fairly
-                    * rare case, so I didn't want to add a test that didn't
-                    * absolutely have to be there in the loop, besides the
-                    * possibility that it would get too complicated for
-                    * optimizers to deal with.  Another possibility is to just
-                    * give up, convert the source to UTF-8, and restart the
-                    * function that way.  Another possibility is to convert
-                    * both what has already been processed and what is yet to
-                    * come separately to UTF-8, then jump into the loop that
-                    * handles UTF-8.  But the most efficient time-wise of the
-                    * ones I could think of is what follows, and turned out to
-                    * not require much extra code.  */
-
-                   /* Convert what we have so far into UTF-8, telling the
+                    * in UTF-8, so the whole result needs to be in UTF-8.
+                     *
+                     * So, here we are somewhere in the middle of processing a
+                     * non-UTF-8 string, and realize that we will have to
+                     * convert the whole thing to UTF-8.  What to do?  There
+                     * are several possibilities.  The simplest to code is to
+                     * convert what we have so far, set a flag, and continue on
+                     * in the loop.  The flag would be tested each time through
+                     * the loop, and if set, the next character would be
+                     * converted to UTF-8 and stored.  But, I (khw) didn't want
+                     * to slow down the mainstream case at all for this fairly
+                     * rare case, so I didn't want to add a test that didn't
+                     * absolutely have to be there in the loop, besides the
+                     * possibility that it would get too complicated for
+                     * optimizers to deal with.  Another possibility is to just
+                     * give up, convert the source to UTF-8, and restart the
+                     * function that way.  Another possibility is to convert
+                     * both what has already been processed and what is yet to
+                     * come separately to UTF-8, then jump into the loop that
+                     * handles UTF-8.  But the most efficient time-wise of the
+                     * ones I could think of is what follows, and turned out to
+                     * not require much extra code.
+                     *
+                     * First, calculate the extra space needed for the
+                     * remainder of the source needing to be in UTF-8.  Except
+                     * for the 'i' in Turkic locales, in UTF-8 strings, the
+                     * uppercase of a character below 256 occupies the same
+                     * number of bytes as the original.  Therefore, the space
+                     * needed is the that number plus the number of characters
+                     * that become two bytes when converted to UTF-8, plus, in
+                     * turkish locales, the number of 'i's. */
+
+                    extra = send - s + variant_under_utf8_count(s, send);
+
+#ifdef USE_LOCALE_CTYPE
+
+                    if (UNLIKELY(*s == 'i')) {  /* We wouldn't get an 'i' here
+                                                   unless are in a Turkic
+                                                   locale */
+                        const U8 * s_peek = s;
+
+                        do {
+                            extra++;
+
+                            s_peek = (U8 *) memchr(s_peek + 1, 'i',
+                                                   send - (s_peek + 1));
+                        } while (s_peek != NULL);
+                    }
+#endif
+
+                    /* Convert what we have so far into UTF-8, telling the
                      * function that we know it should be converted, and to
                      * allow extra space for what we haven't processed yet.
-                    * Assume the worst case space requirements for converting
-                    * what we haven't processed so far: that it will require
-                    * two bytes for each remaining source character, plus the
-                    * NUL at the end.  This may cause the string pointer to
-                    * move, so re-find it. */
+                     *
+                     * This may cause the string pointer to move, so need to
+                     * save and re-find it. */
  
                     len = d - (U8*)SvPVX_const(dest);
                     SvCUR_set(dest, len);
                     len = sv_utf8_upgrade_flags_grow(dest,
                                                 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
-                                               (send -s) * 2 + 1);
+                                                extra
+                                              + 1 /* trailing NUL */ );
                     d = (U8*)SvPVX(dest) + len;
  
-                   /* Now process the remainder of the source, converting to
-                    * upper and UTF-8.  If a resulting byte is invariant in
-                    * UTF-8, output it as-is, otherwise convert to UTF-8 and
-                    * append it to the output. */
-                   for (; s < send; s++) {
-                       (void) _to_upper_title_latin1(*s, d, &len, 'S');
-                       d += len;
-                   }
+                    /* Now process the remainder of the source, simultaneously
+                     * converting to upper and UTF-8.
+                     *
+                     * To avoid extra tests in the loop body, and since the
+                     * loop is so simple, split out the rare Turkic case into
+                     * its own loop */
  
-                   /* Here have processed the whole source; no need to continue
-                    * with the outer loop.  Each character has been converted
-                    * to upper case and converted to UTF-8 */
+#ifdef USE_LOCALE_CTYPE
+                    if (   UNLIKELY(PL_in_utf8_turkic_locale)
+                        && UNLIKELY(IN_LC_RUNTIME(LC_CTYPE)))
+                    {
+                        for (; s < send; s++) {
+                            if (*s == 'i') {
+                                *d++ = UTF8_TWO_BYTE_HI(LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE);
+                                *d++ = UTF8_TWO_BYTE_LO(LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE);
+                            }
+                            else {
+                                (void) _to_upper_title_latin1(*s, d, &len, 'S');
+                                d += len;
+                            }
+                        }
+                    }
+                    else
+#endif
+                        for (; s < send; s++) {
+                            (void) _to_upper_title_latin1(*s, d, &len, 'S');
+                            d += len;
+                        }
  
+                    /* Here have processed the whole source; no need to
+                     * continue with the outer loop.  Each character has been
+                     * converted to upper case and converted to UTF-8. */
                     break;
                 } /* End of processing all latin1-style chars */
             } /* End of processing all chars */
@@ -4278,15 +4382,26 @@ PP(pp_lc)
      SV *dest;
      const U8 *s;
      U8 *d;
+    bool has_turkic_I = FALSE;
  
      SvGETMAGIC(source);
  
      if (   SvPADTMP(source)
         && !SvREADONLY(source) && SvPOK(source)
-       && !DO_UTF8(source)) {
+       && !DO_UTF8(source)
+
+#ifdef USE_LOCALE_CTYPE
+
+        && (   LIKELY(! IN_LC_RUNTIME(LC_CTYPE))
+            || LIKELY(! PL_in_utf8_turkic_locale))
+
+#endif
  
-       /* We can convert in place, as lowercasing anything in the latin1 range
-        * (or else DO_UTF8 would have been on) doesn't lengthen it */
+    ) {
+
+        /* We can convert in place, as, outside of Turkic UTF-8 locales,
+         * lowercasing anything in the latin1 range (or else DO_UTF8 would have
+         * been on) doesn't lengthen it. */
         dest = source;
         s = d = (U8*)SvPV_force_nomg(source, len);
         min = len + 1;
@@ -4305,25 +4420,93 @@ PP(pp_lc)
         SETs(dest);
      }
  
+#ifdef USE_LOCALE_CTYPE
+
+    if (IN_LC_RUNTIME(LC_CTYPE)) {
+        const U8 * next_I;
+
+        _CHECK_AND_WARN_PROBLEMATIC_LOCALE;
+
+        /* Lowercasing in a Turkic locale can cause non-UTF-8 to need to become
+         * UTF-8 for the single case of the character 'I' */
+        if (     UNLIKELY(PL_in_utf8_turkic_locale)
+            && ! DO_UTF8(source)
+            &&   (next_I = (U8 *) memchr(s, 'I', len)))
+        {
+            Size_t I_count = 0;
+            const U8 *const send = s + len;
+
+            do {
+                I_count++;
+
+                next_I = (U8 *) memchr(next_I + 1, 'I',
+                                        send - (next_I + 1));
+            } while (next_I != NULL);
+
+            /* Except for the 'I', in UTF-8 strings, the lower case of a
+             * character below 256 occupies the same number of bytes as the
+             * original.  Therefore, the space needed is the original length
+             * plus I_count plus the number of characters that become two bytes
+             * when converted to UTF-8 */
+            sv_utf8_upgrade_flags_grow(dest, 0, len
+                                              + I_count
+                                              + variant_under_utf8_count(s, send)
+                                              + 1 /* Trailing NUL */ );
+            d = (U8*)SvPVX(dest);
+            has_turkic_I = TRUE;
+        }
+    }
+
+#endif
+
      /* Overloaded values may have toggled the UTF-8 flag on source, so we need
         to check DO_UTF8 again here.  */
  
      if (DO_UTF8(source)) {
         const U8 *const send = s + len;
         U8 tmpbuf[UTF8_MAXBYTES_CASE+1];
+        bool remove_dot_above = FALSE;
  
         while (s < send) {
             const STRLEN u = UTF8SKIP(s);
             STRLEN ulen;
  
  #ifdef USE_LOCALE_CTYPE
+
             _toLOWER_utf8_flags(s, send, tmpbuf, &ulen, IN_LC_RUNTIME(LC_CTYPE));
+
+            /* If we are in a Turkic locale, we have to do more work.  As noted
+             * in the comments for lcfirst, there is a special case if a 'I'
+             * is in a grapheme with COMBINING DOT ABOVE UTF8.  It turns into a
+             * 'i', and the DOT must be removed.  We check for that situation,
+             * and set a flag if the DOT is there.  Then each time through the
+             * loop, we have to see if we need to remove the next DOT above,
+             * and if so, do it.  We know that there is a DOT because
+             * _toLOWER_utf8_flags() wouldn't have returned 'i' unless there
+             * was one in a proper position. */
+            if (   UNLIKELY(PL_in_utf8_turkic_locale)
+                && IN_LC_RUNTIME(LC_CTYPE))
+            {
+                if (   UNLIKELY(remove_dot_above)
+                    && memBEGINs(tmpbuf, sizeof(tmpbuf), COMBINING_DOT_ABOVE_UTF8))
+                {
+                    s += u;
+                    remove_dot_above = FALSE;
+                    continue;
+                }
+                else if (UNLIKELY(*s == 'I' && tmpbuf[0] == 'i')) {
+                    remove_dot_above = TRUE;
+                }
+            }
  #else
+            PERL_UNUSED_VAR(remove_dot_above);
+
             _toLOWER_utf8_flags(s, send, tmpbuf, &ulen, 0);
  #endif
  
-           /* Here is where we would do context-sensitive actions.  See the
-            * commit message for 86510fb15 for why there isn't any */
+            /* Here is where we would do context-sensitive actions for the
+             * Greek final sigma.  See the commit message for 86510fb15 for why
+             * there isn't any */
  
             if (ulen > u && (SvLEN(dest) < (min += ulen - u))) {
  
@@ -4349,7 +4532,7 @@ PP(pp_lc)
         SvUTF8_on(dest);
         *d = '\0';
         SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
-    } else {   /* Not utf8 */
+    } else {   /* 'source' not utf8 */
         if (len) {
             const U8 *const send = s + len;
  
@@ -4358,9 +4541,22 @@ PP(pp_lc)
              * whole thing in a tight loop, for speed, */
  #ifdef USE_LOCALE_CTYPE
              if (IN_LC_RUNTIME(LC_CTYPE)) {
-                _CHECK_AND_WARN_PROBLEMATIC_LOCALE;
-               for (; s < send; d++, s++)
-                   *d = toLOWER_LC(*s);
+                if (LIKELY( ! has_turkic_I)) {
+                    for (; s < send; d++, s++)
+                        *d = toLOWER_LC(*s);
+                }
+                else {  /* This is the only case where lc() converts 'dest'
+                           into UTF-8 from a non-UTF-8 'source' */
+                    for (; s < send; s++) {
+                        if (*s == 'I') {
+                            *d++ = UTF8_TWO_BYTE_HI(LATIN_SMALL_LETTER_DOTLESS_I);
+                            *d++ = UTF8_TWO_BYTE_LO(LATIN_SMALL_LETTER_DOTLESS_I);
+                        }
+                        else {
+                            append_utf8_from_native_byte(toLOWER_LATIN1(*s), &d);
+                        }
+                    }
+                }
              }
             else
  #endif
@@ -4420,7 +4616,7 @@ PP(pp_quotemeta)
  #ifdef USE_LOCALE_CTYPE
                     /* In locale, we quote all non-ASCII Latin1 chars.
                      * Otherwise use the quoting rules */
-                   
+
                     IN_LC_RUNTIME(LC_CTYPE)
                         ||
  #endif
@@ -4522,6 +4718,15 @@ PP(pp_fc)
      SETs(dest);
  
      send = s + len;
+
+#ifdef USE_LOCALE_CTYPE
+
+    if ( IN_LC_RUNTIME(LC_CTYPE) ) { /* Under locale */
+        _CHECK_AND_WARN_PROBLEMATIC_LOCALE;
+    }
+
+#endif
+
      if (DO_UTF8(source)) { /* UTF-8 flagged string. */
          while (s < send) {
              const STRLEN u = UTF8SKIP(s);
@@ -4546,7 +4751,6 @@ PP(pp_fc)
              if (IN_UTF8_CTYPE_LOCALE) {
                  goto do_uni_folding;
              }
-            _CHECK_AND_WARN_PROBLEMATIC_LOCALE;
              for (; s < send; d++, s++)
                  *d = (U8) toFOLD_LC(*s);
          }
@@ -4560,52 +4764,80 @@ PP(pp_fc)
  #ifdef USE_LOCALE_CTYPE
        do_uni_folding:
  #endif
-            /* For ASCII and the Latin-1 range, there's only two troublesome
-             * folds, \x{DF} (\N{LATIN SMALL LETTER SHARP S}), which under full
-             * casefolding becomes 'ss'; and \x{B5} (\N{MICRO SIGN}), which
-             * under any fold becomes \x{3BC} (\N{GREEK SMALL LETTER MU}) --
+            /* For ASCII and the Latin-1 range, there's potentially three
+             * troublesome folds:
+             *      \x{DF} (\N{LATIN SMALL LETTER SHARP S}), which under full
+             *             casefolding becomes 'ss';
+             *      \x{B5} (\N{MICRO SIGN}), which under any fold becomes
+             *             \x{3BC} (\N{GREEK SMALL LETTER MU})
+             *      I      only in Turkic locales, this folds to \x{131}
+             *             \N{LATIN SMALL LETTER DOTLESS I}
               * For the rest, the casefold is their lowercase.  */
              for (; s < send; d++, s++) {
-                if (*s == MICRO_SIGN) {
+                if (    UNLIKELY(*s == MICRO_SIGN)
+#ifdef USE_LOCALE_CTYPE
+                    || (   UNLIKELY(PL_in_utf8_turkic_locale)
+                        && UNLIKELY(IN_LC_RUNTIME(LC_CTYPE))
+                        && UNLIKELY(*s == 'I'))
+#endif
+                ) {
+                    Size_t extra = send - s
+                                 + variant_under_utf8_count(s, send);
+
                      /* \N{MICRO SIGN}'s casefold is \N{GREEK SMALL LETTER MU},
-                     * which is outside of the latin-1 range. There's a couple
-                     * of ways to deal with this -- khw discusses them in
-                     * pp_lc/uc, so go there :) What we do here is upgrade what
-                     * we had already casefolded, then enter an inner loop that
-                     * appends the rest of the characters as UTF-8. */
+                     * and 'I' in Turkic locales is \N{LATIN SMALL LETTER
+                     * DOTLESS I} both of which are outside of the latin-1
+                     * range. There's a couple of ways to deal with this -- khw
+                     * discusses them in pp_lc/uc, so go there :) What we do
+                     * here is upgrade what we had already casefolded, then
+                     * enter an inner loop that appends the rest of the
+                     * characters as UTF-8.
+                     *
+                     * First we calculate the needed size of the upgraded dest
+                     * beyond what's been processed already (the upgrade
+                     * function figures that out).  Except for the 'I' in
+                     * Turkic locales, in UTF-8 strings, the fold case of a
+                     * character below 256 occupies the same number of bytes as
+                     * the original (even the Sharp S).  Therefore, the space
+                     * needed is the number of bytes remaining plus the number
+                     * of characters that become two bytes when converted to
+                     * UTF-8 plus, in turkish locales, the number of 'I's */
+
+                    if (UNLIKELY(*s == 'I')) {
+                        const U8 * s_peek = s;
+
+                        do {
+                            extra++;
+
+                            s_peek = (U8 *) memchr(s_peek + 1, 'i',
+                                                   send - (s_peek + 1));
+                        } while (s_peek != NULL);
+                    }
+
+                    /* Growing may move things, so have to save and recalculate
+                     * 'd' */
                      len = d - (U8*)SvPVX_const(dest);
                      SvCUR_set(dest, len);
                      len = sv_utf8_upgrade_flags_grow(dest,
                                                  SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
-                                               /* The max expansion for latin1
-                                                * chars is 1 byte becomes 2 */
-                                                (send -s) * 2 + 1);
+                                                extra
+                                              + 1 /* Trailing NUL */ );
                      d = (U8*)SvPVX(dest) + len;
  
-                    Copy(GREEK_SMALL_LETTER_MU_UTF8, d, small_mu_len, U8);
-                    d += small_mu_len;
+                    *d++ = UTF8_TWO_BYTE_HI(GREEK_SMALL_LETTER_MU);
+                    *d++ = UTF8_TWO_BYTE_LO(GREEK_SMALL_LETTER_MU);
                      s++;
+
                      for (; s < send; s++) {
                          STRLEN ulen;
-                        UV fc = _to_uni_fold_flags(*s, tmpbuf, &ulen, flags);
-                        if UVCHR_IS_INVARIANT(fc) {
-                            if (full_folding
-                                && *s == LATIN_SMALL_LETTER_SHARP_S)
-                            {
-                                *d++ = 's';
-                                *d++ = 's';
-                            }
-                            else
-                                *d++ = (U8)fc;
-                        }
-                        else {
-                            Copy(tmpbuf, d, ulen, U8);
-                            d += ulen;
-                        }
+                        _to_uni_fold_flags(*s, d, &ulen, flags);
+                        d += ulen;
                      }
                      break;
                  }
-                else if (full_folding && *s == LATIN_SMALL_LETTER_SHARP_S) {
+                else if (   UNLIKELY(*s == LATIN_SMALL_LETTER_SHARP_S)
+                         && full_folding)
+                {
                      /* Under full casefolding, LATIN SMALL LETTER SHARP S
                       * becomes "ss", which may require growing the SV. */
                      if (SvLEN(dest) < ++min) {
@@ -4615,8 +4847,7 @@ PP(pp_fc)
                      *(d)++ = 's';
                      *d = 's';
                  }
-                else { /* If it's not one of those two, the fold is their lower
-                          case */
+                else { /* Else, the fold is the lower case */
                      *d = toLOWER_LATIN1(*s);
                  }
               }
@@ -4757,7 +4988,7 @@ PP(pp_aeach)
      IV *iterp = Perl_av_iter_p(aTHX_ array);
      const IV current = (*iterp)++;
  
-    if (current > av_tindex(array)) {
+    if (current > av_top_index(array)) {
         *iterp = 0;
         if (gimme == G_SCALAR)
             RETPUSHUNDEF;
@@ -4785,7 +5016,7 @@ PP(pp_akeys)
  
      if (gimme == G_SCALAR) {
         dTARGET;
-       PUSHi(av_tindex(array) + 1);
+       PUSHi(av_count(array));
      }
      else if (gimme == G_ARRAY) {
        if (UNLIKELY(PL_op->op_private & OPpMAYBE_LVSUB)) {
@@ -4796,7 +5027,7 @@ PP(pp_akeys)
                        "Can't modify keys on array in list assignment");
        }
        {
-        IV n = Perl_av_len(aTHX_ array);
+        IV n = av_top_index(array);
          IV i;
  
          EXTEND(SP, n + 1);
@@ -5108,7 +5339,7 @@ PP(pp_hslice)
                  DIE(aTHX_ PL_no_helem_sv, SVfARG(keysv));
              }
              if (localizing) {
-               if (HvNAME_get(hv) && isGV(*svp))
+               if (HvNAME_get(hv) && isGV_or_RVCV(*svp))
                     save_gp(MUTABLE_GV(*svp), !(PL_op->op_flags & OPf_SPECIAL));
                 else if (preeminent)
                     save_helem_flags(hv, keysv, svp,
@@ -5183,8 +5414,11 @@ PP(pp_list)
  {
      I32 markidx = POPMARK;
      if (GIMME_V != G_ARRAY) {
-       SV **mark = PL_stack_base + markidx;
+        /* don't initialize mark here, EXTEND() may move the stack */
+        SV **mark;
         dSP;
+        EXTEND(SP, 1);          /* in case no arguments, as in @empty */
+        mark = PL_stack_base + markidx;
         if (++MARK <= SP)
             *MARK = *SP;                /* unwanted list, return last item */
         else
@@ -5209,6 +5443,7 @@ PP(pp_lslice)
  
      if (GIMME_V != G_ARRAY) {
          if (lastlelem < firstlelem) {
+            EXTEND(SP, 1);
              *firstlelem = &PL_sv_undef;
          }
          else {
@@ -5310,6 +5545,9 @@ PP(pp_splice)
                                     sp - mark);
      }
  
+    if (SvREADONLY(ary))
+        Perl_croak_no_modify();
+
      SP++;
  
      if (++MARK < SP) {
@@ -5420,7 +5658,7 @@ PP(pp_splice)
         i = -diff;
         while (i)
             dst[--i] = NULL;
-       
+
         if (newlen) {
             Copy( tmparyval, AvARRAY(ary) + offset, newlen, SV* );
             Safefree(tmparyval);
@@ -5621,7 +5859,7 @@ PP(pp_reverse)
                 const MAGIC *mg;
                 bool can_preserve = SvCANEXISTDELETE(av);
  
-               for (i = 0, j = av_tindex(av); i < j; ++i, --j) {
+               for (i = 0, j = av_top_index(av); i < j; ++i, --j) {
                     SV *begin, *end;
  
                     if (can_preserve) {
@@ -5680,11 +5918,18 @@ PP(pp_reverse)
         STRLEN len;
  
         SvUTF8_off(TARG);                               /* decontaminate */
-       if (SP - MARK > 1)
+       if (SP - MARK > 1) {
             do_join(TARG, &PL_sv_no, MARK, SP);
-       else {
-           sv_setsv(TARG, SP > MARK ? *SP : DEFSV);
+           SP = MARK + 1;
+           SETs(TARG);
+       } else if (SP > MARK) {
+           sv_setsv(TARG, *SP);
+           SETs(TARG);
+        } else {
+           sv_setsv(TARG, DEFSV);
+           XPUSHs(TARG);
         }
+        SvSETMAGIC(TARG); /* remove any utf8 length magic */
  
         up = SvPV_force(TARG, len);
         if (len > 1) {
@@ -5721,8 +5966,6 @@ PP(pp_reverse)
             }
             (void)SvPOK_only_UTF8(TARG);
         }
-       SP = MARK + 1;
-       SETTARG;
      }
      RETURN;
  }
@@ -5738,6 +5981,7 @@ PP(pp_split)
      STRLEN len;
      const char *s = SvPV_const(sv, len);
      const bool do_utf8 = DO_UTF8(sv);
+    const bool in_uni_8_bit = IN_UNI_8_BIT;
      const char *strend = s + len;
      PMOP *pm = cPMOPx(PL_op);
      REGEXP *rx;
@@ -5767,6 +6011,7 @@ PP(pp_split)
  
      /* handle @ary = split(...) optimisation */
      if (PL_op->op_private & OPpSPLIT_ASSIGN) {
+       realarray = 1;
          if (!(PL_op->op_flags & OPf_STACKED)) {
              if (PL_op->op_private & OPpSPLIT_LEX) {
                  if (PL_op->op_private & OPpLVAL_INTRO)
@@ -5789,26 +6034,13 @@ PP(pp_split)
              oldsave = PL_savestack_ix;
          }
  
-       realarray = 1;
-       PUTBACK;
-       av_extend(ary,0);
-       (void)sv_2mortal(SvREFCNT_inc_simple_NN(sv));
-       av_clear(ary);
-       SPAGAIN;
+       /* Some defence against stack-not-refcounted bugs */
+       (void)sv_2mortal(SvREFCNT_inc_simple_NN(ary));
+
         if ((mg = SvTIED_mg((const SV *)ary, PERL_MAGIC_tied))) {
             PUSHMARK(SP);
             XPUSHs(SvTIED_obj(MUTABLE_SV(ary), mg));
-       }
-       else {
-           if (!AvREAL(ary)) {
-               I32 i;
-               AvREAL_on(ary);
-               AvREIFY_off(ary);
-               for (i = AvFILLp(ary); i >= 0; i--)
-                   AvARRAY(ary)[i] = &PL_sv_undef; /* don't free mere refs */
-           }
-           /* temporarily switch stacks */
-           SAVESWITCHSTACK(PL_curstack, ary);
+       } else {
             make_mortal = 0;
         }
      }
@@ -5824,6 +6056,10 @@ PP(pp_split)
             while (s < strend && isSPACE_LC(*s))
                 s++;
         }
+        else if (in_uni_8_bit) {
+            while (s < strend && isSPACE_L1(*s))
+                s++;
+        }
         else {
             while (s < strend && isSPACE(*s))
                 s++;
@@ -5855,10 +6091,14 @@ PP(pp_split)
              {
                 while (m < strend && !isSPACE_LC(*m))
                     ++m;
+            }
+            else if (in_uni_8_bit) {
+                while (m < strend && !isSPACE_L1(*m))
+                    ++m;
              } else {
                  while (m < strend && !isSPACE(*m))
                      ++m;
-            }  
+            }
             if (m >= strend)
                 break;
  
@@ -5889,10 +6129,14 @@ PP(pp_split)
              {
                 while (s < strend && isSPACE_LC(*s))
                     ++s;
+            }
+            else if (in_uni_8_bit) {
+                while (s < strend && isSPACE_L1(*s))
+                    ++s;
              } else {
                  while (s < strend && isSPACE(*s))
                      ++s;
-            }      
+            }
         }
      }
      else if (RX_EXTFLAGS(rx) & RXf_START_ONLY) {
@@ -5918,62 +6162,52 @@ PP(pp_split)
         }
      }
      else if (RX_EXTFLAGS(rx) & RXf_NULL && !(s >= strend)) {
-        /*
-          Pre-extend the stack, either the number of bytes or
-          characters in the string or a limited amount, triggered by:
-
-          my ($x, $y) = split //, $str;
-            or
-          split //, $str, $i;
-        */
-       if (!gimme_scalar) {
-           const IV items = limit - 1;
-            /* setting it to -1 will trigger a panic in EXTEND() */
-            const SSize_t sslen = slen > SSize_t_MAX ?  -1 : (SSize_t)slen;
-           if (items >=0 && items < sslen)
-               EXTEND(SP, items);
-           else
-               EXTEND(SP, sslen);
-       }
-
-        if (do_utf8) {
-            while (--limit) {
-                /* keep track of how many bytes we skip over */
-                m = s;
-                s += UTF8SKIP(s);
-               if (gimme_scalar) {
-                   iters++;
-                   if (s-m == 0)
-                       trailing_empty++;
-                   else
-                       trailing_empty = 0;
-               } else {
-                   dstr = newSVpvn_flags(m, s-m, SVf_UTF8 | make_mortal);
-
-                   PUSHs(dstr);
-               }
-
-                if (s >= strend)
-                    break;
+        /* This case boils down to deciding which is the smaller of:
+         * limit - effectively a number of characters
+         * slen - which already contains the number of characters in s
+         *
+         * The resulting number is the number of iters (for gimme_scalar)
+         * or the number of SVs to create (!gimme_scalar). */
+
+        /* setting it to -1 will trigger a panic in EXTEND() */
+        const SSize_t sslen = slen > SSize_t_MAX ?  -1 : (SSize_t)slen;
+        const IV items = limit - 1;
+        if (sslen < items || items < 0) {
+            iters = slen -1;
+            limit = slen + 1;
+            /* Note: The same result is returned if the following block
+             * is removed, because of the "keep field after final delim?"
+             * adjustment, but having the following makes the "correct"
+             * behaviour more apparent. */
+            if (gimme_scalar) {
+                s = strend;
+                iters++;
              }
          } else {
-            while (--limit) {
-               if (gimme_scalar) {
-                   iters++;
-               } else {
-                   dstr = newSVpvn(s, 1);
-
-
-                   if (make_mortal)
-                       sv_2mortal(dstr);
-
-                   PUSHs(dstr);
-               }
-
-                s++;
-
-                if (s >= strend)
-                    break;
+            iters = items;
+        }
+        if (!gimme_scalar) {
+            /*
+              Pre-extend the stack, either the number of bytes or
+              characters in the string or a limited amount, triggered by:
+              my ($x, $y) = split //, $str;
+                or
+              split //, $str, $i;
+            */
+            EXTEND(SP, limit);
+            if (do_utf8) {
+                while (--limit) {
+                    m = s;
+                    s += UTF8SKIP(s);
+                    dstr = newSVpvn_flags(m, s-m, SVf_UTF8 | make_mortal);
+                    PUSHs(dstr);
+                }
+            } else {
+                while (--limit) {
+                    dstr = newSVpvn_flags(s, 1, make_mortal);
+                    PUSHs(dstr);
+                    s++;
+                }
              }
          }
      }
@@ -6006,7 +6240,7 @@ PP(pp_split)
                 /* The rx->minlen is in characters but we want to step
                  * s ahead by bytes. */
                 if (do_utf8)
-                   s = (char*)utf8_hop((U8*)m, len);
+                   s = (char*)utf8_hop_forward((U8*) m, len, (U8*) strend);
                 else
                     s = m + len; /* Fake \n at the end */
             }
@@ -6030,7 +6264,7 @@ PP(pp_split)
                 /* The rx->minlen is in characters but we want to step
                  * s ahead by bytes. */
                 if (do_utf8)
-                   s = (char*)utf8_hop((U8*)m, len);
+                   s = (char*)utf8_hop_forward((U8*)m, len, (U8 *) strend);
                 else
                     s = m + len; /* Fake \n at the end */
             }
@@ -6125,32 +6359,59 @@ PP(pp_split)
      }
  
      PUTBACK;
-    LEAVE_SCOPE(oldsave); /* may undo an earlier SWITCHSTACK */
+    LEAVE_SCOPE(oldsave);
      SPAGAIN;
      if (realarray) {
-       if (!mg) {
-           if (SvSMAGICAL(ary)) {
-               PUTBACK;
+        if (!mg) {
+            PUTBACK;
+            if(AvREAL(ary)) {
+                if (av_count(ary) > 0)
+                    av_clear(ary);
+            } else {
+                AvREAL_on(ary);
+                AvREIFY_off(ary);
+
+                if (AvMAX(ary) > -1) {
+                    /* don't free mere refs */
+                    Zero(AvARRAY(ary), AvMAX(ary), SV*);
+                }
+            }
+            if(AvMAX(ary) < iters)
+                av_extend(ary,iters);
+            SPAGAIN;
+
+            /* Need to copy the SV*s from the stack into ary */
+            Copy(SP + 1 - iters, AvARRAY(ary), iters, SV*);
+            AvFILLp(ary) = iters - 1;
+
+            if (SvSMAGICAL(ary)) {
+                PUTBACK;
                 mg_set(MUTABLE_SV(ary));
                 SPAGAIN;
-           }
-           if (gimme == G_ARRAY) {
-               EXTEND(SP, iters);
-               Copy(AvARRAY(ary), SP + 1, iters, SV*);
-               SP += iters;
-               RETURN;
-           }
+            }
+
+            if (gimme != G_ARRAY) {
+                /* SP points to the final SV* pushed to the stack. But the SV*  */
+                /* are not going to be used from the stack. Point SP to below   */
+                /* the first of these SV*.                                      */
+                SP -= iters;
+                PUTBACK;
+            }
         }
         else {
-           PUTBACK;
-           ENTER_with_name("call_PUSH");
-           call_sv(SV_CONST(PUSH),G_SCALAR|G_DISCARD|G_METHOD_NAMED);
-           LEAVE_with_name("call_PUSH");
-           SPAGAIN;
+            PUTBACK;
+            av_extend(ary,iters);
+            av_clear(ary);
+
+            ENTER_with_name("call_PUSH");
+            call_sv(SV_CONST(PUSH),G_SCALAR|G_DISCARD|G_METHOD_NAMED);
+            LEAVE_with_name("call_PUSH");
+            SPAGAIN;
+
             if (gimme == G_ARRAY) {
                 SSize_t i;
                 /* EXTEND should not be needed - we just popped them */
-               EXTEND(SP, iters);
+               EXTEND_SKIP(SP, iters);
                 for (i=0; i < iters; i++) {
                     SV **svp = av_fetch(ary, i, FALSE);
                     PUSHs((svp) ? *svp : &PL_sv_undef);
@@ -6159,13 +6420,12 @@ PP(pp_split)
             }
         }
      }
-    else {
-       if (gimme == G_ARRAY)
-           RETURN;
-    }
  
-    GETTARGET;
-    XPUSHi(iters);
+    if (gimme != G_ARRAY) {
+        GETTARGET;
+        XPUSHi(iters);
+     }
+
      RETURN;
  }
  
@@ -6208,11 +6468,11 @@ PP(unimplemented_op)
         Secondly, as the three ops we "panic" on are padmy, mapstart and custom,
         if we get here for a custom op then that means that the custom op didn't
         have an implementation. Given that OP_NAME() looks up the custom op
-       by its pp_addr, likely it will return NULL, unless someone (unhelpfully)
-       registers &PL_unimplemented_op as the address of their custom op.
+       by its op_ppaddr, likely it will return NULL, unless someone (unhelpfully)
+       registers &Perl_unimplemented_op as the address of their custom op.
         NULL doesn't generate a useful error message. "custom" does. */
      const char *const name = op_type >= OP_max
-       ? "[out of range]" : PL_op_name[PL_op->op_type];
+       ? "[out of range]" : PL_op_name[op_type];
      if(OP_IS_SOCKET(op_type))
         DIE(aTHX_ PL_no_sock_func, name);
      DIE(aTHX_ "panic: unimplemented op %s (#%d) called", name, op_type);
@@ -6393,9 +6653,22 @@ PP(pp_coreargs)
      RETURN;
  }
  
+/* Implement CORE::keys(),values(),each().
+ *
+ * We won't know until run-time whether the arg is an array or hash,
+ * so this op calls
+ *
+ *    pp_keys/pp_values/pp_each
+ * or
+ *    pp_akeys/pp_avalues/pp_aeach
+ *
+ * as appropriate (or whatever pp function actually implements the OP_FOO
+ * functionality for each FOO).
+ */
+
  PP(pp_avhvswitch)
  {
-    dVAR; dSP;
+    dSP;
      return PL_ppaddr[
                 (SvTYPE(TOPs) == SVt_PVAV ? OP_AEACH : OP_EACH)
                     + (PL_op->op_private & OPpAVHVSWITCH_MASK)
@@ -6563,7 +6836,7 @@ PP(pp_lvref)
          }
        }
        else if (arg) {
-       S_localise_gv_slot(aTHX_ (GV *)arg, 
+       S_localise_gv_slot(aTHX_ (GV *)arg,
                                  PL_op->op_private & OPpLVREF_TYPE);
        }
        else if (!(PL_op->op_private & OPpPAD_STATE))
@@ -6602,10 +6875,12 @@ PP(pp_lvrefslice)
  
      while (++MARK <= SP) {
         SV * const elemsv = *MARK;
-       if (SvTYPE(av) == SVt_PVAV)
-           S_localise_aelem_lval(aTHX_ av, elemsv, can_preserve);
-       else
-           S_localise_helem_lval(aTHX_ (HV *)av, elemsv, can_preserve);
+        if (UNLIKELY(localizing)) {
+            if (SvTYPE(av) == SVt_PVAV)
+                S_localise_aelem_lval(aTHX_ av, elemsv, can_preserve);
+            else
+                S_localise_helem_lval(aTHX_ (HV *)av, elemsv, can_preserve);
+        }
         *MARK = sv_2mortal(newSV_type(SVt_PVMG));
         sv_magic(*MARK,(SV *)av,PERL_MAGIC_lvref,(char *)elemsv,HEf_SVKEY);
      }
@@ -6644,7 +6919,7 @@ PP(pp_anonconst)
   *  for $:   (OPf_STACKED ? *sp : $_[N])
   *  for @/%: @_[N..$#_]
   *
- * It's equivalent to 
+ * It's equivalent to
   *    my $foo = $_[N];
   * or
   *    my $foo = (value-on-stack)
@@ -6845,7 +7120,7 @@ S_find_runcv_name(void)
      return sv;
  }
  
-/* Check a  a subs arguments - i.e. that it has the correct number of args
+/* Check a sub's arguments - i.e. that it has the correct number of args
   * (and anything else we might think of in future). Typically used with
   * signatured subs.
   */
@@ -6853,16 +7128,16 @@ S_find_runcv_name(void)
  PP(pp_argcheck)
  {
      OP * const o       = PL_op;
-    UNOP_AUX_item *aux = cUNOP_AUXo->op_aux;
-    IV   params        = aux[0].iv;
-    IV   opt_params    = aux[1].iv;
-    char slurpy        = (char)(aux[2].iv);
+    struct op_argcheck_aux *aux = (struct op_argcheck_aux *)cUNOP_AUXo->op_aux;
+    UV   params        = aux->params;
+    UV   opt_params    = aux->opt_params;
+    char slurpy        = aux->slurpy;
      AV  *defav         = GvAV(PL_defgv); /* @_ */
-    IV   argc;
+    UV   argc;
      bool too_few;
  
      assert(!SvMAGICAL(defav));
-    argc = (AvFILLp(defav) + 1);
+    argc = (UV)(AvFILLp(defav) + 1);
      too_few = (argc < (params - opt_params));
  
      if (UNLIKELY(too_few || (!slurpy && argc > params)))
@@ -6879,6 +7154,42 @@ PP(pp_argcheck)
      return NORMAL;
  }
  
+PP(pp_isa)
+{
+    dSP;
+    SV *left, *right;
+
+    right = POPs;
+    left  = TOPs;
+
+    SETs(boolSV(sv_isa_sv(left, right)));
+    RETURN;
+}
+
+PP(pp_cmpchain_and)
+{
+    dSP;
+    SV *result = POPs;
+    PUTBACK;
+    if (SvTRUE_NN(result)) {
+       return cLOGOP->op_other;
+    } else {
+       TOPs = result;
+       return NORMAL;
+    }
+}
+
+PP(pp_cmpchain_dup)
+{
+    dSP;
+    SV *right = TOPs;
+    SV *left = TOPm1s;
+    TOPm1s = right;
+    TOPs = left;
+    XPUSHs(right);
+    RETURN;
+}
+
  /*
   * ex: set ts=8 sts=4 sw=4 et:
   */