Update corelist for 5.17.10

[perl5.git] / pp.c
diff --git a/pp.c b/pp.c

index e587f7d..14ba91d 100644 (file)
--- a/pp.c
+++ b/pp.c
@@ -84,6 +84,7 @@ PP(pp_padav)
      }
      gimme = GIMME_V;
      if (gimme == G_ARRAY) {
+        /* XXX see also S_pushav in pp_hot.c */
         const I32 maxarg = AvFILL(MUTABLE_AV(TARG)) + 1;
         EXTEND(SP, maxarg);
         if (SvMAGICAL(TARG)) {
@@ -163,7 +164,8 @@ PP(pp_clonecv)
  {
      dVAR; dTARGET;
      MAGIC * const mg =
-       mg_find(AvARRAY(PL_comppad_name)[ARGTARG], PERL_MAGIC_proto);
+       mg_find(PadlistNAMESARRAY(CvPADLIST(find_runcv(NULL)))[ARGTARG],
+               PERL_MAGIC_proto);
      assert(SvTYPE(TARG) == SVt_PVCV);
      assert(mg);
      assert(mg->mg_obj);
@@ -172,7 +174,7 @@ PP(pp_clonecv)
         /* If this changes to use SAVECLEARSV, we can move the SAVECLEARSV
            to introcv and remove the SvPADSTALE_off. */
         SAVEPADSVANDMORTALIZE(ARGTARG);
-       PAD_SVl(ARGTARG) = mg->mg_obj;
+       PAD_SVl(ARGTARG) = SvREFCNT_inc_simple_NN(mg->mg_obj);
      }
      else {
         if (CvROOT(mg->mg_obj)) {
@@ -229,7 +231,7 @@ S_rv2gv(pTHX_ SV *sv, const bool vivify_sv, const bool strict,
                 if (vivify_sv && sv != &PL_sv_undef) {
                     GV *gv;
                     if (SvREADONLY(sv))
-                       Perl_croak_no_modify(aTHX);
+                       Perl_croak_no_modify();
                     if (cUNOP->op_targ) {
                         SV * const namesv = PAD_SV(cUNOP->op_targ);
                         gv = MUTABLE_GV(newSV(0));
@@ -238,7 +240,7 @@ S_rv2gv(pTHX_ SV *sv, const bool vivify_sv, const bool strict,
                     else {
                         const char * const name = CopSTASHPV(PL_curcop);
                         gv = newGVgen_flags(name,
-                                        HvNAMEUTF8(CopSTASH(PL_curcop)) ? SVf_UTF8 : 0 );
+                                HvNAMEUTF8(CopSTASH(PL_curcop)) ? SVf_UTF8 : 0 );
                     }
                     prepare_SV_for_RV(sv);
                     SvRV_set(sv, MUTABLE_SV(gv));
@@ -423,7 +425,7 @@ PP(pp_pos)
      dVAR; dSP; dPOPss;
  
      if (PL_op->op_flags & OPf_MOD || LVRET) {
-       SV * const ret = sv_2mortal(newSV_type(SVt_PVLV));  /* Not TARG RT#67838 */
+       SV * const ret = sv_2mortal(newSV_type(SVt_PVLV));/* Not TARG RT#67838 */
         sv_magic(ret, NULL, PERL_MAGIC_pos, NULL, 0);
         LvTYPE(ret) = '.';
         LvTARG(ret) = SvREFCNT_inc_simple(sv);
@@ -453,7 +455,8 @@ PP(pp_rv2cv)
      HV *stash_unused;
      const I32 flags = (PL_op->op_flags & OPf_SPECIAL)
         ? GV_ADDMG
-       : ((PL_op->op_private & (OPpLVAL_INTRO|OPpMAY_RETURN_CONSTANT)) == OPpMAY_RETURN_CONSTANT)
+       : ((PL_op->op_private & (OPpLVAL_INTRO|OPpMAY_RETURN_CONSTANT))
+                                                    == OPpMAY_RETURN_CONSTANT)
             ? GV_ADD|GV_NOEXPAND
             : GV_ADD;
      /* We usually try to add a non-existent subroutine in case of AUTOLOAD. */
@@ -486,7 +489,8 @@ PP(pp_prototype)
             if (!code || code == -KEY_CORE)
                 DIE(aTHX_ "Can't find an opnumber for \"%"SVf"\"",
                     SVfARG(newSVpvn_flags(
-                       s+6, SvCUR(TOPs)-6, SvFLAGS(TOPs) & SVf_UTF8
+                       s+6, SvCUR(TOPs)-6,
+                       (SvFLAGS(TOPs) & SVf_UTF8)|SVs_TEMP
                     )));
             {
                 SV * const sv = core_prototype(NULL, s + 6, code, NULL);
@@ -636,7 +640,12 @@ PP(pp_gelem)
         switch (*elem) {
         case 'A':
             if (len == 5 && strEQ(second_letter, "RRAY"))
+           {
                 tmpRef = MUTABLE_SV(GvAV(gv));
+               if (tmpRef && !AvREAL((const AV *)tmpRef)
+                && AvREIFY((const AV *)tmpRef))
+                   av_reify(MUTABLE_AV(tmpRef));
+           }
             break;
         case 'C':
             if (len == 4 && strEQ(second_letter, "ODE"))
@@ -770,12 +779,10 @@ S_do_chomp(pTHX_ SV *retval, SV *sv, bool chomping)
         return;
      }
      else if (SvREADONLY(sv)) {
-        if (SvFAKE(sv)) {
-            /* SV is copy-on-write */
-           sv_force_normal_flags(sv, 0);
-        }
-        else
-            Perl_croak_no_modify(aTHX);
+            Perl_croak_no_modify();
+    }
+    else if (SvIsCOW(sv)) {
+       sv_force_normal_flags(sv, 0);
      }
  
      if (PL_encoding) {
@@ -979,11 +986,9 @@ PP(pp_undef)
         }
         break;
      case SVt_PVGV:
-       if (SvFAKE(sv)) {
-           SvSetMagicSV(sv, &PL_sv_undef);
-           break;
-       }
-       else if (isGV_with_GP(sv)) {
+       assert(isGV_with_GP(sv));
+       assert(!SvFAKE(sv));
+       {
             GP *gp;
              HV *stash;
  
@@ -1021,7 +1026,6 @@ PP(pp_undef)
  
             break;
         }
-       /* FALL THROUGH */
      default:
         if (SvTYPE(sv) >= SVt_PV && SvPVX_const(sv) && SvLEN(sv)) {
             SvPV_free(sv);
@@ -1041,7 +1045,7 @@ PP(pp_postinc)
      const bool inc =
         PL_op->op_type == OP_POSTINC || PL_op->op_type == OP_I_POSTINC;
      if (SvTYPE(TOPs) >= SVt_PVAV || (isGV_with_GP(TOPs) && !SvFAKE(TOPs)))
-       Perl_croak_no_modify(aTHX);
+       Perl_croak_no_modify();
      if (SvROK(TOPs))
         TARG = sv_newmortal();
      sv_setsv(TARG, TOPs);
@@ -1643,7 +1647,7 @@ PP(pp_repeat)
  
      if (GIMME == G_ARRAY && PL_op->op_private & OPpREPEAT_DOLIST) {
         dMARK;
-       static const char oom_list_extend[] = "Out of memory during list extend";
+       static const char* const oom_list_extend = "Out of memory during list extend";
         const I32 items = SP - MARK;
         const I32 max = items * count;
  
@@ -1695,7 +1699,7 @@ PP(pp_repeat)
         SV * const tmpstr = POPs;
         STRLEN len;
         bool isutf;
-       static const char oom_string_extend[] =
+       static const char* const oom_string_extend =
           "Out of memory during string extend";
  
         if (TARG != tmpstr)
@@ -2240,7 +2244,8 @@ PP(pp_negate)
             if (SvIsUV(sv)) {
                 if (SvIVX(sv) == IV_MIN) {
                     /* 2s complement assumption. */
-                   SETi(SvIVX(sv));    /* special case: -((UV)IV_MAX+1) == IV_MIN */
+                    SETi(SvIVX(sv));   /* special case: -((UV)IV_MAX+1) ==
+                                           IV_MIN */
                     RETURN;
                 }
                 else if (SvUVX(sv) <= IV_MAX) {
@@ -2698,24 +2703,37 @@ extern double drand48 (void);
  
  PP(pp_rand)
  {
-    dVAR; dSP; dTARGET;
-    NV value;
-    if (MAXARG < 1)
-       value = 1.0;
-    else if (!TOPs) {
-       value = 1.0; (void)POPs;
-    }
-    else
-       value = POPn;
-    if (value == 0.0)
-       value = 1.0;
+    dVAR;
      if (!PL_srand_called) {
         (void)seedDrand01((Rand_seed_t)seed());
         PL_srand_called = TRUE;
      }
-    value *= Drand01();
-    XPUSHn(value);
-    RETURN;
+    {
+       dSP;
+       NV value;
+       EXTEND(SP, 1);
+    
+       if (MAXARG < 1)
+           value = 1.0;
+       else {
+           SV * const sv = POPs;
+           if(!sv)
+               value = 1.0;
+           else
+               value = SvNV(sv);
+       }
+    /* 1 of 2 things can be carried through SvNV, SP or TARG, SP was carried */
+       if (value == 0.0)
+           value = 1.0;
+       {
+           dTARGET;
+           PUSHs(TARG);
+           PUTBACK;
+           value *= Drand01();
+           sv_setnv_mg(TARG, value);
+       }
+    }
+    return NORMAL;
  }
  
  PP(pp_srand)
@@ -2890,35 +2908,16 @@ PP(pp_length)
      dVAR; dSP; dTARGET;
      SV * const sv = TOPs;
  
-    if (SvGAMAGIC(sv)) {
-       /* For an overloaded or magic scalar, we can't know in advance if
-          it's going to be UTF-8 or not. Also, we can't call sv_len_utf8 as
-          it likes to cache the length. Maybe that should be a documented
-          feature of it.
-       */
-       STRLEN len;
-       const char *const p
-           = sv_2pv_flags(sv, &len,
-                          SV_UNDEF_RETURNS_NULL|SV_CONST_RETURN|SV_GMAGIC);
-
-       if (!p) {
-           if (!SvPADTMP(TARG)) {
-               sv_setsv(TARG, &PL_sv_undef);
-               SETTARG;
-           }
-           SETs(&PL_sv_undef);
-       }
-       else if (DO_UTF8(sv)) {
-           SETi(utf8_length((U8*)p, (U8*)p + len));
-       }
+    SvGETMAGIC(sv);
+    if (SvOK(sv)) {
+       if (!IN_BYTES)
+           SETi(sv_len_utf8_nomg(sv));
         else
+       {
+           STRLEN len;
+           (void)SvPV_nomg_const(sv,len);
             SETi(len);
-    } else if (SvOK(sv)) {
-       /* Neither magic nor overloaded.  */
-       if (DO_UTF8(sv))
-           SETi(sv_len_utf8(sv));
-       else
-           SETi(sv_len(sv));
+       }
      } else {
         if (!SvPADTMP(TARG)) {
             sv_setsv_nomg(TARG, &PL_sv_undef);
@@ -3016,7 +3015,6 @@ PP(pp_substr)
      STRLEN repl_len;
      int num_args = PL_op->op_private & 7;
      bool repl_need_utf8_upgrade = FALSE;
-    bool repl_is_utf8 = FALSE;
  
      if (num_args > 2) {
         if (num_args > 3) {
@@ -3037,17 +3035,7 @@ PP(pp_substr)
         repl_sv = POPs;
      }
      PUTBACK;
-    if (repl_sv) {
-       repl = SvPV_const(repl_sv, repl_len);
-       repl_is_utf8 = DO_UTF8(repl_sv) && repl_len;
-       if (repl_is_utf8) {
-           if (!DO_UTF8(sv))
-               sv_utf8_upgrade(sv);
-       }
-       else if (DO_UTF8(sv))
-           repl_need_utf8_upgrade = TRUE;
-    }
-    else if (lvalue) {
+    if (lvalue && !repl_sv) {
         SV * ret;
         ret = sv_2mortal(newSV_type(SVt_PVLV));  /* Not TARG RT#67838 */
         sv_magic(ret, NULL, PERL_MAGIC_substr, NULL, 0);
@@ -3066,9 +3054,26 @@ PP(pp_substr)
         PUSHs(ret);    /* avoid SvSETMAGIC here */
         RETURN;
      }
-    tmps = SvPV_const(sv, curlen);
+    if (repl_sv) {
+       repl = SvPV_const(repl_sv, repl_len);
+       SvGETMAGIC(sv);
+       if (SvROK(sv))
+           Perl_ck_warner(aTHX_ packWARN(WARN_SUBSTR),
+                           "Attempt to use reference as lvalue in substr"
+           );
+       tmps = SvPV_force_nomg(sv, curlen);
+       if (DO_UTF8(repl_sv) && repl_len) {
+           if (!DO_UTF8(sv)) {
+               sv_utf8_upgrade_nomg(sv);
+               curlen = SvCUR(sv);
+           }
+       }
+       else if (DO_UTF8(sv))
+           repl_need_utf8_upgrade = TRUE;
+    }
+    else tmps = SvPV_const(sv, curlen);
      if (DO_UTF8(sv)) {
-        utf8_curlen = sv_len_utf8_nomg(sv);
+        utf8_curlen = sv_or_pv_len_utf8(sv, tmps, curlen);
         if (utf8_curlen == curlen)
             utf8_curlen = 0;
         else
@@ -3086,7 +3091,7 @@ PP(pp_substr)
  
         byte_len = len;
         byte_pos = utf8_curlen
-           ? sv_pos_u2b_flags(sv, pos, &byte_len, SV_CONST_RETURN) : pos;
+           ? sv_or_pv_pos_u2b(sv, tmps, pos, &byte_len) : pos;
  
         tmps += byte_pos;
  
@@ -3108,17 +3113,10 @@ PP(pp_substr)
                 repl_sv_copy = newSVsv(repl_sv);
                 sv_utf8_upgrade(repl_sv_copy);
                 repl = SvPV_const(repl_sv_copy, repl_len);
-               repl_is_utf8 = DO_UTF8(repl_sv_copy) && repl_len;
             }
-           if (SvROK(sv))
-               Perl_ck_warner(aTHX_ packWARN(WARN_SUBSTR),
-                           "Attempt to use reference as lvalue in substr"
-               );
             if (!SvOK(sv))
                 sv_setpvs(sv, "");
             sv_insert_flags(sv, byte_pos, byte_len, repl, repl_len, 0);
-           if (repl_is_utf8)
-               SvUTF8_on(sv);
             SvREFCNT_dec(repl_sv_copy);
         }
      }
@@ -3550,10 +3548,10 @@ PP(pp_ucfirst)
                       * replace just the first character in place. */
                     inplace = FALSE;
  
-                   /* If the result won't fit in a byte, the entire result will
-                    * have to be in UTF-8.  Assume worst case sizing in
-                    * conversion. (all latin1 characters occupy at most two bytes
-                    * in utf8) */
+                    /* If the result won't fit in a byte, the entire result
+                     * will have to be in UTF-8.  Assume worst case sizing in
+                     * conversion. (all latin1 characters occupy at most two
+                     * bytes in utf8) */
                     if (title_ord > 255) {
                         doing_utf8 = TRUE;
                         convert_source_to_utf8 = TRUE;
@@ -3761,7 +3759,7 @@ PP(pp_uc)
             STRLEN u;
             STRLEN ulen;
             UV uv;
-           if (in_iota_subscript && ! is_utf8_mark(s)) {
+           if (in_iota_subscript && ! _is_utf8_mark(s)) {
  
                 /* A non-mark.  Time to output the iota subscript */
  #define GREEK_CAPITAL_LETTER_IOTA 0x0399
@@ -3834,7 +3832,9 @@ PP(pp_uc)
             else {
                 for (; s < send; d++, s++) {
                     *d = toUPPER_LATIN1_MOD(*s);
-                   if (LIKELY(*d != LATIN_SMALL_LETTER_Y_WITH_DIAERESIS)) continue;
+                   if (LIKELY(*d != LATIN_SMALL_LETTER_Y_WITH_DIAERESIS)) {
+                        continue;
+                    }
  
                     /* The mainstream case is the tight loop above.  To avoid
                      * extra tests in that, all three characters that require
@@ -4212,21 +4212,19 @@ PP(pp_fc)
                  *d = toLOWER(*s);
          }
          else {
-            /* For ASCII and the Latin-1 range, there's only two troublesome folds,
-            * \x{DF} (\N{LATIN SMALL LETTER SHARP S}), which under full casefolding
-            * becomes 'ss', and \x{B5} (\N{MICRO SIGN}), which under any fold becomes
-            * \x{3BC} (\N{GREEK SMALL LETTER MU}) -- For the rest, the casefold is
-            * their lowercase.
-            */
+            /* For ASCII and the Latin-1 range, there's only two troublesome
+             * folds, \x{DF} (\N{LATIN SMALL LETTER SHARP S}), which under full
+             * casefolding becomes 'ss', and \x{B5} (\N{MICRO SIGN}), which
+             * under any fold becomes \x{3BC} (\N{GREEK SMALL LETTER MU}) --
+             * For the rest, the casefold is their lowercase.  */
              for (; s < send; d++, s++) {
                  if (*s == MICRO_SIGN) {
-                    /* \N{MICRO SIGN}'s casefold is \N{GREEK SMALL LETTER MU}, which
-                    * is outside of the latin-1 range. There's a couple of ways to
-                    * deal with this -- khw discusses them in pp_lc/uc, so go there :)
-                    * What we do here is upgrade what we had already casefolded,
-                    * then enter an inner loop that appends the rest of the characters
-                    * as UTF-8.
-                    */
+                    /* \N{MICRO SIGN}'s casefold is \N{GREEK SMALL LETTER MU},
+                     * which is outside of the latin-1 range. There's a couple
+                     * of ways to deal with this -- khw discusses them in
+                     * pp_lc/uc, so go there :) What we do here is upgrade what
+                     * we had already casefolded, then enter an inner loop that
+                     * appends the rest of the characters as UTF-8. */
                      len = d - (U8*)SvPVX_const(dest);
                      SvCUR_set(dest, len);
                      len = sv_utf8_upgrade_flags_grow(dest,
@@ -4242,7 +4240,9 @@ PP(pp_fc)
                          STRLEN ulen;
                          UV fc = _to_uni_fold_flags(*s, tmpbuf, &ulen, flags);
                          if UNI_IS_INVARIANT(fc) {
-                            if ( full_folding && *s == LATIN_SMALL_LETTER_SHARP_S) {
+                            if (full_folding
+                                && *s == LATIN_SMALL_LETTER_SHARP_S)
+                            {
                                  *d++ = 's';
                                  *d++ = 's';
                              }
@@ -4257,9 +4257,8 @@ PP(pp_fc)
                      break;
                  }
                  else if (full_folding && *s == LATIN_SMALL_LETTER_SHARP_S) {
-                    /* Under full casefolding, LATIN SMALL LETTER SHARP S becomes "ss",
-                    * which may require growing the SV.
-                    */
+                    /* Under full casefolding, LATIN SMALL LETTER SHARP S
+                     * becomes "ss", which may require growing the SV. */
                      if (SvLEN(dest) < ++min) {
                          const UV o = d - (U8*)SvPVX_const(dest);
                          SvGROW(dest, min);
@@ -4268,7 +4267,8 @@ PP(pp_fc)
                      *(d)++ = 's';
                      *d = 's';
                  }
-                else { /* If it's not one of those two, the fold is their lower case */
+                else { /* If it's not one of those two, the fold is their lower
+                          case */
                      *d = toLOWER_LATIN1(*s);
                  }
               }
@@ -4382,7 +4382,9 @@ PP(pp_rkeys)
         return (SvTYPE(sv) == SVt_PVHV) ? Perl_do_kv(aTHX) : Perl_pp_akeys(aTHX);
      }
      else {
-       return (SvTYPE(sv) == SVt_PVHV) ? Perl_pp_each(aTHX) : Perl_pp_aeach(aTHX);
+       return (SvTYPE(sv) == SVt_PVHV)
+               ? Perl_pp_each(aTHX)
+               : Perl_pp_aeach(aTHX);
      }
  }
  
@@ -4826,20 +4828,30 @@ PP(pp_anonlist)
  PP(pp_anonhash)
  {
      dVAR; dSP; dMARK; dORIGMARK;
-    HV* const hv = newHV();
+    HV* const hv = (HV *)sv_2mortal((SV *)newHV());
  
      while (MARK < SP) {
-       SV * const key = *++MARK;
-       SV * const val = newSV(0);
+       SV * const key =
+           (MARK++, SvGMAGICAL(*MARK) ? sv_mortalcopy(*MARK) : *MARK);
+       SV *val;
         if (MARK < SP)
-           sv_setsv(val, *++MARK);
+       {
+           MARK++;
+           SvGETMAGIC(*MARK);
+           val = newSV(0);
+           sv_setsv(val, *MARK);
+       }
         else
+       {
             Perl_ck_warner(aTHX_ packWARN(WARN_MISC), "Odd number of elements in anonymous hash");
+           val = newSV(0);
+       }
         (void)hv_store_ent(hv,key,val,0);
      }
      SP = ORIGMARK;
-    mXPUSHs((PL_op->op_flags & OPf_SPECIAL)
-           ? newRV_noinc(MUTABLE_SV(hv)) : MUTABLE_SV(hv));
+    if (PL_op->op_flags & OPf_SPECIAL)
+       mXPUSHs(newRV_inc(MUTABLE_SV(hv)));
+    else XPUSHs(MUTABLE_SV(hv));
      RETURN;
  }
  
@@ -5092,11 +5104,14 @@ PP(pp_push)
         SPAGAIN;
      }
      else {
+       if (SvREADONLY(ary) && MARK < SP) Perl_croak_no_modify();
         PL_delaymagic = DM_DELAY;
         for (++MARK; MARK <= SP; MARK++) {
-           SV * const sv = newSV(0);
+           SV *sv;
+           if (*MARK) SvGETMAGIC(*MARK);
+           sv = newSV(0);
             if (*MARK)
-               sv_setsv(sv, *MARK);
+               sv_setsv_nomg(sv, *MARK);
             av_store(ary, AvFILLp(ary)+1, sv);
         }
         if (PL_delaymagic & DM_ARRAY_ISA)
@@ -5298,13 +5313,16 @@ PP(pp_split)
      STRLEN len;
      const char *s = SvPV_const(sv, len);
      const bool do_utf8 = DO_UTF8(sv);
+    const bool skipwhite = PL_op->op_flags & OPf_SPECIAL;
      const char *strend = s + len;
      PMOP *pm;
      REGEXP *rx;
      SV *dstr;
      const char *m;
      I32 iters = 0;
-    const STRLEN slen = do_utf8 ? utf8_length((U8*)s, (U8*)strend) : (STRLEN)(strend - s);
+    const STRLEN slen = do_utf8
+                        ? utf8_length((U8*)s, (U8*)strend)
+                        : (STRLEN)(strend - s);
      I32 maxiters = slen + 10;
      I32 trailing_empty = 0;
      const char *orig;
@@ -5328,7 +5346,7 @@ PP(pp_split)
      rx = PM_GETRE(pm);
  
      TAINT_IF(get_regex_charset(RX_EXTFLAGS(rx)) == REGEX_LOCALE_CHARSET &&
-            (RX_EXTFLAGS(rx) & (RXf_WHITE | RXf_SKIPWHITE)));
+            (RX_EXTFLAGS(rx) & RXf_WHITE || skipwhite));
  
      RX_MATCH_UTF8_set(rx, do_utf8);
  
@@ -5343,7 +5361,7 @@ PP(pp_split)
  #endif
      else
         ary = NULL;
-    if (ary && (gimme != G_ARRAY || (pm->op_pmflags & PMf_ONCE))) {
+    if (ary) {
         realarray = 1;
         PUTBACK;
         av_extend(ary,0);
@@ -5359,7 +5377,7 @@ PP(pp_split)
                 AvREAL_on(ary);
                 AvREIFY_off(ary);
                 for (i = AvFILLp(ary); i >= 0; i--)
-                   AvARRAY(ary)[i] = &PL_sv_undef;     /* don't free mere refs */
+                   AvARRAY(ary)[i] = &PL_sv_undef; /* don't free mere refs */
             }
             /* temporarily switch stacks */
             SAVESWITCHSTACK(PL_curstack, ary);
@@ -5368,9 +5386,9 @@ PP(pp_split)
      }
      base = SP - PL_stack_base;
      orig = s;
-    if (RX_EXTFLAGS(rx) & RXf_SKIPWHITE) {
+    if (skipwhite) {
         if (do_utf8) {
-           while (*s == ' ' || is_utf8_space((U8*)s))
+           while (isSPACE_utf8(s))
                 s += UTF8SKIP(s);
         }
         else if (get_regex_charset(RX_EXTFLAGS(rx)) == REGEX_LOCALE_CHARSET) {
@@ -5390,21 +5408,22 @@ PP(pp_split)
  
      if (!limit)
         limit = maxiters + 2;
-    if (RX_EXTFLAGS(rx) & RXf_WHITE) {
+    if (RX_EXTFLAGS(rx) & RXf_WHITE || skipwhite) {
         while (--limit) {
             m = s;
             /* this one uses 'm' and is a negative test */
             if (do_utf8) {
-               while (m < strend && !( *m == ' ' || is_utf8_space((U8*)m) )) {
+               while (m < strend && ! isSPACE_utf8(m) ) {
                     const int t = UTF8SKIP(m);
-                   /* is_utf8_space returns FALSE for malform utf8 */
+                   /* isSPACE_utf8 returns FALSE for malform utf8 */
                     if (strend - m < t)
                         m = strend;
                     else
                         m += t;
                 }
             }
-           else if (get_regex_charset(RX_EXTFLAGS(rx)) == REGEX_LOCALE_CHARSET) {
+           else if (get_regex_charset(RX_EXTFLAGS(rx)) == REGEX_LOCALE_CHARSET)
+            {
                 while (m < strend && !isSPACE_LC(*m))
                     ++m;
              } else {
@@ -5434,10 +5453,11 @@ PP(pp_split)
  
             /* this one uses 's' and is a positive test */
             if (do_utf8) {
-               while (s < strend && ( *s == ' ' || is_utf8_space((U8*)s) ))
+               while (s < strend && isSPACE_utf8(s) )
                     s +=  UTF8SKIP(s);
             }
-           else if (get_regex_charset(RX_EXTFLAGS(rx)) == REGEX_LOCALE_CHARSET) {
+           else if (get_regex_charset(RX_EXTFLAGS(rx)) == REGEX_LOCALE_CHARSET)
+            {
                 while (s < strend && isSPACE_LC(*s))
                     ++s;
              } else {
@@ -5549,7 +5569,7 @@ PP(pp_split)
                         trailing_empty = 0;
                 } else {
                     dstr = newSVpvn_flags(s, m-s,
-                                         (do_utf8 ? SVf_UTF8 : 0) | make_mortal);
+                                        (do_utf8 ? SVf_UTF8 : 0) | make_mortal);
                     XPUSHs(dstr);
                 }
                 /* The rx->minlen is in characters but we want to step
@@ -5573,7 +5593,7 @@ PP(pp_split)
                         trailing_empty = 0;
                 } else {
                     dstr = newSVpvn_flags(s, m-s,
-                                         (do_utf8 ? SVf_UTF8 : 0) | make_mortal);
+                                        (do_utf8 ? SVf_UTF8 : 0) | make_mortal);
                     XPUSHs(dstr);
                 }
                 /* The rx->minlen is in characters but we want to step
@@ -5591,7 +5611,7 @@ PP(pp_split)
         {
             I32 rex_return;
             PUTBACK;
-           rex_return = CALLREGEXEC(rx, (char*)s, (char*)strend, (char*)orig, 1 ,
+           rex_return = CALLREGEXEC(rx, (char*)s, (char*)strend, (char*)orig, 1,
                                      sv, NULL, 0);
             SPAGAIN;
             if (rex_return == 0)