sv.c: Drop PV when assigning over regexp

[perl5.git] / sv.c
diff --git a/sv.c b/sv.c

index 641b56f..5360b21 100644 (file)
--- a/sv.c
+++ b/sv.c
@@ -1161,7 +1161,7 @@ Perl_sv_upgrade(pTHX_ register SV *const sv, svtype new_type)
         no longer need to unshare so as to free up the IVX slot for its proper
         purpose. So it's safe to move the early return earlier.  */
  
-    if (new_type != SVt_PV && SvIsCOW(sv)) {
+    if (new_type > SVt_PVMG && SvIsCOW(sv)) {
         sv_force_normal_flags(sv, 0);
      }
  
@@ -1329,11 +1329,6 @@ Perl_sv_upgrade(pTHX_ register SV *const sv, svtype new_type)
         }
         break;
  
-
-    case SVt_REGEXP:
-       /* This ensures that SvTHINKFIRST(sv) is true, and hence that
-          sv_force_normal_flags(sv) is called.  */
-       SvFAKE_on(sv);
      case SVt_PVIV:
         /* XXX Is this still needed?  Was it ever needed?   Surely as there is
            no route from NV to PVIV, NOK can never be true  */
@@ -1344,6 +1339,7 @@ Perl_sv_upgrade(pTHX_ register SV *const sv, svtype new_type)
      case SVt_PVGV:
      case SVt_PVCV:
      case SVt_PVLV:
+    case SVt_REGEXP:
      case SVt_PVMG:
      case SVt_PVNV:
      case SVt_PV:
@@ -2067,7 +2063,7 @@ S_sv_2iuv_common(pTHX_ SV *const sv)
                                   SvUVX(sv)));
         }
      }
-    else if (SvPOKp(sv) && SvLEN(sv)) {
+    else if (SvPOKp(sv)) {
         UV value;
         const int numtype = grok_number(SvPVX_const(sv), SvCUR(sv), &value);
         /* We want to avoid a possible problem when we cache an IV/ a UV which
@@ -2277,18 +2273,17 @@ Perl_sv_2iv_flags(pTHX_ register SV *const sv, const I32 flags)
         return PTR2IV(SvRV(sv));
      }
  
-    if (SvVALID(sv)) {
+    if (SvVALID(sv) || SvTYPE(sv) == SVt_REGEXP) {
         /* FBMs use the space for SvIVX and SvNVX for other purposes, and use
            the same flag bit as SVf_IVisUV, so must not let them cache IVs.
            In practice they are extremely unlikely to actually get anywhere
            accessible by user Perl code - the only way that I'm aware of is when
            a constant subroutine which is used as the second argument to index.
+
+          Regexps have no SvIVX and SvNVX fields.
         */
-       if (SvIOKp(sv))
-           return SvIVX(sv);
-       if (SvNOKp(sv))
-           return I_V(SvNVX(sv));
-       if (SvPOKp(sv) && SvLEN(sv)) {
+       assert(SvPOKp(sv));
+       {
             UV value;
             const int numtype
                 = grok_number(SvPVX_const(sv), SvCUR(sv), &value);
@@ -2310,15 +2305,14 @@ Perl_sv_2iv_flags(pTHX_ register SV *const sv, const I32 flags)
             }
             return I_V(Atof(SvPVX_const(sv)));
         }
-       if (ckWARN(WARN_UNINITIALIZED))
-           report_uninit(sv);
-       return 0;
      }
  
      if (SvTHINKFIRST(sv)) {
+#ifdef PERL_OLD_COPY_ON_WRITE
         if (SvIsCOW(sv)) {
             sv_force_normal_flags(sv, 0);
         }
+#endif
         if (SvREADONLY(sv) && !SvOK(sv)) {
             if (ckWARN(WARN_UNINITIALIZED))
                 report_uninit(sv);
@@ -2370,14 +2364,12 @@ Perl_sv_2uv_flags(pTHX_ register SV *const sv, const I32 flags)
         return PTR2UV(SvRV(sv));
      }
  
-    if (SvVALID(sv)) {
+    if (SvVALID(sv) || SvTYPE(sv) == SVt_REGEXP) {
         /* FBMs use the space for SvIVX and SvNVX for other purposes, and use
-          the same flag bit as SVf_IVisUV, so must not let them cache IVs.  */
-       if (SvIOKp(sv))
-           return SvUVX(sv);
-       if (SvNOKp(sv))
-           return U_V(SvNVX(sv));
-       if (SvPOKp(sv) && SvLEN(sv)) {
+          the same flag bit as SVf_IVisUV, so must not let them cache IVs.  
+          Regexps have no SvIVX and SvNVX fields. */
+       assert(SvPOKp(sv));
+       {
             UV value;
             const int numtype
                 = grok_number(SvPVX_const(sv), SvCUR(sv), &value);
@@ -2394,15 +2386,14 @@ Perl_sv_2uv_flags(pTHX_ register SV *const sv, const I32 flags)
             }
             return U_V(Atof(SvPVX_const(sv)));
         }
-       if (ckWARN(WARN_UNINITIALIZED))
-           report_uninit(sv);
-       return 0;
      }
  
      if (SvTHINKFIRST(sv)) {
+#ifdef PERL_OLD_COPY_ON_WRITE
         if (SvIsCOW(sv)) {
             sv_force_normal_flags(sv, 0);
         }
+#endif
         if (SvREADONLY(sv) && !SvOK(sv)) {
             if (ckWARN(WARN_UNINITIALIZED))
                 report_uninit(sv);
@@ -2436,14 +2427,15 @@ Perl_sv_2nv_flags(pTHX_ register SV *const sv, const I32 flags)
      dVAR;
      if (!sv)
         return 0.0;
-    if (SvGMAGICAL(sv) || SvVALID(sv)) {
+    if (SvGMAGICAL(sv) || SvVALID(sv) || SvTYPE(sv) == SVt_REGEXP) {
         /* FBMs use the space for SvIVX and SvNVX for other purposes, and use
-          the same flag bit as SVf_IVisUV, so must not let them cache NVs.  */
+          the same flag bit as SVf_IVisUV, so must not let them cache NVs.
+          Regexps have no SvIVX and SvNVX fields.  */
         if (flags & SV_GMAGIC)
             mg_get(sv);
         if (SvNOKp(sv))
             return SvNVX(sv);
-       if ((SvPOKp(sv) && SvLEN(sv)) && !SvIOKp(sv)) {
+       if (SvPOKp(sv) && !SvIOKp(sv)) {
             if (!SvIOKp(sv) && ckWARN(WARN_NUMERIC) &&
                 !grok_number(SvPVX_const(sv), SvCUR(sv), NULL))
                 not_a_number(sv);
@@ -2475,9 +2467,11 @@ Perl_sv_2nv_flags(pTHX_ register SV *const sv, const I32 flags)
             }
             return PTR2NV(SvRV(sv));
         }
+#ifdef PERL_OLD_COPY_ON_WRITE
         if (SvIsCOW(sv)) {
             sv_force_normal_flags(sv, 0);
         }
+#endif
         if (SvREADONLY(sv) && !SvOK(sv)) {
             if (ckWARN(WARN_UNINITIALIZED))
                 report_uninit(sv);
@@ -2527,7 +2521,7 @@ Perl_sv_2nv_flags(pTHX_ register SV *const sv, const I32 flags)
             SvNOKp_on(sv);
  #endif
      }
-    else if (SvPOKp(sv) && SvLEN(sv)) {
+    else if (SvPOKp(sv)) {
         UV value;
         const int numtype = grok_number(SvPVX_const(sv), SvCUR(sv), &value);
         if (!SvIOKp(sv) && !numtype && ckWARN(WARN_NUMERIC))
@@ -3100,7 +3094,7 @@ Always sets the SvUTF8 flag to avoid future validity checks even
  if the whole string is the same in UTF-8 as not.
  Returns the number of bytes in the converted string
  
-This is not as a general purpose byte encoding to Unicode interface:
+This is not a general purpose byte encoding to Unicode interface:
  use the Encode extension for that.
  
  =for apidoc sv_utf8_upgrade_nomg
@@ -3119,7 +3113,7 @@ Returns the number of bytes in the converted string
  C<sv_utf8_upgrade> and
  C<sv_utf8_upgrade_nomg> are implemented in terms of this function.
  
-This is not as a general purpose byte encoding to Unicode interface:
+This is not a general purpose byte encoding to Unicode interface:
  use the Encode extension for that.
  
  =cut
@@ -3422,7 +3416,7 @@ in a byte, this conversion will fail;
  in this case, either returns false or, if C<fail_ok> is not
  true, croaks.
  
-This is not as a general purpose Unicode to byte encoding interface:
+This is not a general purpose Unicode to byte encoding interface:
  use the Encode extension for that.
  
  =cut
@@ -4189,7 +4183,7 @@ Perl_sv_setsv_flags(pTHX_ SV *dstr, register SV* sstr, const I32 flags)
                 in a newer implementation.  */
              /* If we are COW and dstr is a suitable target then we drop down
                 into the else and make dest a COW of us.  */
-            || (SvFLAGS(dstr) & CAN_COW_MASK) != CAN_COW_FLAGS
+            || (SvFLAGS(dstr) & SVf_BREAK)
  #endif
              )
              &&
@@ -4720,7 +4714,7 @@ Perl_sv_force_normal_flags(pTHX_ register SV *const sv, const U32 flags)
  
  #ifdef PERL_OLD_COPY_ON_WRITE
      if (SvREADONLY(sv)) {
-       if (SvFAKE(sv)) {
+       if (SvIsCOW(sv)) {
             const char * const pvx = SvPVX_const(sv);
             const STRLEN len = SvLEN(sv);
             const STRLEN cur = SvCUR(sv);
@@ -4803,14 +4797,10 @@ Perl_sv_force_normal_flags(pTHX_ register SV *const sv, const U32 flags)
         }
         SvCUR_set(temp, SvCUR(sv));
         /* Remember that SvPVX is in the head, not the body. */
-       if (SvLEN(temp)) {
-           SvLEN_set(temp, SvLEN(sv));
-           /* This signals "buffer is owned by someone else" in sv_clear,
-              which is the least effort way to stop it freeing the buffer.
-           */
-           SvLEN_set(sv, SvLEN(sv)+1);
-       } else {
-           /* Their buffer is already owned by someone else. */
+       assert(!SvLEN(sv));
+       /* Their buffer is already owned by someone else. */
+       if (flags & SV_COW_DROP_PV) SvPOK_off(sv);
+       else {
             SvPVX(sv) = savepvn(SvPVX(sv), SvCUR(sv));
             SvLEN_set(temp, SvCUR(sv)+1);
         }
@@ -8225,13 +8215,15 @@ statement boundaries.  See also C<sv_newmortal> and C<sv_2mortal>.
   * permanent location. */
  
  SV *
-Perl_sv_mortalcopy(pTHX_ SV *const oldstr)
+Perl_sv_mortalcopy_flags(pTHX_ SV *const oldstr, U32 flags)
  {
      dVAR;
      SV *sv;
  
+    if (flags & SV_GMAGIC)
+       SvGETMAGIC(oldstr); /* before new_SV, in case it dies */
      new_SV(sv);
-    sv_setsv(sv,oldstr);
+    sv_setsv_flags(sv,oldstr,flags & ~SV_GMAGIC);
      PUSH_EXTEND_MORTAL__SV_C(sv);
      SvTEMP_on(sv);
      return sv;
@@ -9122,7 +9114,7 @@ Perl_sv_pvutf8n_force(pTHX_ SV *const sv, STRLEN *const lp)
  {
      PERL_ARGS_ASSERT_SV_PVUTF8N_FORCE;
  
-    sv_pvn_force(sv,lp);
+    sv_pvn_force(sv,0);
      sv_utf8_upgrade_nomg(sv);
      *lp = SvCUR(sv);
      return SvPVX(sv);
@@ -9454,9 +9446,7 @@ Perl_sv_bless(pTHX_ SV *const sv, HV *const stash)
          Perl_croak(aTHX_ "Can't bless non-reference value");
      tmpRef = SvRV(sv);
      if (SvFLAGS(tmpRef) & (SVs_OBJECT|SVf_READONLY)) {
-       if (SvIsCOW(tmpRef))
-           sv_force_normal_flags(tmpRef, 0);
-       if (SvREADONLY(tmpRef))
+       if (SvREADONLY(tmpRef) && !SvIsCOW(tmpRef))
             Perl_croak_no_modify(aTHX);
         if (SvOBJECT(tmpRef)) {
             if (SvTYPE(tmpRef) != SVt_PVIO)
@@ -10532,16 +10522,17 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
                 if (DO_UTF8(argsv)) {
                     STRLEN old_precis = precis;
                     if (has_precis && precis < elen) {
-                       STRLEN ulen = sv_len_utf8_nomg(argsv);
+                       STRLEN ulen = sv_or_pv_len_utf8(argsv, eptr, elen);
                         STRLEN p = precis > ulen ? ulen : precis;
-                       precis = sv_pos_u2b_flags(argsv, p, 0, 0);
+                       precis = sv_or_pv_pos_u2b(argsv, eptr, p, 0);
                                                         /* sticks at end */
                     }
                     if (width) { /* fudge width (can't fudge elen) */
                         if (has_precis && precis < elen)
                             width += precis - old_precis;
                         else
-                           width += elen - sv_len_utf8_nomg(argsv);
+                           width +=
+                               elen - sv_or_pv_len_utf8(argsv,eptr,elen);
                     }
                     is_utf8 = TRUE;
                 }
@@ -11095,13 +11086,13 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
  
         have = esignlen + zeros + elen;
         if (have < zeros)
-           Perl_croak_nocontext("%s", PL_memory_wrap);
+           croak_memory_wrap();
  
         need = (have > width ? have : width);
         gap = need - have;
  
         if (need >= (((STRLEN)~0) - SvCUR(sv) - dotstrlen - 1))
-           Perl_croak_nocontext("%s", PL_memory_wrap);
+           croak_memory_wrap();
         SvGROW(sv, SvCUR(sv) + need + dotstrlen + 1);
         p = SvEND(sv);
         if (esignlen && fill == '0') {
@@ -13376,6 +13367,7 @@ perl_clone_using(PerlInterpreter *proto_perl, UV flags,
      PL_VertSpace       = sv_dup_inc(proto_perl->IVertSpace, param);
  
      PL_NonL1NonFinalFold = sv_dup_inc(proto_perl->INonL1NonFinalFold, param);
+    PL_HasMultiCharFold= sv_dup_inc(proto_perl->IHasMultiCharFold, param);
  
      /* utf8 character class swashes */
      PL_utf8_alnum      = sv_dup_inc(proto_perl->Iutf8_alnum, param);
@@ -13407,7 +13399,6 @@ perl_clone_using(PerlInterpreter *proto_perl, UV flags,
      PL_AboveLatin1     = sv_dup_inc(proto_perl->IAboveLatin1, param);
      PL_Latin1          = sv_dup_inc(proto_perl->ILatin1, param);
  
-
      if (proto_perl->Ipsig_pend) {
         Newxz(PL_psig_pend, SIG_SIZE, int);
      }
@@ -13715,8 +13706,8 @@ Perl_sv_recode_to_utf8(pTHX_ SV *sv, SV *encoding)
         save_re_context();
         PUSHMARK(sp);
         EXTEND(SP, 3);
-       XPUSHs(encoding);
-       XPUSHs(sv);
+       PUSHs(encoding);
+       PUSHs(sv);
  /*
    NI-S 2002/07/09
    Passing sv_yes is wrong - it needs to be or'ed set of constants
@@ -13786,12 +13777,12 @@ Perl_sv_cat_decode(pTHX_ SV *dsv, SV *encoding,
         save_re_context();
         PUSHMARK(sp);
         EXTEND(SP, 6);
-       XPUSHs(encoding);
-       XPUSHs(dsv);
-       XPUSHs(ssv);
+       PUSHs(encoding);
+       PUSHs(dsv);
+       PUSHs(ssv);
         offsv = newSViv(*offset);
-       mXPUSHs(offsv);
-       mXPUSHp(tstr, tlen);
+       mPUSHs(offsv);
+       mPUSHp(tstr, tlen);
         PUTBACK;
         call_method("cat_decode", G_SCALAR);
         SPAGAIN;
@@ -13835,7 +13826,7 @@ S_find_hash_subscript(pTHX_ const HV *const hv, const SV *const val)
  
      array = HvARRAY(hv);
  
-    for (i=HvMAX(hv); i>0; i--) {
+    for (i=HvMAX(hv); i>=0; i--) {
         HE *entry;
         for (entry = array[i]; entry; entry = HeNEXT(entry)) {
             if (HeVAL(entry) != val)
@@ -13924,7 +13915,7 @@ Perl_varname(pTHX_ const GV *const gv, const char gvtype, PADOFFSET targ,
             return NULL;
         av = *PadlistARRAY(CvPADLIST(cv));
         sv = *av_fetch(av, targ, FALSE);
-       sv_setsv(name, sv);
+       sv_setsv_flags(name, sv, 0);
      }
  
      if (subscript_type == FUV_SUBSCRIPT_HASH) {