This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Convert Perl_sv_pos_u2b_proper() to Perl_sv_pos_u2b_flags().
[perl5.git] / pp.c
diff --git a/pp.c b/pp.c
index c659b13..3e2ed48 100644 (file)
--- a/pp.c
+++ b/pp.c
@@ -3079,15 +3079,19 @@ PP(pp_substr)
 {
     dVAR; dSP; dTARGET;
     SV *sv;
-    I32 len = 0;
     STRLEN curlen;
     STRLEN utf8_curlen;
-    I32 pos;
-    I32 rem;
-    I32 fail;
+    SV *   pos_sv;
+    IV     pos1_iv;
+    int    pos1_is_uv;
+    IV     pos2_iv;
+    int    pos2_is_uv;
+    SV *   len_sv;
+    IV     len_iv = 0;
+    int    len_is_uv = 1;
     const I32 lvalue = PL_op->op_flags & OPf_MOD || LVRET;
     const char *tmps;
-    const I32 arybase = CopARYBASE_get(PL_curcop);
+    const IV arybase = CopARYBASE_get(PL_curcop);
     SV *repl_sv = NULL;
     const char *repl = NULL;
     STRLEN repl_len;
@@ -3103,9 +3107,13 @@ PP(pp_substr)
            repl = SvPV_const(repl_sv, repl_len);
            repl_is_utf8 = DO_UTF8(repl_sv) && SvCUR(repl_sv);
        }
-       len = POPi;
+       len_sv    = POPs;
+       len_iv    = SvIV(len_sv);
+       len_is_uv = SvIOK_UV(len_sv);
     }
-    pos = POPi;
+    pos_sv     = POPs;
+    pos1_iv    = SvIV(pos_sv);
+    pos1_is_uv = SvIOK_UV(pos_sv);
     sv = POPs;
     PUTBACK;
     if (repl_sv) {
@@ -3127,51 +3135,80 @@ PP(pp_substr)
     else
        utf8_curlen = 0;
 
-    if (pos >= arybase) {
-       pos -= arybase;
-       rem = curlen-pos;
-       fail = rem;
-       if (num_args > 2) {
-           if (len < 0) {
-               rem += len;
-               if (rem < 0)
-                   rem = 0;
-           }
-           else if (rem > len)
-                    rem = len;
+    if ( (pos1_is_uv && arybase < 0) || (pos1_iv >= arybase) ) { /* pos >= $[ */
+       UV pos1_uv = pos1_iv-arybase;
+       /* Overflow can occur when $[ < 0 */
+       if (arybase < 0 && pos1_uv < (UV)pos1_iv)
+           goto bound_fail;
+       pos1_iv = pos1_uv;
+       pos1_is_uv = 1;
+    }
+    else if (pos1_is_uv ? (UV)pos1_iv > 0 : pos1_iv > 0) {
+       goto bound_fail;  /* $[=3; substr($_,2,...) */
+    }
+    else { /* pos < $[ */
+       if (pos1_iv == 0) { /* $[=1; substr($_,0,...) */
+           pos1_iv = curlen;
+           pos1_is_uv = 1;
+       } else {
+           if (curlen) {
+               pos1_is_uv = curlen-1 > ~(UV)pos1_iv;
+               pos1_iv += curlen;
+          }
        }
     }
-    else {
-       pos += curlen;
-       if (num_args < 3)
-           rem = curlen;
-       else if (len >= 0) {
-           rem = pos+len;
-           if (rem > (I32)curlen)
-               rem = curlen;
+    if (pos1_is_uv || pos1_iv > 0) {
+       if ((UV)pos1_iv > curlen)
+           goto bound_fail;
+    }
+
+    if (num_args > 2) {
+       if (!len_is_uv && len_iv < 0) {
+           pos2_iv = curlen + len_iv;
+           if (curlen)
+               pos2_is_uv = curlen-1 > ~(UV)len_iv;
+           else
+               pos2_is_uv = 0;
+       } else {  /* len_iv >= 0 */
+           if (!pos1_is_uv && pos1_iv < 0) {
+               pos2_iv = pos1_iv + len_iv;
+               pos2_is_uv = (UV)len_iv > (UV)IV_MAX;
+           } else {
+               if ((UV)len_iv > curlen-(UV)pos1_iv)
+                   pos2_iv = curlen;
+               else
+                   pos2_iv = pos1_iv+len_iv;
+               pos2_is_uv = 1;
+           }
        }
-       else {
-           rem = curlen+len;
-           if (rem < pos)
-               rem = pos;
-       }
-       if (pos < 0)
-           pos = 0;
-       fail = rem;
-       rem -= pos;
-    }
-    if (fail < 0) {
-       if (lvalue || repl)
-           Perl_croak(aTHX_ "substr outside of string");
-       Perl_ck_warner(aTHX_ packWARN(WARN_SUBSTR), "substr outside of string");
-       RETPUSHUNDEF;
     }
     else {
-       const I32 upos = pos;
-       const I32 urem = rem;
-       if (utf8_curlen)
-           sv_pos_u2b(sv, &pos, &rem);
-       tmps += pos;
+       pos2_iv = curlen;
+       pos2_is_uv = 1;
+    }
+
+    if (!pos2_is_uv && pos2_iv < 0) {
+       if (!pos1_is_uv && pos1_iv < 0)
+           goto bound_fail;
+       pos2_iv = 0;
+    }
+    else if (!pos1_is_uv && pos1_iv < 0)
+       pos1_iv = 0;
+
+    if ((UV)pos2_iv < (UV)pos1_iv)
+       pos2_iv = pos1_iv;
+    if ((UV)pos2_iv > curlen)
+       pos2_iv = curlen;
+
+    {
+       /* pos1_iv and pos2_iv both in 0..curlen, so the cast is safe */
+       const STRLEN pos = (STRLEN)( (UV)pos1_iv );
+       const STRLEN len = (STRLEN)( (UV)pos2_iv - (UV)pos1_iv );
+       STRLEN byte_len = len;
+       STRLEN byte_pos = utf8_curlen
+           ? sv_pos_u2b_flags(sv, pos, &byte_len, SV_CONST_RETURN) : pos;
+
+       tmps += byte_pos;
        /* we either return a PV or an LV. If the TARG hasn't been used
         * before, or is of that type, reuse it; otherwise use a mortal
         * instead. Note that LVs can have an extended lifetime, so also
@@ -3185,7 +3222,7 @@ PP(pp_substr)
            }
        }
 
-       sv_setpvn(TARG, tmps, rem);
+       sv_setpvn(TARG, tmps, byte_len);
 #ifdef USE_LOCALE_COLLATE
        sv_unmagic(TARG, PERL_MAGIC_collxfrm);
 #endif
@@ -3202,7 +3239,7 @@ PP(pp_substr)
            }
            if (!SvOK(sv))
                sv_setpvs(sv, "");
-           sv_insert_flags(sv, pos, rem, repl, repl_len, 0);
+           sv_insert_flags(sv, byte_pos, byte_len, repl, repl_len, 0);
            if (repl_is_utf8)
                SvUTF8_on(sv);
            SvREFCNT_dec(repl_sv_copy);
@@ -3232,13 +3269,19 @@ PP(pp_substr)
                SvREFCNT_dec(LvTARG(TARG));
                LvTARG(TARG) = SvREFCNT_inc_simple(sv);
            }
-           LvTARGOFF(TARG) = upos;
-           LvTARGLEN(TARG) = urem;
+           LvTARGOFF(TARG) = pos;
+           LvTARGLEN(TARG) = len;
        }
     }
     SPAGAIN;
     PUSHs(TARG);               /* avoid SvSETMAGIC here */
     RETURN;
+
+bound_fail:
+    if (lvalue || repl)
+       Perl_croak(aTHX_ "substr outside of string");
+    Perl_ck_warner(aTHX_ packWARN(WARN_SUBSTR), "substr outside of string");
+    RETPUSHUNDEF;
 }
 
 PP(pp_vec)