This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Deprecate mg_length; make it return bytes
[perl5.git] / regexec.c
index 4fffae7..989affa 100644 (file)
--- a/regexec.c
+++ b/regexec.c
 #define HOP3(pos,off,lim) (PL_reg_match_utf8 ? reghop3((U8*)(pos), off, (U8*)(lim)) : (U8*)(pos + off))
 #define HOP3c(pos,off,lim) ((char*)HOP3(pos,off,lim))
 
+
+#define NEXTCHR_EOS -10 /* nextchr has fallen off the end */
+#define NEXTCHR_IS_EOS (nextchr < 0)
+
+#define SET_nextchr \
+    nextchr = ((locinput < PL_regeol) ? UCHARAT(locinput) : NEXTCHR_EOS)
+
+#define SET_locinput(p) \
+    locinput = (p);  \
+    SET_nextchr
+
+
 /* these are unrolled below in the CCC_TRY_XXX defined */
 #define LOAD_UTF8_CHARCLASS(class,str) STMT_START { \
     if (!CAT2(PL_utf8_,class)) { \
  * fails, or advance to the next character */
 
 #define _CCC_TRY_CODE(POS_OR_NEG, FUNC, UTF8_TEST, CLASS, STR)                \
-    if (locinput >= PL_regeol) {                                              \
+    if (NEXTCHR_IS_EOS) {                                              \
        sayNO;                                                                \
     }                                                                         \
     if (utf8_target && UTF8_IS_CONTINUED(nextchr)) {                          \
        if (POS_OR_NEG (UTF8_TEST)) {                                         \
            sayNO;                                                            \
        }                                                                     \
-       locinput += PL_utf8skip[nextchr];                                     \
-       nextchr = UCHARAT(locinput);                                          \
-       break;                                                                \
     }                                                                         \
-    if (POS_OR_NEG (FUNC(nextchr))) {                                         \
-       sayNO;                                                                \
+    else if (POS_OR_NEG (FUNC(nextchr))) {                                    \
+            sayNO;                                                            \
     }                                                                         \
-    nextchr = UCHARAT(++locinput);                                            \
-    break;
+    goto increment_locinput;
 
 /* Handle the non-locale cases for a character class and its complement.  It
  * calls _CCC_TRY_CODE with a ! to complement the test for the character class.
        _CCC_TRY_CODE( PLACEHOLDER, LCFUNC, LCFUNC_utf8((U8*)locinput),       \
                       CLASS, STR)                                            \
     case NAMEA:                                                               \
-       if (locinput >= PL_regeol || ! FUNCA(nextchr)) {                      \
+       if (NEXTCHR_IS_EOS || ! FUNCA(nextchr)) {                      \
            sayNO;                                                            \
        }                                                                     \
        /* Matched a utf8-invariant, so don't have to worry about utf8 */     \
-       nextchr = UCHARAT(++locinput);                                        \
+       locinput++;                                        \
        break;                                                                \
     case NNAMEA:                                                              \
-       if (locinput >= PL_regeol || FUNCA(nextchr)) {                        \
+       if (NEXTCHR_IS_EOS || FUNCA(nextchr)) {                        \
            sayNO;                                                            \
        }                                                                     \
-       if (utf8_target) {                                                    \
-           locinput += PL_utf8skip[nextchr];                                 \
-           nextchr = UCHARAT(locinput);                                      \
-       }                                                                     \
-       else {                                                                \
-           nextchr = UCHARAT(++locinput);                                    \
-       }                                                                     \
-       break;                                                                \
+        goto increment_locinput;                                              \
     /* Generate the non-locale cases */                                       \
     _CCC_TRY_NONLOCALE(NAME, NNAME, FUNC, CLASS, STR)
 
@@ -608,7 +609,21 @@ Perl_re_intuit_start(pTHX_ REGEXP * const rx, SV *sv, char *strpos,
        goto fail;
     }
                 
-    strbeg = (sv && SvPOK(sv)) ? strend - SvCUR(sv) : strpos;
+    /* XXX we need to pass strbeg as a separate arg: the following is
+     * guesswork and can be wrong... */
+    if (sv && SvPOK(sv)) {
+        char * p   = SvPVX(sv);
+        STRLEN cur = SvCUR(sv); 
+        if (p <= strpos && strpos < p + cur) {
+            strbeg = p;
+            assert(p <= strend && strend <= p + cur);
+        }
+        else
+            strbeg = strend - cur;
+    }
+    else 
+        strbeg = strpos;
+
     PL_regeol = strend;
     if (utf8_target) {
        if (!prog->check_utf8 && prog->check_substr)
@@ -1249,7 +1264,7 @@ STMT_START {                                              \
 
 #define REXEC_FBC_UTF8_SCAN(CoDe)                     \
 STMT_START {                                          \
-    while (s + (uskip = UTF8SKIP(s)) <= strend) {     \
+    while (s < strend && s + (uskip = UTF8SKIP(s)) <= strend) {     \
        CoDe                                          \
        s += uskip;                                   \
     }                                                 \
@@ -1774,32 +1789,32 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
            break;
        case LNBREAK:
            REXEC_FBC_CSCAN(
-               is_LNBREAK_utf8(s),
-               is_LNBREAK_latin1(s)
+               is_LNBREAK_utf8_safe(s, strend),
+               is_LNBREAK_latin1_safe(s, strend)
            );
            break;
        case VERTWS:
            REXEC_FBC_CSCAN(
-               is_VERTWS_utf8(s),
-               is_VERTWS_latin1(s)
+               is_VERTWS_utf8_safe(s, strend),
+               is_VERTWS_latin1_safe(s, strend)
            );
            break;
        case NVERTWS:
            REXEC_FBC_CSCAN(
-               !is_VERTWS_utf8(s),
-               !is_VERTWS_latin1(s)
+               !is_VERTWS_utf8_safe(s, strend),
+               !is_VERTWS_latin1_safe(s, strend)
            );
            break;
        case HORIZWS:
            REXEC_FBC_CSCAN(
-               is_HORIZWS_utf8(s),
-               is_HORIZWS_latin1(s)
+               is_HORIZWS_utf8_safe(s, strend),
+               is_HORIZWS_latin1_safe(s, strend)
            );
            break;
        case NHORIZWS:
            REXEC_FBC_CSCAN(
-               !is_HORIZWS_utf8(s),
-               !is_HORIZWS_latin1(s)
+               !is_HORIZWS_utf8_safe(s, strend),
+               !is_HORIZWS_latin1_safe(s, strend)
            );      
            break;
        case POSIXA:
@@ -1934,16 +1949,24 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
                             
                         }
                         points[pointpos++ % maxlen]= uc;
-                       REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc,
+                        if (foldlen || uc < (U8*)strend) {
+                            REXEC_TRIE_READ_CHAR(trie_type, trie,
+                                             widecharmap, uc,
                                             uscan, len, uvc, charid, foldlen,
                                             foldbuf, uniflags);
-                        DEBUG_TRIE_EXECUTE_r({
-                            dump_exec_pos( (char *)uc, c, strend, real_start, 
-                                s,   utf8_target );
-                            PerlIO_printf(Perl_debug_log,
-                                " Charid:%3u CP:%4"UVxf" ",
-                                 charid, uvc);
-                        });
+                            DEBUG_TRIE_EXECUTE_r({
+                                dump_exec_pos( (char *)uc, c, strend,
+                                            real_start, s, utf8_target);
+                                PerlIO_printf(Perl_debug_log,
+                                    " Charid:%3u CP:%4"UVxf" ",
+                                     charid, uvc);
+                            });
+                        }
+                        else {
+                            len = 0;
+                            charid = 0;
+                        }
+
 
                         do {
 #ifdef DEBUGGING
@@ -2391,7 +2414,11 @@ Perl_regexec_flags(pTHX_ REGEXP * const rx, char *stringarg, register char *stre
                while (s <= last1) {
                    if (regtry(&reginfo, &s))
                        goto got_it;
-                   s += UTF8SKIP(s);
+                    if (s >= last1) {
+                        s++; /* to break out of outer loop */
+                        break;
+                    }
+                    s += UTF8SKIP(s);
                }
            }
            else {
@@ -2693,7 +2720,6 @@ phooey:
         Safefree(prog->offs);
         prog->offs = swap;
     }
-
     return 0;
 }
 
@@ -2712,12 +2738,13 @@ phooey:
  - regtry - try match at specific point
  */
 STATIC I32                     /* 0 failure, 1 success */
-S_regtry(pTHX_ regmatch_info *reginfo, char **startpos)
+S_regtry(pTHX_ regmatch_info *reginfo, char **startposp)
 {
     dVAR;
     CHECKPOINT lastcp;
     REGEXP *const rx = reginfo->prog;
     regexp *const prog = (struct regexp *)SvANY(rx);
+    I32 result;
     RXi_GET_DECL(prog,progi);
     GET_RE_DEBUG_FLAGS_DECL;
 
@@ -2790,10 +2817,9 @@ S_regtry(pTHX_ regmatch_info *reginfo, char **startpos)
        prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
     }
 #ifdef DEBUGGING
-    PL_reg_starttry = *startpos;
+    PL_reg_starttry = *startposp;
 #endif
-    prog->offs[0].start = *startpos - PL_bostr;
-    PL_reginput = *startpos;
+    prog->offs[0].start = *startposp - PL_bostr;
     prog->lastparen = 0;
     prog->lastcloseparen = 0;
     PL_regsize = 0;
@@ -2823,12 +2849,13 @@ S_regtry(pTHX_ regmatch_info *reginfo, char **startpos)
     }
 #endif
     REGCP_SET(lastcp);
-    if (regmatch(reginfo, progi->program + 1)) {
-       prog->offs[0].end = PL_reginput - PL_bostr;
+    result = regmatch(reginfo, *startposp, progi->program + 1);
+    if (result != -1) {
+       prog->offs[0].end = result;
        return 1;
     }
     if (reginfo->cutpoint)
-        *startpos= reginfo->cutpoint;
+        *startposp= reginfo->cutpoint;
     REGCP_UNWIND(lastcp);
     return 0;
 }
@@ -2880,20 +2907,23 @@ S_push_slab(pTHX)
 
 /* push a new state then goto it */
 
-#define PUSH_STATE_GOTO(state, node) \
+#define PUSH_STATE_GOTO(state, node, input) \
+    pushinput = input; \
     scan = node; \
     st->resume_state = state; \
     goto push_state;
 
 /* push a new state with success backtracking, then goto it */
 
-#define PUSH_YES_STATE_GOTO(state, node) \
+#define PUSH_YES_STATE_GOTO(state, node, input) \
+    pushinput = input; \
     scan = node; \
     st->resume_state = state; \
     goto push_yes_state;
 
 
 
+
 /*
 
 regmatch() - main matching routine
@@ -2959,7 +2989,7 @@ implementation:
        // push a yes backtrack state with a resume value of
        // IFMATCH_A/IFMATCH_A_fail, then continue execution at the
        // first node of A:
-       PUSH_YES_STATE_GOTO(IFMATCH_A, A);
+       PUSH_YES_STATE_GOTO(IFMATCH_A, A, newinput);
        // NOTREACHED
 
     case IFMATCH_A: // we have successfully executed A; now continue with B
@@ -2996,8 +3026,8 @@ The topmost backtrack state, pointed to by st, is usually free. If you
 want to claim it, populate any ST.foo fields in it with values you wish to
 save, then do one of
 
-       PUSH_STATE_GOTO(resume_state, node);
-       PUSH_YES_STATE_GOTO(resume_state, node);
+       PUSH_STATE_GOTO(resume_state, node, newinput);
+       PUSH_YES_STATE_GOTO(resume_state, node, newinput);
 
 which sets that backtrack state's resume value to 'resume_state', pushes a
 new free entry to the top of the backtrack stack, then goes to 'node'.
@@ -3189,8 +3219,9 @@ S_clear_backtrack_stack(pTHX_ void *p)
 }
 
 
-STATIC I32                     /* 0 failure, 1 success */
-S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
+/* returns -1 on failure, $+[0] on success */
+STATIC I32
+S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
 {
 #if PERL_VERSION < 9 && !defined(PERL_CORE)
     dMY_CXT;
@@ -3209,7 +3240,8 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
     regnode *next;
     U32 n = 0; /* general value; init to avoid compiler warning */
     I32 ln = 0; /* len or last;  init to avoid compiler warning */
-    char *locinput = PL_reginput;
+    char *locinput = startpos;
+    char *pushinput; /* where to continue after a PUSH */
     I32 nextchr;   /* is always set to UCHARAT(locinput) */
 
     bool result = 0;       /* return value of S_regmatch */
@@ -3228,7 +3260,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
     U32 state_num;
     bool no_final = 0;      /* prevent failure from backtracking? */
     bool do_cutgroup = 0;   /* no_final only until next branch/trie entry */
-    char *startpoint = PL_reginput;
+    char *startpoint = locinput;
     SV *popmark = NULL;     /* are we looking for a mark? */
     SV *sv_commit = NULL;   /* last mark name seen in failure */
     SV *sv_yes_mark = NULL; /* last mark name we have seen 
@@ -3293,7 +3325,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
        st = PL_regmatch_state = S_push_slab(aTHX);
 
     /* Note that nextchr is a byte even in UTF */
-    nextchr = UCHARAT(locinput);
+    SET_nextchr;
     scan = prog;
     while (scan != NULL) {
 
@@ -3318,96 +3350,92 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
 
       reenter_switch:
 
+        SET_nextchr;
+
        switch (state_num) {
-       case BOL:
+       case BOL: /*  /^../  */
            if (locinput == PL_bostr)
            {
                /* reginfo->till = reginfo->bol; */
                break;
            }
            sayNO;
-       case MBOL:
+
+       case MBOL: /*  /^../m  */
            if (locinput == PL_bostr ||
-               ((nextchr || locinput < PL_regeol) && locinput[-1] == '\n'))
+               (!NEXTCHR_IS_EOS && locinput[-1] == '\n'))
            {
                break;
            }
            sayNO;
-       case SBOL:
+
+       case SBOL: /*  /^../s  */
            if (locinput == PL_bostr)
                break;
            sayNO;
-       case GPOS:
+
+       case GPOS: /*  \G  */
            if (locinput == reginfo->ganch)
                break;
            sayNO;
 
-       case KEEPS:
+       case KEEPS: /*   \K  */
            /* update the startpoint */
            st->u.keeper.val = rex->offs[0].start;
-           PL_reginput = locinput;
            rex->offs[0].start = locinput - PL_bostr;
-           PUSH_STATE_GOTO(KEEPS_next, next);
+           PUSH_STATE_GOTO(KEEPS_next, next, locinput);
            /*NOT-REACHED*/
        case KEEPS_next_fail:
            /* rollback the start point change */
            rex->offs[0].start = st->u.keeper.val;
            sayNO_SILENT;
            /*NOT-REACHED*/
-       case EOL:
+
+       case EOL: /* /..$/  */
                goto seol;
-       case MEOL:
-           if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
+
+       case MEOL: /* /..$/m  */
+           if (!NEXTCHR_IS_EOS && nextchr != '\n')
                sayNO;
            break;
-       case SEOL:
+
+       case SEOL: /* /..$/s  */
          seol:
-           if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
+           if (!NEXTCHR_IS_EOS && nextchr != '\n')
                sayNO;
            if (PL_regeol - locinput > 1)
                sayNO;
            break;
-       case EOS:
-           if (PL_regeol != locinput)
+
+       case EOS: /*  \z  */
+           if (!NEXTCHR_IS_EOS)
                sayNO;
            break;
-       case SANY:
-           if (!nextchr && locinput >= PL_regeol)
+
+       case SANY: /*  /./s  */
+           if (NEXTCHR_IS_EOS)
                sayNO;
-           if (utf8_target) {
-               locinput += PL_utf8skip[nextchr];
-               if (locinput > PL_regeol)
-                   sayNO;
-               nextchr = UCHARAT(locinput);
-           }
-           else
-               nextchr = UCHARAT(++locinput);
-           break;
-       case CANY:
-           if (!nextchr && locinput >= PL_regeol)
+            goto increment_locinput;
+
+       case CANY: /*  \C  */
+           if (NEXTCHR_IS_EOS)
                sayNO;
-           nextchr = UCHARAT(++locinput);
+           locinput++;
            break;
-       case REG_ANY:
-           if ((!nextchr && locinput >= PL_regeol) || nextchr == '\n')
+
+       case REG_ANY: /*  /./  */
+           if ((NEXTCHR_IS_EOS) || nextchr == '\n')
                sayNO;
-           if (utf8_target) {
-               locinput += PL_utf8skip[nextchr];
-               if (locinput > PL_regeol)
-                   sayNO;
-               nextchr = UCHARAT(locinput);
-           }
-           else
-               nextchr = UCHARAT(++locinput);
-           break;
+            goto increment_locinput;
+
 
 #undef  ST
 #define ST st->u.trie
-        case TRIEC:
+        case TRIEC: /* (ab|cd) with known charclass */
             /* In this case the charclass data is available inline so
                we can fail fast without a lot of extra overhead. 
              */
-            if(!ANYOF_BITMAP_TEST(scan, *locinput)) {
+            if(!NEXTCHR_IS_EOS && !ANYOF_BITMAP_TEST(scan, nextchr)) {
                 DEBUG_EXECUTE_r(
                     PerlIO_printf(Perl_debug_log,
                               "%*s  %sfailed to match trie start class...%s\n",
@@ -3417,7 +3445,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
                 assert(0); /* NOTREACHED */
             }
             /* FALL THROUGH */
-       case TRIE:
+       case TRIE:  /* (ab|cd)  */
            /* the basic plan of execution of the trie is:
             * At the beginning, run though all the states, and
             * find the longest-matching word. Also remember the position
@@ -3426,7 +3454,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
             *    ab|a|x|abcd|abc
             * when matched against the string "abcde", will generate
             * accept states for all words except 3, with the longest
-            * matching word being 4, and the shortest being 1 (with
+            * matching word being 4, and the shortest being 2 (with
             * the position being after char 1 of the string).
             *
             * Then for each matching word, in word order (i.e. 1,2,4,5),
@@ -3472,7 +3500,9 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
                HV * widecharmap = MUTABLE_HV(rexi->data->data[ ARG( scan ) + 1 ]);
                 U32 state = trie->startstate;
 
-                if (trie->bitmap && !TRIE_BITMAP_TEST(trie,*locinput) ) {
+                if (   trie->bitmap
+                    && (NEXTCHR_IS_EOS || !TRIE_BITMAP_TEST(trie, nextchr)))
+                {
                    if (trie->states[ state ].wordnum) {
                         DEBUG_EXECUTE_r(
                             PerlIO_printf(Perl_debug_log,
@@ -3545,7 +3575,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
                    });
 
                    /* read a char and goto next state */
-                   if ( base ) {
+                   if ( base && (foldlen || uc < (U8*)PL_regeol)) {
                        I32 offset;
                        REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc,
                                             uscan, len, uvc, charid, foldlen,
@@ -3602,6 +3632,8 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
            assert(0); /* NOTREACHED */
 
        case TRIE_next_fail: /* we failed - try next alternative */
+        {
+            U8 *uc;
             if ( ST.jump) {
                 REGCP_UNWIND(ST.cp);
                 UNWIND_PAREN(ST.lastparen, ST.lastcloseparen);
@@ -3646,7 +3678,6 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
            /* find start char of end of current word */
            {
                U32 chars; /* how many chars to skip */
-               U8 *uc = ST.firstpos;
                reg_trie_data * const trie
                    = (reg_trie_data*)rexi->data->data[ARG(ST.me)];
 
@@ -3654,6 +3685,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
                            >=  ST.firstchars);
                chars = (trie->wordinfo[ST.nextword].len - trie->prefixlen)
                            - ST.firstchars;
+               uc = ST.firstpos;
 
                if (ST.longfold) {
                    /* the hard option - fold each char in turn and find
@@ -3693,7 +3725,6 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
                    else
                        uc += chars;
                }
-               PL_reginput = (char *)uc;
            }
 
            scan = ST.me + ((ST.jump && ST.jump[ST.nextword])
@@ -3711,7 +3742,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
            });
 
            if (ST.accepted > 1 || has_cutgroup) {
-               PUSH_STATE_GOTO(TRIE_next, scan);
+               PUSH_STATE_GOTO(TRIE_next, scan, (char*)uc);
                assert(0); /* NOTREACHED */
            }
            /* only one choice left - just continue */
@@ -3734,13 +3765,13 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
                    PL_colors[5] );
            });
 
-           locinput = PL_reginput;
-           nextchr = UCHARAT(locinput);
+           locinput = (char*)uc;
            continue; /* execute rest of RE */
            assert(0); /* NOTREACHED */
+        }
 #undef  ST
 
-       case EXACT: {
+       case EXACT: {            /*  /abc/        */
            char *s = STRING(scan);
            ln = STR_LEN(scan);
            if (utf8_target != UTF_PATTERN) {
@@ -3749,35 +3780,59 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
                const char * const e = s + ln;
 
                if (utf8_target) {
-                   /* The target is utf8, the pattern is not utf8. */
+                    /* The target is utf8, the pattern is not utf8.
+                     * Above-Latin1 code points can't match the pattern;
+                     * invariants match exactly, and the other Latin1 ones need
+                     * to be downgraded to a single byte in order to do the
+                     * comparison.  (If we could be confident that the target
+                     * is not malformed, this could be refactored to have fewer
+                     * tests by just assuming that if the first bytes match, it
+                     * is an invariant, but there are tests in the test suite
+                     * dealing with (??{...}) which violate this) */
                    while (s < e) {
-                       STRLEN ulen;
                        if (l >= PL_regeol)
                             sayNO;
-                       if (NATIVE_TO_UNI(*(U8*)s) !=
-                           utf8n_to_uvuni((U8*)l, UTF8_MAXBYTES, &ulen,
-                                           uniflags))
-                            sayNO;
-                       l += ulen;
-                       s ++;
+                        if (UTF8_IS_ABOVE_LATIN1(* (U8*) l)) {
+                            sayNO;
+                        }
+                        if (UTF8_IS_INVARIANT(*(U8*)l)) {
+                           if (*l != *s) {
+                                sayNO;
+                            }
+                            l++;
+                        }
+                        else {
+                            if (TWO_BYTE_UTF8_TO_UNI(*l, *(l+1)) != * (U8*) s) {
+                                sayNO;
+                            }
+                            l += 2;
+                        }
+                       s++;
                    }
                }
                else {
                    /* The target is not utf8, the pattern is utf8. */
                    while (s < e) {
-                       STRLEN ulen;
-                       if (l >= PL_regeol)
-                           sayNO;
-                       if (NATIVE_TO_UNI(*((U8*)l)) !=
-                           utf8n_to_uvuni((U8*)s, UTF8_MAXBYTES, &ulen,
-                                          uniflags))
-                           sayNO;
-                       s += ulen;
-                       l ++;
+                        if (l >= PL_regeol || UTF8_IS_ABOVE_LATIN1(* (U8*) s))
+                        {
+                            sayNO;
+                        }
+                        if (UTF8_IS_INVARIANT(*(U8*)s)) {
+                           if (*s != *l) {
+                                sayNO;
+                            }
+                            s++;
+                        }
+                        else {
+                            if (TWO_BYTE_UTF8_TO_UNI(*s, *(s+1)) != * (U8*) l) {
+                                sayNO;
+                            }
+                            s += 2;
+                        }
+                       l++;
                    }
                }
                locinput = l;
-               nextchr = UCHARAT(locinput);
                break;
            }
            /* The target and the pattern have the same utf8ness. */
@@ -3789,10 +3844,10 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
            if (ln > 1 && memNE(s, locinput, ln))
                sayNO;
            locinput += ln;
-           nextchr = UCHARAT(locinput);
            break;
            }
-       case EXACTFL: {
+
+       case EXACTFL: {          /*  /abc/il      */
            re_fold_t folder;
            const U8 * fold_array;
            const char * s;
@@ -3804,21 +3859,21 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
            fold_utf8_flags = FOLDEQ_UTF8_LOCALE;
            goto do_exactf;
 
-       case EXACTFU_SS:
-       case EXACTFU_TRICKYFOLD:
-       case EXACTFU:
+       case EXACTFU_SS:         /*  /\x{df}/iu   */
+       case EXACTFU_TRICKYFOLD: /*  /\x{390}/iu  */
+       case EXACTFU:            /*  /abc/iu      */
            folder = foldEQ_latin1;
            fold_array = PL_fold_latin1;
            fold_utf8_flags = (UTF_PATTERN) ? FOLDEQ_S1_ALREADY_FOLDED : 0;
            goto do_exactf;
 
-       case EXACTFA:
+       case EXACTFA:            /*  /abc/iaa     */
            folder = foldEQ_latin1;
            fold_array = PL_fold_latin1;
            fold_utf8_flags = FOLDEQ_UTF8_NOMIX_ASCII;
            goto do_exactf;
 
-       case EXACTF:
+       case EXACTF:             /*  /abc/i       */
            folder = foldEQ;
            fold_array = PL_fold;
            fold_utf8_flags = 0;
@@ -3839,7 +3894,6 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
                    sayNO;
                }
                locinput = e;
-               nextchr = UCHARAT(locinput);
                break;
            }
 
@@ -3854,23 +3908,22 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
            if (ln > 1 && ! folder(s, locinput, ln))
                sayNO;
            locinput += ln;
-           nextchr = UCHARAT(locinput);
            break;
        }
 
        /* XXX Could improve efficiency by separating these all out using a
         * macro or in-line function.  At that point regcomp.c would no longer
         * have to set the FLAGS fields of these */
-       case BOUNDL:
-       case NBOUNDL:
+       case BOUNDL:  /*  /\b/l  */
+       case NBOUNDL: /*  /\B/l  */
            PL_reg_flags |= RF_tainted;
            /* FALL THROUGH */
-       case BOUND:
-       case BOUNDU:
-       case BOUNDA:
-       case NBOUND:
-       case NBOUNDU:
-       case NBOUNDA:
+       case BOUND:   /*  /\b/   */
+       case BOUNDU:  /*  /\b/u  */
+       case BOUNDA:  /*  /\b/a  */
+       case NBOUND:  /*  /\B/   */
+       case NBOUNDU: /*  /\B/u  */
+       case NBOUNDA: /*  /\B/a  */
            /* was last char in word? */
            if (utf8_target
                && FLAGS(scan) != REGEX_ASCII_RESTRICTED_CHARSET
@@ -3885,12 +3938,17 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
                }
                if (FLAGS(scan) != REGEX_LOCALE_CHARSET) {
                    ln = isALNUM_uni(ln);
-                   LOAD_UTF8_CHARCLASS_ALNUM();
-                   n = swash_fetch(PL_utf8_alnum, (U8*)locinput, utf8_target);
+                    if (NEXTCHR_IS_EOS)
+                        n = 0;
+                    else {
+                        LOAD_UTF8_CHARCLASS_ALNUM();
+                        n = swash_fetch(PL_utf8_alnum, (U8*)locinput,
+                                                                utf8_target);
+                    }
                }
                else {
                    ln = isALNUM_LC_uvchr(UNI_TO_NATIVE(ln));
-                   n = isALNUM_LC_utf8((U8*)locinput);
+                   n = NEXTCHR_IS_EOS ? 0 : isALNUM_LC_utf8((U8*)locinput);
                }
            }
            else {
@@ -3911,20 +3969,20 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
                switch (FLAGS(scan)) {
                    case REGEX_UNICODE_CHARSET:
                        ln = isWORDCHAR_L1(ln);
-                       n = isWORDCHAR_L1(nextchr);
+                       n = NEXTCHR_IS_EOS ? 0 : isWORDCHAR_L1(nextchr);
                        break;
                    case REGEX_LOCALE_CHARSET:
                        ln = isALNUM_LC(ln);
-                       n = isALNUM_LC(nextchr);
+                       n = NEXTCHR_IS_EOS ? 0 : isALNUM_LC(nextchr);
                        break;
                    case REGEX_DEPENDS_CHARSET:
                        ln = isALNUM(ln);
-                       n = isALNUM(nextchr);
+                       n = NEXTCHR_IS_EOS ? 0 : isALNUM(nextchr);
                        break;
                    case REGEX_ASCII_RESTRICTED_CHARSET:
                    case REGEX_ASCII_MORE_RESTRICTED_CHARSET:
                        ln = isWORDCHAR_A(ln);
-                       n = isWORDCHAR_A(nextchr);
+                       n = NEXTCHR_IS_EOS ? 0 : isWORDCHAR_A(nextchr);
                        break;
                    default:
                        Perl_croak(aTHX_ "panic: Unexpected FLAGS %u in op %u", FLAGS(scan), OP(scan));
@@ -3936,31 +3994,28 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
            if (((!ln) == (!n)) == (OP(scan) < NBOUND))
                    sayNO;
            break;
-       case ANYOFV:
-       case ANYOF:
+
+       case ANYOFV: /*  /[abx{df}]/i  */
+       case ANYOF:  /*  /[abc]/       */
+            if (NEXTCHR_IS_EOS)
+                sayNO;
            if (utf8_target || state_num == ANYOFV) {
                STRLEN inclasslen = PL_regeol - locinput;
-               if (locinput >= PL_regeol)
-                   sayNO;
-
                if (!reginclass(rex, scan, (U8*)locinput, &inclasslen, utf8_target))
                    sayNO;
                locinput += inclasslen;
-               nextchr = UCHARAT(locinput);
                break;
            }
            else {
-               if (nextchr < 0)
-                   nextchr = UCHARAT(locinput);
-               if (!nextchr && locinput >= PL_regeol)
-                   sayNO;
                if (!REGINCLASS(rex, scan, (U8*)locinput))
                    sayNO;
-               nextchr = UCHARAT(++locinput);
+               locinput++;
                break;
            }
            break;
-       /* Special char classes - The defines start on line 129 or so */
+
+       /* Special char classes: \d, \w etc.
+         * The defines start on line 166 or so */
         CCC_TRY_U(ALNUM,  NALNUM,  isWORDCHAR,
                  ALNUML, NALNUML, isALNUM_LC, isALNUM_LC_utf8,
                  ALNUMU, NALNUMU, isWORDCHAR_L1,
@@ -3978,25 +4033,19 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
                DIGITA, NDIGITA, isDIGIT_A,
                digit, "0");
 
-        case POSIXA:
-            if (locinput >= PL_regeol || ! _generic_isCC_A(nextchr, FLAGS(scan))) {
+        case POSIXA: /* /[[:ascii:]]/ etc */
+            if (NEXTCHR_IS_EOS || ! _generic_isCC_A(nextchr, FLAGS(scan))) {
                 sayNO;
             }
             /* Matched a utf8-invariant, so don't have to worry about utf8 */
-            nextchr = UCHARAT(++locinput);
+            locinput++;
             break;
-        case NPOSIXA:
-            if (locinput >= PL_regeol || _generic_isCC_A(nextchr, FLAGS(scan))) {
+
+        case NPOSIXA: /*  /[^[:ascii:]]/  etc */
+            if (NEXTCHR_IS_EOS || _generic_isCC_A(nextchr, FLAGS(scan))) {
                 sayNO;
             }
-            if (utf8_target) {
-                locinput += PL_utf8skip[nextchr];
-                nextchr = UCHARAT(locinput);
-            }
-            else {
-                nextchr = UCHARAT(++locinput);
-            }
-            break;
+            goto increment_locinput;
 
        case CLUMP: /* Match \X: logical Unicode character.  This is defined as
                       a Unicode extended Grapheme Cluster */
@@ -4032,7 +4081,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
               Prepend, that one will be a suitable Begin.
            */
 
-           if (locinput >= PL_regeol)
+           if (NEXTCHR_IS_EOS)
                sayNO;
            if  (! utf8_target) {
 
@@ -4048,7 +4097,9 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
 
                /* Utf8: See if is ( CR LF ); already know that locinput <
                 * PL_regeol, so locinput+1 is in bounds */
-               if (nextchr == '\r' && UCHARAT(locinput + 1) == '\n') {
+               if ( nextchr == '\r' && locinput+1 < PL_regeol
+                        && UCHARAT(locinput + 1) == '\n')
+                {
                    locinput += 2;
                }
                else {
@@ -4181,10 +4232,9 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
             exit_utf8:
                if (locinput > PL_regeol) sayNO;
            }
-           nextchr = UCHARAT(locinput);
            break;
             
-       case NREFFL:
+       case NREFFL:  /*  /\g{name}/il  */
        {   /* The capture buffer cases.  The ones beginning with N for the
               named buffers just convert to the equivalent numbered and
               pretend they were called as the corresponding numbered buffer
@@ -4204,28 +4254,28 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
            utf8_fold_flags = FOLDEQ_UTF8_LOCALE;
            goto do_nref;
 
-       case NREFFA:
+       case NREFFA:  /*  /\g{name}/iaa  */
            folder = foldEQ_latin1;
            fold_array = PL_fold_latin1;
            type = REFFA;
            utf8_fold_flags = FOLDEQ_UTF8_NOMIX_ASCII;
            goto do_nref;
 
-       case NREFFU:
+       case NREFFU:  /*  /\g{name}/iu  */
            folder = foldEQ_latin1;
            fold_array = PL_fold_latin1;
            type = REFFU;
            utf8_fold_flags = 0;
            goto do_nref;
 
-       case NREFF:
+       case NREFF:  /*  /\g{name}/i  */
            folder = foldEQ;
            fold_array = PL_fold;
            type = REFF;
            utf8_fold_flags = 0;
            goto do_nref;
 
-       case NREF:
+       case NREF:  /*  /\g{name}/   */
            type = REF;
            folder = NULL;
            fold_array = NULL;
@@ -4241,32 +4291,32 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
            }
            goto do_nref_ref_common;
 
-       case REFFL:
+       case REFFL:  /*  /\1/il  */
            PL_reg_flags |= RF_tainted;
            folder = foldEQ_locale;
            fold_array = PL_fold_locale;
            utf8_fold_flags = FOLDEQ_UTF8_LOCALE;
            goto do_ref;
 
-       case REFFA:
+       case REFFA:  /*  /\1/iaa  */
            folder = foldEQ_latin1;
            fold_array = PL_fold_latin1;
            utf8_fold_flags = FOLDEQ_UTF8_NOMIX_ASCII;
            goto do_ref;
 
-       case REFFU:
+       case REFFU:  /*  /\1/iu  */
            folder = foldEQ_latin1;
            fold_array = PL_fold_latin1;
            utf8_fold_flags = 0;
            goto do_ref;
 
-       case REFF:
+       case REFF:  /*  /\1/i  */
            folder = foldEQ;
            fold_array = PL_fold;
            utf8_fold_flags = 0;
            goto do_ref;
 
-        case REF:
+        case REF:  /*  /\1/    */
            folder = NULL;
            fold_array = NULL;
            utf8_fold_flags = 0;
@@ -4300,12 +4350,12 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
                    sayNO;
                }
                locinput = limit;
-               nextchr = UCHARAT(locinput);
                break;
            }
 
            /* Not utf8:  Inline the first character, for speed. */
-           if (UCHARAT(s) != nextchr &&
+           if (!NEXTCHR_IS_EOS &&
+                UCHARAT(s) != nextchr &&
                (type == REF ||
                 UCHARAT(s) != fold_array[nextchr]))
                sayNO;
@@ -4317,13 +4367,16 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
                           : ! folder(s, locinput, ln)))
                sayNO;
            locinput += ln;
-           nextchr = UCHARAT(locinput);
            break;
        }
-       case NOTHING:
-       case TAIL:
+
+       case NOTHING: /* null op; e.g. the 'nothing' following
+                       * the '*' in m{(a+|b)*}' */
            break;
-       case BACK:
+       case TAIL: /* placeholder while compiling (A|B|C) */
+           break;
+
+       case BACK: /* ??? doesn't appear to be used ??? */
            break;
 
 #undef  ST
@@ -4335,7 +4388,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
             regexp_internal *rei;
             regnode *startpoint;
 
-       case GOSTART:
+       case GOSTART: /*  (?R)  */
        case GOSUB: /*    /(...(?1))/   /(...(?&foo))/   */
            if (cur_eval && cur_eval->locinput==locinput) {
                 if (cur_eval->u.eval.close_paren == (U32)ARG(scan)) 
@@ -4359,6 +4412,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
             }
             goto eval_recurse_doit;
             assert(0); /* NOTREACHED */
+
         case EVAL:  /*   /(?{A})B/   /(??{A})B/  and /(?(?{A})X|Y)B/   */        
             if (cur_eval && cur_eval->locinput==locinput) {
                if ( ++nochange_depth > max_nochange_depth )
@@ -4623,7 +4677,6 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
                re->lastparen = 0;
                re->lastcloseparen = 0;
 
-               PL_reginput = locinput;
                PL_regsize = 0;
 
                /* XXXX This is too dramatic a measure... */
@@ -4647,7 +4700,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
                ST.prev_eval = cur_eval;
                cur_eval = st;
                /* now continue from first node in postoned RE */
-               PUSH_YES_STATE_GOTO(EVAL_AB, startpoint);
+               PUSH_YES_STATE_GOTO(EVAL_AB, startpoint, locinput);
                assert(0); /* NOTREACHED */
        }
 
@@ -4677,7 +4730,6 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
            rex = (struct regexp *)SvANY(rex_sv);
            rexi = RXi_GET(rex); 
 
-           PL_reginput = locinput;
            REGCP_UNWIND(ST.lastcp);
            regcppop(rex);
            cur_eval = ST.prev_eval;
@@ -4689,7 +4741,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
            sayNO_SILENT;
 #undef ST
 
-       case OPEN:
+       case OPEN: /*  (  */
            n = ARG(scan);  /* which paren pair */
            rex->offs[n].start_tmp = locinput - PL_bostr;
            if (n > PL_regsize)
@@ -4718,7 +4770,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
        (IV)rex->offs[n].end \
     ))
 
-       case CLOSE:
+       case CLOSE:  /*  )  */
            n = ARG(scan);  /* which paren pair */
            CLOSE_CAPTURE;
            /*if (n > PL_regsize)
@@ -4730,7 +4782,8 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
                goto fake_end;
            }    
            break;
-        case ACCEPT:
+
+        case ACCEPT:  /*  (*ACCEPT)  */
             if (ARG(scan)){
                 regnode *cursor;
                 for (cursor=scan;
@@ -4755,22 +4808,27 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
             }
            goto fake_end;
            /*NOTREACHED*/          
-       case GROUPP:
+
+       case GROUPP:  /*  (?(1))  */
            n = ARG(scan);  /* which paren pair */
            sw = cBOOL(rex->lastparen >= n && rex->offs[n].end != -1);
            break;
-       case NGROUPP:
+
+       case NGROUPP:  /*  (?(<name>))  */
            /* reg_check_named_buff_matched returns 0 for no match */
            sw = cBOOL(0 < reg_check_named_buff_matched(rex,scan));
            break;
-        case INSUBP:
+
+        case INSUBP:   /*  (?(R))  */
             n = ARG(scan);
             sw = (cur_eval && (!n || cur_eval->u.eval.close_paren == n));
             break;
-        case DEFINEP:
+
+        case DEFINEP:  /*  (?(DEFINE))  */
             sw = 0;
             break;
-       case IFTHEN:
+
+       case IFTHEN:   /*  (?(cond)A|B)  */
            PL_reg_leftiter = PL_reg_maxiter;           /* Void cache */
            if (sw)
                next = NEXTOPER(NEXTOPER(scan));
@@ -4780,7 +4838,8 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
                    next = NEXTOPER(NEXTOPER(next));
            }
            break;
-       case LOGICAL:
+
+       case LOGICAL:  /* modifier for EVAL and IFMATCH */
            logical = scan->flags;
            break;
 
@@ -4897,8 +4956,7 @@ NULL
            ST.count = -1;      /* this will be updated by WHILEM */
            ST.lastloc = NULL;  /* this will be updated by WHILEM */
 
-           PL_reginput = locinput;
-           PUSH_YES_STATE_GOTO(CURLYX_end, PREVOPER(next));
+           PUSH_YES_STATE_GOTO(CURLYX_end, PREVOPER(next), locinput);
            assert(0); /* NOTREACHED */
        }
 
@@ -4931,7 +4989,6 @@ NULL
            ST.cache_offset = 0;
            ST.cache_mask = 0;
            
-           PL_reginput = locinput;
 
            DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
                  "%*s  whilem: matched %ld out of %d..%d\n",
@@ -4945,7 +5002,7 @@ NULL
                cur_curlyx->u.curlyx.lastloc = locinput;
                REGCP_SET(ST.lastcp);
 
-               PUSH_STATE_GOTO(WHILEM_A_pre, A);
+               PUSH_STATE_GOTO(WHILEM_A_pre, A, locinput);
                assert(0); /* NOTREACHED */
            }
 
@@ -5019,7 +5076,8 @@ NULL
                cur_curlyx = cur_curlyx->u.curlyx.prev_curlyx;
                ST.cp = regcppush(rex, ST.save_curlyx->u.curlyx.parenfloor);
                REGCP_SET(ST.lastcp);
-               PUSH_YES_STATE_GOTO(WHILEM_B_min, ST.save_curlyx->u.curlyx.B);
+               PUSH_YES_STATE_GOTO(WHILEM_B_min, ST.save_curlyx->u.curlyx.B,
+                                    locinput);
                assert(0); /* NOTREACHED */
            }
 
@@ -5029,7 +5087,7 @@ NULL
                ST.cp = regcppush(rex, cur_curlyx->u.curlyx.parenfloor);
                cur_curlyx->u.curlyx.lastloc = locinput;
                REGCP_SET(ST.lastcp);
-               PUSH_STATE_GOTO(WHILEM_A_max, A);
+               PUSH_STATE_GOTO(WHILEM_A_max, A, locinput);
                assert(0); /* NOTREACHED */
            }
            goto do_whilem_B_max;
@@ -5062,7 +5120,6 @@ NULL
        case WHILEM_A_max_fail: /* just failed to match A in a maximal match */
            REGCP_UNWIND(ST.lastcp);
            regcppop(rex);      /* Restore some previous $<digit>s? */
-           PL_reginput = locinput;
            DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
                "%*s  whilem: failed, trying continuation...\n",
                REPORT_CODE_OFF+depth*2, "")
@@ -5082,7 +5139,8 @@ NULL
            /* now try B */
            ST.save_curlyx = cur_curlyx;
            cur_curlyx = cur_curlyx->u.curlyx.prev_curlyx;
-           PUSH_YES_STATE_GOTO(WHILEM_B_max, ST.save_curlyx->u.curlyx.B);
+           PUSH_YES_STATE_GOTO(WHILEM_B_max, ST.save_curlyx->u.curlyx.B,
+                                locinput);
            assert(0); /* NOTREACHED */
 
        case WHILEM_B_min_fail: /* just failed to match B in a minimal match */
@@ -5110,12 +5168,12 @@ NULL
                "%*s  trying longer...\n", REPORT_CODE_OFF+depth*2, "")
            );
            /* Try grabbing another A and see if it helps. */
-           PL_reginput = locinput;
            cur_curlyx->u.curlyx.lastloc = locinput;
            ST.cp = regcppush(rex, cur_curlyx->u.curlyx.parenfloor);
            REGCP_SET(ST.lastcp);
            PUSH_STATE_GOTO(WHILEM_A_min,
-               /*A*/ NEXTOPER(ST.save_curlyx->u.curlyx.me) + EXTRA_STEP_2ARGS);
+               /*A*/ NEXTOPER(ST.save_curlyx->u.curlyx.me) + EXTRA_STEP_2ARGS,
+                locinput);
            assert(0); /* NOTREACHED */
 
 #undef  ST
@@ -5134,21 +5192,21 @@ NULL
            ST.lastcloseparen = rex->lastcloseparen;
            ST.next_branch = next;
            REGCP_SET(ST.cp);
-           PL_reginput = locinput;
 
            /* Now go into the branch */
            if (has_cutgroup) {
-               PUSH_YES_STATE_GOTO(BRANCH_next, scan);    
+               PUSH_YES_STATE_GOTO(BRANCH_next, scan, locinput);
            } else {
-               PUSH_STATE_GOTO(BRANCH_next, scan);
+               PUSH_STATE_GOTO(BRANCH_next, scan, locinput);
            }
            assert(0); /* NOTREACHED */
-        case CUTGROUP:
-            PL_reginput = locinput;
+
+        case CUTGROUP:  /*  /(*THEN)/  */
             sv_yes_mark = st->u.mark.mark_name = scan->flags ? NULL :
                 MUTABLE_SV(rexi->data->data[ ARG( scan ) ]);
-            PUSH_STATE_GOTO(CUTGROUP_next,next);
+            PUSH_STATE_GOTO(CUTGROUP_next, next, locinput);
             assert(0); /* NOTREACHED */
+
         case CUTGROUP_next_fail:
             do_cutgroup = 1;
             no_final = 1;
@@ -5156,9 +5214,11 @@ NULL
                 sv_commit = st->u.mark.mark_name;
             sayNO;         
             assert(0); /* NOTREACHED */
+
         case BRANCH_next:
             sayYES;
             assert(0); /* NOTREACHED */
+
        case BRANCH_next_fail: /* that branch failed; try the next, if any */
            if (do_cutgroup) {
                do_cutgroup = 0;
@@ -5181,7 +5241,7 @@ NULL
            continue; /* execute next BRANCH[J] op */
            assert(0); /* NOTREACHED */
     
-       case MINMOD:
+       case MINMOD: /* next op will be non-greedy, e.g. A*?  */
            minmod = 1;
            break;
 
@@ -5222,26 +5282,22 @@ NULL
                goto curlym_do_B;
 
          curlym_do_A: /* execute the A in /A{m,n}B/  */
-           PL_reginput = locinput;
-           PUSH_YES_STATE_GOTO(CURLYM_A, ST.A); /* match A */
+           PUSH_YES_STATE_GOTO(CURLYM_A, ST.A, locinput); /* match A */
            assert(0); /* NOTREACHED */
 
        case CURLYM_A: /* we've just matched an A */
-           locinput = st->locinput;
-           nextchr = UCHARAT(locinput);
-
            ST.count++;
            /* after first match, determine A's length: u.curlym.alen */
            if (ST.count == 1) {
                if (PL_reg_match_utf8) {
-                   char *s = locinput;
-                   while (s < PL_reginput) {
+                   char *s = st->locinput;
+                   while (s < locinput) {
                        ST.alen++;
                        s += UTF8SKIP(s);
                    }
                }
                else {
-                   ST.alen = PL_reginput - locinput;
+                   ST.alen = locinput - st->locinput;
                }
                if (ST.alen == 0)
                    ST.count = ST.minmod ? ARG1(ST.me) : ARG2(ST.me);
@@ -5253,8 +5309,6 @@ NULL
                          (IV) ST.count, (IV)ST.alen)
            );
 
-           locinput = PL_reginput;
-                       
            if (cur_eval && cur_eval->u.eval.close_paren && 
                cur_eval->u.eval.close_paren == (U32)ST.me->flags) 
                goto fake_end;
@@ -5275,7 +5329,6 @@ NULL
                sayNO;
 
          curlym_do_B: /* execute the B in /A{m,n}B/  */
-           PL_reginput = locinput;
            if (ST.c1 == CHRTEST_UNINIT) {
                /* calculate c1 and c2 for possible match of 1st char
                 * following curly */
@@ -5316,9 +5369,10 @@ NULL
                    (int)(REPORT_CODE_OFF+(depth*2)),
                    "", (IV)ST.count)
                );
-           if (ST.c1 != CHRTEST_VOID
-                   && UCHARAT(PL_reginput) != ST.c1
-                   && UCHARAT(PL_reginput) != ST.c2)
+           if (       !NEXTCHR_IS_EOS
+                    && ST.c1 != CHRTEST_VOID
+                   && nextchr != ST.c1
+                   && nextchr != ST.c2)
            {
                /* simulate B failing */
                DEBUG_OPTIMISE_r(
@@ -5336,8 +5390,8 @@ NULL
                I32 paren = ST.me->flags;
                if (ST.count) {
                    rex->offs[paren].start
-                       = HOPc(PL_reginput, -ST.alen) - PL_bostr;
-                   rex->offs[paren].end = PL_reginput - PL_bostr;
+                       = HOPc(locinput, -ST.alen) - PL_bostr;
+                   rex->offs[paren].end = locinput - PL_bostr;
                    if ((U32)paren > rex->lastparen)
                        rex->lastparen = paren;
                    rex->lastcloseparen = paren;
@@ -5354,7 +5408,7 @@ NULL
                }
            }
            
-           PUSH_STATE_GOTO(CURLYM_B, ST.B); /* match B */
+           PUSH_STATE_GOTO(CURLYM_B, ST.B, locinput); /* match B */
            assert(0); /* NOTREACHED */
 
        case CURLYM_B_fail: /* just failed to match a B */
@@ -5370,7 +5424,7 @@ NULL
            if (ST.count == ARG1(ST.me) /* min */)
                sayNO;
            ST.count--;
-           locinput = HOPc(locinput, -ST.alen);
+           SET_locinput(HOPc(locinput, -ST.alen));
            goto curlym_do_B; /* try to match B */
 
 #undef ST
@@ -5398,12 +5452,14 @@ NULL
            ST.max = REG_INFTY;
            scan = NEXTOPER(scan);
            goto repeat;
+
        case PLUS:              /*  /A+B/ where A is width 1 */
            ST.paren = 0;
            ST.min = 1;
            ST.max = REG_INFTY;
            scan = NEXTOPER(scan);
            goto repeat;
+
        case CURLYN:            /*  /(A){m,n}B/ where A is width 1 */
            ST.paren = scan->flags;     /* Which paren to set */
            ST.lastparen      = rex->lastparen;
@@ -5419,6 +5475,7 @@ NULL
            }
             scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
            goto repeat;
+
        case CURLY:             /*  /A{m,n}B/ where A is width 1 */
            ST.paren = 0;
            ST.min = ARG1(scan);  /* min to match */
@@ -5493,13 +5550,13 @@ NULL
 
            ST.A = scan;
            ST.B = next;
-           PL_reginput = locinput;
            if (minmod) {
+                char *li = locinput;
                minmod = 0;
-               if (ST.min && regrepeat(rex, ST.A, ST.min, depth) < ST.min)
+               if (ST.min && regrepeat(rex, &li, ST.A, ST.min, depth) < ST.min)
                    sayNO;
+                SET_locinput(li);
                ST.count = ST.min;
-               locinput = PL_reginput;
                REGCP_SET(ST.cp);
                if (ST.c1 == CHRTEST_VOID)
                    goto curly_try_B_min;
@@ -5529,10 +5586,13 @@ NULL
 
            }
            else {
-               ST.count = regrepeat(rex, ST.A, ST.max, depth);
-               locinput = PL_reginput;
+                /* avoid taking address of locinput, so it can remain
+                 * a register var */
+                char *li = locinput;
+               ST.count = regrepeat(rex, &li, ST.A, ST.max, depth);
                if (ST.count < ST.min)
                    sayNO;
+                SET_locinput(li);
                if ((ST.count > ST.min)
                    && (PL_regkind[OP(ST.B)] == EOL) && (OP(ST.B) != MEOL))
                {
@@ -5542,7 +5602,7 @@ NULL
                    /* ...except that $ and \Z can match before *and* after
                       newline at the end.  Consider "\n\n" =~ /\n+\Z\n/.
                       We may back off by one in this case. */
-                   if (UCHARAT(PL_reginput - 1) == '\n' && OP(ST.B) != EOS)
+                   if (UCHARAT(locinput - 1) == '\n' && OP(ST.B) != EOS)
                        ST.min--;
                }
                REGCP_SET(ST.cp);
@@ -5554,7 +5614,6 @@ NULL
        case CURLY_B_min_known_fail:
            /* failed to find B in a non-greedy match where c1,c2 valid */
 
-           PL_reginput = locinput;     /* Could be reset... */
            REGCP_UNWIND(ST.cp);
             if (ST.paren) {
                 UNWIND_PAREN(ST.lastparen, ST.lastcloseparen);
@@ -5613,19 +5672,22 @@ NULL
                }
                if (locinput > ST.maxpos)
                    sayNO;
-               /* PL_reginput == oldloc now */
                if (n) {
+                    /* In /a{m,n}b/, ST.oldloc is at "a" x m, locinput is
+                     * at b; check that everything between oldloc and
+                     * locinput matches */
+                    char *li = ST.oldloc;
                    ST.count += n;
-                   if (regrepeat(rex, ST.A, n, depth) < n)
+                   if (regrepeat(rex, &li, ST.A, n, depth) < n)
                        sayNO;
+                    assert(n == REG_INFTY || locinput == li);
                }
-               PL_reginput = locinput;
                CURLY_SETPAREN(ST.paren, ST.count);
                if (cur_eval && cur_eval->u.eval.close_paren && 
                    cur_eval->u.eval.close_paren == (U32)ST.paren) {
                    goto fake_end;
                }
-               PUSH_STATE_GOTO(CURLY_B_min_known, ST.B);
+               PUSH_STATE_GOTO(CURLY_B_min_known, ST.B, locinput);
            }
            assert(0); /* NOTREACHED */
 
@@ -5638,10 +5700,15 @@ NULL
                 UNWIND_PAREN(ST.lastparen, ST.lastcloseparen);
             }
            /* failed -- move forward one */
-           PL_reginput = locinput;
-           if (regrepeat(rex, ST.A, 1, depth)) {
+            {
+                char *li = locinput;
+                if (!regrepeat(rex, &li, ST.A, 1, depth)) {
+                    sayNO;
+                }
+                locinput = li;
+            }
+            {
                ST.count++;
-               locinput = PL_reginput;
                if (ST.count <= ST.max || (ST.max == REG_INFTY &&
                        ST.count > 0)) /* count overflow ? */
                {
@@ -5651,10 +5718,9 @@ NULL
                        cur_eval->u.eval.close_paren == (U32)ST.paren) {
                         goto fake_end;
                     }
-                   PUSH_STATE_GOTO(CURLY_B_min, ST.B);
+                   PUSH_STATE_GOTO(CURLY_B_min, ST.B, locinput);
                }
            }
-           sayNO;
            assert(0); /* NOTREACHED */
 
 
@@ -5666,18 +5732,22 @@ NULL
             }
            {
                UV c = 0;
-               if (ST.c1 != CHRTEST_VOID)
-                   c = utf8_target ? utf8n_to_uvchr((U8*)PL_reginput,
+               if (ST.c1 != CHRTEST_VOID && locinput < PL_regeol)
+                   c = utf8_target ? utf8n_to_uvchr((U8*)locinput,
                                           UTF8_MAXBYTES, 0, uniflags)
-                               : (UV) UCHARAT(PL_reginput);
+                               : (UV) UCHARAT(locinput);
                /* If it could work, try it. */
-               if (ST.c1 == CHRTEST_VOID || c == (UV)ST.c1 || c == (UV)ST.c2) {
+               if (ST.c1 == CHRTEST_VOID
+                    || (locinput < PL_regeol &&
+                        (c == (UV)ST.c1 || c == (UV)ST.c2)))
+                {
                    CURLY_SETPAREN(ST.paren, ST.count);
-                   PUSH_STATE_GOTO(CURLY_B_max, ST.B);
+                   PUSH_STATE_GOTO(CURLY_B_max, ST.B, locinput);
                    assert(0); /* NOTREACHED */
                }
            }
            /* FALL THROUGH */
+
        case CURLY_B_max_fail:
            /* failed to find B in a greedy match */
 
@@ -5688,12 +5758,12 @@ NULL
            /*  back up. */
            if (--ST.count < ST.min)
                sayNO;
-           PL_reginput = locinput = HOPc(locinput, -1);
+           locinput = HOPc(locinput, -1);
            goto curly_try_B_max;
 
 #undef ST
 
-       case END:
+       case END: /*  last op of main pattern  */
            fake_end:
            if (cur_eval) {
                /* we've just finished A in /(??{A})B/; now continue with B */
@@ -5710,7 +5780,6 @@ NULL
                cur_curlyx = cur_eval->u.eval.prev_curlyx;
 
                REGCP_SET(st->u.eval.lastcp);
-               PL_reginput = locinput;
 
                /* Restore parens of the outer rex without popping the
                 * savestack */
@@ -5724,8 +5793,8 @@ NULL
                 if ( nochange_depth )
                    nochange_depth--;
 
-                PUSH_YES_STATE_GOTO(EVAL_AB,
-                       st->u.eval.prev_eval->u.eval.B); /* match B */
+                PUSH_YES_STATE_GOTO(EVAL_AB, st->u.eval.prev_eval->u.eval.B,
+                                    locinput); /* match B */
            }
 
            if (locinput < reginfo->till) {
@@ -5738,7 +5807,6 @@ NULL
                                                      
                sayNO_SILENT;           /* Cannot match: too short. */
            }
-           PL_reginput = locinput;     /* put where regtry can find it */
            sayYES;                     /* Success! */
 
        case SUCCEED: /* successful SUSPEND/UNLESSM/IFMATCH/CURLYM */
@@ -5746,15 +5814,17 @@ NULL
            PerlIO_printf(Perl_debug_log,
                "%*s  %ssubpattern success...%s\n",
                REPORT_CODE_OFF+depth*2, "", PL_colors[4], PL_colors[5]));
-           PL_reginput = locinput;     /* put where regtry can find it */
            sayYES;                     /* Success! */
 
 #undef  ST
 #define ST st->u.ifmatch
 
+        {
+            char *newstart;
+
        case SUSPEND:   /* (?>A) */
            ST.wanted = 1;
-           PL_reginput = locinput;
+           newstart = locinput;
            goto do_ifmatch;    
 
        case UNLESSM:   /* -ve lookaround: (?!A), or with flags, (?<!A) */
@@ -5779,10 +5849,10 @@ NULL
                        next = NULL;
                    break;
                }
-               PL_reginput = s;
+               newstart = s;
            }
            else
-               PL_reginput = locinput;
+               newstart = locinput;
 
          do_ifmatch:
            ST.me = scan;
@@ -5790,8 +5860,9 @@ NULL
            logical = 0; /* XXX: reset state of logical once it has been saved into ST */
            
            /* execute body of (?...A) */
-           PUSH_YES_STATE_GOTO(IFMATCH_A, NEXTOPER(NEXTOPER(scan)));
+           PUSH_YES_STATE_GOTO(IFMATCH_A, NEXTOPER(NEXTOPER(scan)), newstart);
            assert(0); /* NOTREACHED */
+        }
 
        case IFMATCH_A_fail: /* body of (?...A) failed */
            ST.wanted = !ST.wanted;
@@ -5804,11 +5875,9 @@ NULL
            else if (!ST.wanted)
                sayNO;
 
-           if (OP(ST.me) == SUSPEND)
-               locinput = PL_reginput;
-           else {
-               locinput = PL_reginput = st->locinput;
-               nextchr = UCHARAT(locinput);
+           if (OP(ST.me) != SUSPEND) {
+                /* restore old position except for (?>...) */
+               locinput = st->locinput;
            }
            scan = ST.me + ARG(ST.me);
            if (scan == ST.me)
@@ -5817,40 +5886,46 @@ NULL
 
 #undef ST
 
-       case LONGJMP:
+       case LONGJMP: /*  alternative with many branches compiles to
+                       * (BRANCHJ; EXACT ...; LONGJMP ) x N */
            next = scan + ARG(scan);
            if (next == scan)
                next = NULL;
            break;
-       case COMMIT:
+
+       case COMMIT:  /*  (*COMMIT)  */
            reginfo->cutpoint = PL_regeol;
            /* FALLTHROUGH */
-       case PRUNE:
-           PL_reginput = locinput;
+
+       case PRUNE:   /*  (*PRUNE)   */
            if (!scan->flags)
                sv_yes_mark = sv_commit = MUTABLE_SV(rexi->data->data[ ARG( scan ) ]);
-           PUSH_STATE_GOTO(COMMIT_next,next);
+           PUSH_STATE_GOTO(COMMIT_next, next, locinput);
            assert(0); /* NOTREACHED */
+
        case COMMIT_next_fail:
            no_final = 1;    
            /* FALLTHROUGH */       
-       case OPFAIL:
+
+       case OPFAIL:   /* (*FAIL)  */
            sayNO;
            assert(0); /* NOTREACHED */
 
 #define ST st->u.mark
-        case MARKPOINT:
+        case MARKPOINT: /*  (*MARK:foo)  */
             ST.prev_mark = mark_state;
             ST.mark_name = sv_commit = sv_yes_mark 
                 = MUTABLE_SV(rexi->data->data[ ARG( scan ) ]);
             mark_state = st;
-            ST.mark_loc = PL_reginput = locinput;
-            PUSH_YES_STATE_GOTO(MARKPOINT_next,next);
+            ST.mark_loc = locinput;
+            PUSH_YES_STATE_GOTO(MARKPOINT_next, next, locinput);
             assert(0); /* NOTREACHED */
+
         case MARKPOINT_next:
             mark_state = ST.prev_mark;
             sayYES;
             assert(0); /* NOTREACHED */
+
         case MARKPOINT_next_fail:
             if (popmark && sv_eq(ST.mark_name,popmark)) 
             {
@@ -5871,13 +5946,13 @@ NULL
                 mark_state->u.mark.mark_name : NULL;
             sayNO;
             assert(0); /* NOTREACHED */
-        case SKIP:
-            PL_reginput = locinput;
+
+        case SKIP:  /*  (*SKIP)  */
             if (scan->flags) {
                 /* (*SKIP) : if we fail we cut here*/
                 ST.mark_name = NULL;
                 ST.mark_loc = locinput;
-                PUSH_STATE_GOTO(SKIP_next,next);    
+                PUSH_STATE_GOTO(SKIP_next,next, locinput);
             } else {
                 /* (*SKIP:NAME) : if there is a (*MARK:NAME) fail where it was, 
                    otherwise do nothing.  Meaning we need to scan 
@@ -5890,13 +5965,14 @@ NULL
                                 find ) ) 
                     {
                         ST.mark_name = find;
-                        PUSH_STATE_GOTO( SKIP_next, next );
+                        PUSH_STATE_GOTO( SKIP_next, next, locinput);
                     }
                     cur = cur->u.mark.prev_mark;
                 }
             }    
             /* Didn't find our (*MARK:NAME) so ignore this (*SKIP:NAME) */
             break;    
+
        case SKIP_next_fail:
            if (ST.mark_name) {
                /* (*CUT:NAME) - Set up to search for the name as we 
@@ -5916,43 +5992,54 @@ NULL
             sayNO;
             assert(0); /* NOTREACHED */
 #undef ST
-        case LNBREAK:
-            if ((n=is_LNBREAK(locinput,utf8_target))) {
+
+        case LNBREAK: /* \R */
+            if ((n=is_LNBREAK_safe(locinput, PL_regeol, utf8_target))) {
                 locinput += n;
-                nextchr = UCHARAT(locinput);
             } else
                 sayNO;
             break;
 
 #define CASE_CLASS(nAmE)                              \
         case nAmE:                                    \
-           if (locinput >= PL_regeol)                \
+           if (NEXTCHR_IS_EOS)                       \
                sayNO;                                \
             if ((n=is_##nAmE(locinput,utf8_target))) {    \
                 locinput += n;                        \
-                nextchr = UCHARAT(locinput);          \
             } else                                    \
                 sayNO;                                \
             break;                                    \
         case N##nAmE:                                 \
-           if (locinput >= PL_regeol)                \
+           if (NEXTCHR_IS_EOS)                       \
                sayNO;                                \
             if ((n=is_##nAmE(locinput,utf8_target))) {    \
                 sayNO;                                \
             } else {                                  \
                 locinput += UTF8SKIP(locinput);       \
-                nextchr = UCHARAT(locinput);          \
             }                                         \
             break
 
-        CASE_CLASS(VERTWS);
-        CASE_CLASS(HORIZWS);
+        CASE_CLASS(VERTWS);  /*  \v \V  */
+        CASE_CLASS(HORIZWS); /*  \h \H  */
 #undef CASE_CLASS
 
        default:
            PerlIO_printf(Perl_error_log, "%"UVxf" %d\n",
                          PTR2UV(scan), OP(scan));
            Perl_croak(aTHX_ "regexp memory corruption");
+
+        /* this is a point to jump to in order to increment
+         * locinput by one character */
+        increment_locinput:
+            if (utf8_target) {
+                locinput += PL_utf8skip[nextchr];
+                /* locinput is allowed to go 1 char off the end, but not 2+ */
+                if (locinput > PL_regeol)
+                    sayNO;
+            }
+            else
+                locinput++;
+            break;
            
        } /* end switch */ 
 
@@ -5999,8 +6086,7 @@ NULL
                newst = S_push_slab(aTHX);
            PL_regmatch_state = newst;
 
-           locinput = PL_reginput;
-           nextchr = UCHARAT(locinput);
+           locinput = pushinput;
            st = newst;
            continue;
            assert(0); /* NOTREACHED */
@@ -6051,10 +6137,8 @@ yes:
        yes_state = st->u.yes.prev_yes_state;
        PL_regmatch_state = st;
         
-        if (no_final) {
+        if (no_final)
             locinput= st->locinput;
-            nextchr = UCHARAT(locinput);
-        }
        state_num = st->resume_state + no_final;
        goto reenter_switch;
     }
@@ -6099,7 +6183,6 @@ no_silent:
        }
        PL_regmatch_state = st;
        locinput= st->locinput;
-       nextchr = UCHARAT(locinput);
 
        DEBUG_STATE_pp("pop");
        depth--;
@@ -6138,19 +6221,22 @@ no_silent:
     /* clean up; in particular, free all slabs above current one */
     LEAVE_SCOPE(oldsave);
 
-    return result;
+    assert(!result ||  locinput - PL_bostr >= 0);
+    return result ?  locinput - PL_bostr : -1;
 }
 
 /*
  - regrepeat - repeatedly match something simple, report how many
- */
-/*
- * [This routine now assumes that it will only match on things of length 1.
- * That was true before, but now we assume scan - reginput is the count,
- * rather than incrementing count on every character.  [Er, except utf8.]]
+ *
+ * startposp - pointer a pointer to the start position.  This is updated
+ *             to point to the byte following the highest successful
+ *             match.
+ * p         - the regnode to be repeatedly matched against.
+ * max       - maximum number of characters to match.
+ * depth     - (for debugging) backtracking depth.
  */
 STATIC I32
-S_regrepeat(pTHX_ const regexp *prog, const regnode *p, I32 max, int depth)
+S_regrepeat(pTHX_ const regexp *prog, char **startposp, const regnode *p, I32 max, int depth)
 {
     dVAR;
     char *scan;
@@ -6165,7 +6251,7 @@ S_regrepeat(pTHX_ const regexp *prog, const regnode *p, I32 max, int depth)
 
     PERL_ARGS_ASSERT_REGREPEAT;
 
-    scan = PL_reginput;
+    scan = *startposp;
     if (max == REG_INFTY)
        max = I32_MAX;
     else if (max < loceol - scan)
@@ -6614,7 +6700,8 @@ S_regrepeat(pTHX_ const regexp *prog, const regnode *p, I32 max, int depth)
     case LNBREAK:
         if (utf8_target) {
            loceol = PL_regeol;
-           while (hardcount < max && scan < loceol && (c=is_LNBREAK_utf8(scan))) {
+           while (hardcount < max && scan < loceol &&
+                    (c=is_LNBREAK_utf8_safe(scan, loceol))) {
                scan += c;
                hardcount++;
            }
@@ -6624,7 +6711,7 @@ S_regrepeat(pTHX_ const regexp *prog, const regnode *p, I32 max, int depth)
              because we have a null terminated string, but we
              have to use hardcount in this situation
            */
-           while (scan < loceol && (c=is_LNBREAK_latin1(scan)))  {
+           while (scan < loceol && (c=is_LNBREAK_latin1_safe(scan, loceol))) {
                scan+=c;
                hardcount++;
            }
@@ -6633,24 +6720,28 @@ S_regrepeat(pTHX_ const regexp *prog, const regnode *p, I32 max, int depth)
     case HORIZWS:
         if (utf8_target) {
            loceol = PL_regeol;
-           while (hardcount < max && scan < loceol && (c=is_HORIZWS_utf8(scan))) {
+           while (hardcount < max && scan < loceol &&
+                    (c=is_HORIZWS_utf8_safe(scan, loceol)))
+            {
                scan += c;
                hardcount++;
            }
        } else {
-           while (scan < loceol && is_HORIZWS_latin1(scan)) 
+           while (scan < loceol && is_HORIZWS_latin1_safe(scan, loceol)) 
                scan++;         
        }       
        break;
     case NHORIZWS:
         if (utf8_target) {
            loceol = PL_regeol;
-           while (hardcount < max && scan < loceol && !is_HORIZWS_utf8(scan)) {
+           while (hardcount < max && scan < loceol &&
+                        !is_HORIZWS_utf8_safe(scan, loceol))
+            {
                scan += UTF8SKIP(scan);
                hardcount++;
            }
        } else {
-           while (scan < loceol && !is_HORIZWS_latin1(scan))
+           while (scan < loceol && !is_HORIZWS_latin1_safe(scan, loceol))
                scan++;
 
        }       
@@ -6658,12 +6749,14 @@ S_regrepeat(pTHX_ const regexp *prog, const regnode *p, I32 max, int depth)
     case VERTWS:
         if (utf8_target) {
            loceol = PL_regeol;
-           while (hardcount < max && scan < loceol && (c=is_VERTWS_utf8(scan))) {
+           while (hardcount < max && scan < loceol &&
+                            (c=is_VERTWS_utf8_safe(scan, loceol)))
+            {
                scan += c;
                hardcount++;
            }
        } else {
-           while (scan < loceol && is_VERTWS_latin1(scan)) 
+           while (scan < loceol && is_VERTWS_latin1_safe(scan, loceol)) 
                scan++;
 
        }       
@@ -6671,12 +6764,14 @@ S_regrepeat(pTHX_ const regexp *prog, const regnode *p, I32 max, int depth)
     case NVERTWS:
         if (utf8_target) {
            loceol = PL_regeol;
-           while (hardcount < max && scan < loceol && !is_VERTWS_utf8(scan)) {
+           while (hardcount < max && scan < loceol &&
+                                !is_VERTWS_utf8_safe(scan, loceol))
+            {
                scan += UTF8SKIP(scan);
                hardcount++;
            }
        } else {
-           while (scan < loceol && !is_VERTWS_latin1(scan)) 
+           while (scan < loceol && !is_VERTWS_latin1_safe(scan, loceol)) 
                scan++;
           
        }       
@@ -6689,8 +6784,8 @@ S_regrepeat(pTHX_ const regexp *prog, const regnode *p, I32 max, int depth)
     if (hardcount)
        c = hardcount;
     else
-       c = scan - PL_reginput;
-    PL_reginput = scan;
+       c = scan - *startposp;
+    *startposp = scan;
 
     DEBUG_r({
        GET_RE_DEBUG_FLAGS_DECL;