Remove deprecated flag from sv_nosharing.

[perl5.git] / regexec.c
diff --git a/regexec.c b/regexec.c

index 26825c7..97ea458 100644 (file)
--- a/regexec.c
+++ b/regexec.c
@@ -92,7 +92,7 @@ static const char utf8_locale_required[] =
  #ifdef DEBUGGING
  /* At least one required character in the target string is expressible only in
   * UTF-8. */
-static const char* const non_utf8_target_but_utf8_required
+static const char non_utf8_target_but_utf8_required[]
                  = "Can't match, because target string needs to be in UTF-8\n";
  #endif
  
@@ -1173,8 +1173,8 @@ Perl_re_intuit_start(pTHX_
  
      /* now look for the 'other' substring if defined */
  
-    if (utf8_target ? prog->substrs->data[other_ix].utf8_substr
-                    : prog->substrs->data[other_ix].substr)
+    if (prog->substrs->data[other_ix].utf8_substr
+        || prog->substrs->data[other_ix].substr)
      {
         /* Take into account the "other" substring. */
          char *last, *last1;
@@ -1184,6 +1184,11 @@ Perl_re_intuit_start(pTHX_
  
        do_other_substr:
          other = &prog->substrs->data[other_ix];
+        if (!utf8_target && !other->substr) {
+            if (!to_byte_substr(prog)) {
+                NON_UTF8_TARGET_BUT_UTF8_REQUIRED(fail);
+            }
+        }
  
          /* if "other" is anchored:
           * we've previously found a floating substr starting at check_at.
@@ -1720,7 +1725,7 @@ STMT_START {
          } else {                                                                    \
              uvc = _toFOLD_utf8_flags( (const U8*) uc, uc_end, foldbuf, &foldlen,    \
                                                                              flags); \
-            len = UTF8SKIP(uc);                                                     \
+            len = UTF8_SAFE_SKIP(uc, uc_end);                                       \
              skiplen = UVCHR_SKIP( uvc );                                            \
              foldlen -= skiplen;                                                     \
              uscan = foldbuf + skiplen;                                              \
@@ -1782,7 +1787,9 @@ STMT_START {
      STMT_START {                                            \
          while (s < strend) {                                \
              CODE                                            \
-            s += ((UTF8) ? UTF8SKIP(s) : 1);                \
+            s += ((UTF8)                                    \
+                  ? UTF8_SAFE_SKIP(s, reginfo->strend)      \
+                  : 1);                                     \
          }                                                   \
      } STMT_END
  
@@ -1796,7 +1803,7 @@ STMT_START {
  #define REXEC_FBC_CLASS_SCAN_GUTS(UTF8, COND)                  \
      if (COND) {                                                \
          FBC_CHECK_AND_TRY                                      \
-        s += ((UTF8) ? UTF8SKIP(s) : 1);                       \
+        s += ((UTF8) ? UTF8_SAFE_SKIP(s, reginfo->strend) : 1);\
          previous_occurrence_end = s;                           \
      }                                                          \
      else {                                                     \
@@ -1815,12 +1822,13 @@ STMT_START {
   * of the one we're looking for.  Knowing that, we can see right away if the
   * next occurrence is adjacent to the previous.  When 'doevery' is FALSE, we
   * don't accept the 2nd and succeeding adjacent occurrences */
-#define FBC_CHECK_AND_TRY                                      \
-        if (   (   doevery                                     \
-                || s != previous_occurrence_end)               \
-            && (reginfo->intuit || regtry(reginfo, &s)))       \
-        {                                                      \
-            goto got_it;                                       \
+#define FBC_CHECK_AND_TRY                                           \
+        if (   (   doevery                                          \
+                || s != previous_occurrence_end)                    \
+            && (   reginfo->intuit                                  \
+                || (s <= reginfo->strend && regtry(reginfo, &s))))  \
+        {                                                           \
+            goto got_it;                                            \
          }
  
  
@@ -1839,6 +1847,28 @@ STMT_START {
          previous_occurrence_end = s;                        \
      }
  
+/* This differs from the above macros in that it is passed a single byte that
+ * is known to begin the next occurrence of the thing being looked for in 's'.
+ * It does a memchr to find the next occurrence of 'byte', before trying 'COND'
+ * at that position. */
+#define REXEC_FBC_FIND_NEXT_UTF8_BYTE_SCAN(byte, COND)      \
+    while (s < strend) {                                    \
+        s = (char *) memchr(s, byte, strend -s);            \
+        if (s == NULL) {                                    \
+            s = (char *) strend;                            \
+            break;                                          \
+        }                                                   \
+                                                            \
+        if (COND) {                                         \
+            FBC_CHECK_AND_TRY                               \
+            s += UTF8_SAFE_SKIP(s, reginfo->strend);        \
+            previous_occurrence_end = s;                    \
+        }                                                   \
+        else {                                              \
+            s += UTF8SKIP(s);                               \
+        }                                                   \
+    }
+
  /* The three macros below are slightly different versions of the same logic.
   *
   * The first is for /a and /aa when the target string is UTF-8.  This can only
@@ -1945,9 +1975,12 @@ STMT_START {
      }
  
  /* This is the macro to use when we want to see if something that looks like it
- * could match, actually does, and if so exits the loop */
-#define REXEC_FBC_TRYIT                            \
-    if ((reginfo->intuit || regtry(reginfo, &s)))  \
+ * could match, actually does, and if so exits the loop.  It needs to be used
+ * only for bounds checking macros, as it allows a match attempt at the very
+ * end of the string, i.e., with s == strend (such a match should be zero
+ * length, without having to look at the string contents) */
+#define REXEC_FBC_TRYIT                                                     \
+    if (reginfo->intuit || (s <= reginfo->strend && regtry(reginfo, &s)))   \
          goto got_it
  
  /* The only difference between the BOUND and NBOUND cases is that
@@ -2129,21 +2162,47 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
          break;
  
      case ANYOFM:    /* ARG() is the base byte; FLAGS() the mask byte */
-        /* UTF-8ness doesn't matter, so use 0 */
+        /* UTF-8ness doesn't matter because this only matches UTF-8
+         * invariants, so use 0 */
          REXEC_FBC_FIND_NEXT_SCAN(0,
           (char *) find_next_masked((U8 *) s, (U8 *) strend,
                                     (U8) ARG(c), FLAGS(c)));
          break;
  
-    case NANYOFM:
-        REXEC_FBC_FIND_NEXT_SCAN(0,
+    case NANYOFM:   /* UTF-8ness does matter because can match UTF-8 variants.
+                     */
+        REXEC_FBC_FIND_NEXT_SCAN(utf8_target,
           (char *) find_span_end_mask((U8 *) s, (U8 *) strend,
                                     (U8) ARG(c), FLAGS(c)));
          break;
  
      case ANYOFH:
-        if (utf8_target) REXEC_FBC_CLASS_SCAN(TRUE,
+        if (utf8_target) {  /* Can't possibly match a non-UTF-8 target */
+            REXEC_FBC_CLASS_SCAN(TRUE,
+                  (   (U8) NATIVE_UTF8_TO_I8(*s) >= ANYOF_FLAGS(c)
+                   && reginclass(prog, c, (U8*)s, (U8*) strend, utf8_target)));
+        }
+        break;
+
+    case ANYOFHb:
+        if (utf8_target) {  /* Can't possibly match a non-UTF-8 target */
+
+            /* We know what the first byte of any matched string should be */
+            U8 first_byte = FLAGS(c);
+
+            REXEC_FBC_FIND_NEXT_UTF8_BYTE_SCAN(first_byte,
                        reginclass(prog, c, (U8*)s, (U8*) strend, utf8_target));
+        }
+        break;
+
+    case ANYOFHr:
+        if (utf8_target) {  /* Can't possibly match a non-UTF-8 target */
+            REXEC_FBC_CLASS_SCAN(TRUE,
+                  (   inRANGE((U8) NATIVE_UTF8_TO_I8(*s),
+                              LOWEST_ANYOF_HRx_BYTE(ANYOF_FLAGS(c)),
+                              HIGHEST_ANYOF_HRx_BYTE(ANYOF_FLAGS(c)))
+                   && reginclass(prog, c, (U8*)s, (U8*) strend, utf8_target)));
+        }
          break;
  
      case EXACTFAA_NO_TRIE: /* This node only generated for non-utf8 patterns */
@@ -2355,7 +2414,7 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
              {
                  goto got_it;
              }
-            s += (utf8_target) ? UTF8SKIP(s) : 1;
+            s += (utf8_target) ? UTF8_SAFE_SKIP(s, reginfo->strend) : 1;
          }
          break;
      }
@@ -2439,7 +2498,7 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
                      }
  
                      /* Didn't match.  Try at the next position (if there is one) */
-                    s += (utf8_target) ? UTF8SKIP(s) : 1;
+                    s += (utf8_target) ? UTF8_SAFE_SKIP(s, reginfo->strend) : 1;
                      if (UNLIKELY(s >= reginfo->strend)) {
                          break;
                      }
@@ -2463,7 +2522,7 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
                              goto got_it;
                          }
                          before = after;
-                        s += UTF8SKIP(s);
+                        s += UTF8_SAFE_SKIP(s, reginfo->strend);
                      }
                  }
                  else {  /* Not utf8.  Everything is a GCB except between CR and
@@ -2481,7 +2540,9 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
  
                  /* And, since this is a bound, it can match after the final
                   * character in the string */
-                if ((reginfo->intuit || regtry(reginfo, &s))) {
+                if (   reginfo->intuit
+                    || (s <= reginfo->strend && regtry(reginfo, &s)))
+                {
                      goto got_it;
                  }
                  break;
@@ -2491,7 +2552,7 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
                      if (reginfo->intuit || regtry(reginfo, &s)) {
                          goto got_it;
                      }
-                    s += (utf8_target) ? UTF8SKIP(s) : 1;
+                    s += (utf8_target) ? UTF8_SAFE_SKIP(s, reginfo->strend) : 1;
                      if (UNLIKELY(s >= reginfo->strend)) {
                          break;
                      }
@@ -2515,7 +2576,7 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
                              goto got_it;
                          }
                          before = after;
-                        s += UTF8SKIP(s);
+                        s += UTF8_SAFE_SKIP(s, reginfo->strend);
                      }
                  }
                  else {  /* Not utf8. */
@@ -2537,7 +2598,9 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
                      }
                  }
  
-                if (reginfo->intuit || regtry(reginfo, &s)) {
+                if (   reginfo->intuit
+                    || (s <= reginfo->strend && regtry(reginfo, &s)))
+                {
                      goto got_it;
                  }
  
@@ -2548,7 +2611,7 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
                      if (reginfo->intuit || regtry(reginfo, &s)) {
                          goto got_it;
                      }
-                    s += (utf8_target) ? UTF8SKIP(s) : 1;
+                    s += (utf8_target) ? UTF8_SAFE_SKIP(s, reginfo->strend) : 1;
                      if (UNLIKELY(s >= reginfo->strend)) {
                          break;
                      }
@@ -2573,7 +2636,7 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
                              goto got_it;
                          }
                          before = after;
-                        s += UTF8SKIP(s);
+                        s += UTF8_SAFE_SKIP(s, reginfo->strend);
                      }
                  }
                  else {  /* Not utf8. */
@@ -2598,7 +2661,9 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
                  /* Here are at the final position in the target string.  The SB
                   * value is always true here, so matches, depending on other
                   * constraints */
-                if (reginfo->intuit || regtry(reginfo, &s)) {
+                if (   reginfo->intuit
+                    || (s <= reginfo->strend && regtry(reginfo, &s)))
+                {
                      goto got_it;
                  }
  
@@ -2609,7 +2674,7 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
                      if (reginfo->intuit || regtry(reginfo, &s)) {
                          goto got_it;
                      }
-                    s += (utf8_target) ? UTF8SKIP(s) : 1;
+                    s += (utf8_target) ? UTF8_SAFE_SKIP(s, reginfo->strend) : 1;
                      if (UNLIKELY(s >= reginfo->strend)) {
                          break;
                      }
@@ -2643,7 +2708,7 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
                          }
                          previous = before;
                          before = after;
-                        s += UTF8SKIP(s);
+                        s += UTF8_SAFE_SKIP(s, reginfo->strend);
                      }
                  }
                  else {  /* Not utf8. */
@@ -2668,7 +2733,9 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
                      }
                  }
  
-                if (reginfo->intuit || regtry(reginfo, &s)) {
+                if (   reginfo->intuit
+                    || (s <= reginfo->strend && regtry(reginfo, &s)))
+                {
                      goto got_it;
                  }
          }
@@ -2985,7 +3052,9 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
                          LEAVE;
                          goto got_it;
                      }
-                    s = HOPc(s,1);
+                    if (s < reginfo->strend) {
+                        s = HOPc(s,1);
+                    }
                      DEBUG_TRIE_EXECUTE_r({
                          Perl_re_printf( aTHX_ "Pattern failed. Looking for new start point...\n");
                      });
@@ -3305,7 +3374,7 @@ Perl_regexec_flags(pTHX_ REGEXP * const rx, char *stringarg, char *strend,
              RXp_MATCH_UTF8_set(prog, utf8_target);
              prog->offs[0].start = s - strbeg;
              prog->offs[0].end = utf8_target
-                ? (char*)utf8_hop((U8*)s, prog->minlenret) - strbeg
+                ? (char*)utf8_hop_forward((U8*)s, prog->minlenret, (U8 *) strend) - strbeg
                  : s - strbeg + prog->minlenret;
              if ( !(flags & REXEC_NOT_FIRST) )
                  S_reg_set_capture_string(aTHX_ rx,
@@ -3504,7 +3573,7 @@ Perl_regexec_flags(pTHX_ REGEXP * const rx, char *stringarg, char *strend,
                 if (*s == ch) {
                     DEBUG_EXECUTE_r( did_match = 1 );
                     if (regtry(reginfo, &s)) goto got_it;
-                   s += UTF8SKIP(s);
+                   s += UTF8_SAFE_SKIP(s, strend);
                     while (s < strend && *s == ch)
                         s += UTF8SKIP(s);
                 }
@@ -5465,16 +5534,20 @@ S_backup_one_WB(pTHX_ WB_enum * previous, const U8 * const strbeg, U8 ** curpos,
  
  /* push a new state then goto it */
  
-#define PUSH_STATE_GOTO(state, node, input) \
+#define PUSH_STATE_GOTO(state, node, input, eol, sr0)       \
      pushinput = input; \
+    pusheol = eol; \
+    pushsr0 = sr0; \
      scan = node; \
      st->resume_state = state; \
      goto push_state;
  
  /* push a new state with success backtracking, then goto it */
  
-#define PUSH_YES_STATE_GOTO(state, node, input) \
+#define PUSH_YES_STATE_GOTO(state, node, input, eol, sr0)   \
      pushinput = input; \
+    pusheol = eol;     \
+    pushsr0 = sr0; \
      scan = node; \
      st->resume_state = state; \
      goto push_yes_state;
@@ -5595,8 +5668,8 @@ The topmost backtrack state, pointed to by st, is usually free. If you
  want to claim it, populate any ST.foo fields in it with values you wish to
  save, then do one of
  
-       PUSH_STATE_GOTO(resume_state, node, newinput);
-       PUSH_YES_STATE_GOTO(resume_state, node, newinput);
+       PUSH_STATE_GOTO(resume_state, node, newinput, new_eol, new_sr0);
+       PUSH_YES_STATE_GOTO(resume_state, node, newinput, new_eol, new_sr0);
  
  which sets that backtrack state's resume value to 'resume_state', pushes a
  new free entry to the top of the backtrack stack, then goes to 'node'.
@@ -5624,6 +5697,16 @@ allocated, and is never freed until interpreter destruction. When the slab
  is full, a new one is allocated and chained to the end. At exit from
  regmatch(), slabs allocated since entry are freed.
  
+In order to work with variable length lookbehinds, an upper limit is placed on
+lookbehinds which is set to where the match position is at the end of where the
+lookbehind would get to.  Nothing in the lookbehind should match above that,
+except we should be able to look beyond it for things like \b, which need the
+next character in the string to be able to determine if this is a boundary or
+not.  We also can't match the end of string/line unless we are also at the end
+of the entire string, so NEXTCHR_IS_EOS remains the same, and for those OPs
+that match a width, we have to add a condition that they are within the legal
+bounds of our window into the string.
+
  */
  
  /* returns -1 on failure, $+[0] on success */
@@ -5645,7 +5728,10 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
      SSize_t ln = 0; /* len or last;  init to avoid compiler warning */
      SSize_t endref = 0; /* offset of end of backref when ln is start */
      char *locinput = startpos;
+    char *loceol = reginfo->strend;
      char *pushinput; /* where to continue after a PUSH */
+    char *pusheol;   /* where to stop matching (loceol) after a PUSH */
+    U8   *pushsr0;   /* save starting pos of script run */
      I32 nextchr;   /* is always set to UCHARAT(locinput), or -1 at EOS */
  
      bool result = 0;       /* return value of S_regmatch */
@@ -5782,7 +5868,8 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
             /* update the startpoint */
             st->u.keeper.val = rex->offs[0].start;
             rex->offs[0].start = locinput - reginfo->strbeg;
-           PUSH_STATE_GOTO(KEEPS_next, next, locinput);
+           PUSH_STATE_GOTO(KEEPS_next, next, locinput, loceol,
+                            script_run_begin);
             NOT_REACHED; /* NOTREACHED */
  
         case KEEPS_next_fail:
@@ -5809,13 +5896,17 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
             break;
  
         case SANY: /*  /./s  */
-           if (NEXTCHR_IS_EOS)
+           if (NEXTCHR_IS_EOS || locinput >= loceol)
                 sayNO;
              goto increment_locinput;
  
         case REG_ANY: /*  /./  */
-           if ((NEXTCHR_IS_EOS) || nextchr == '\n')
+           if (   NEXTCHR_IS_EOS
+                || locinput >= loceol
+                || nextchr == '\n')
+            {
                 sayNO;
+            }
              goto increment_locinput;
  
  
@@ -5825,7 +5916,10 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
              /* In this case the charclass data is available inline so
                 we can fail fast without a lot of extra overhead. 
               */
-            if(!NEXTCHR_IS_EOS && !ANYOF_BITMAP_TEST(scan, nextchr)) {
+            if ( !   NEXTCHR_IS_EOS
+                &&   locinput < loceol
+                && ! ANYOF_BITMAP_TEST(scan, nextchr))
+            {
                  DEBUG_EXECUTE_r(
                      Perl_re_exec_indentf( aTHX_  "%sTRIE: failed to match trie start class...%s\n",
                                depth, PL_colors[4], PL_colors[5])
@@ -5904,7 +5998,9 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
                      }
                  }
                  if (   trie->bitmap
-                    && (NEXTCHR_IS_EOS || !TRIE_BITMAP_TEST(trie, nextchr)))
+                    && (     NEXTCHR_IS_EOS
+                        ||   locinput >= loceol
+                        || ! TRIE_BITMAP_TEST(trie, nextchr)))
                  {
                     if (trie->states[ state ].wordnum) {
                          DEBUG_EXECUTE_r(
@@ -5942,7 +6038,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
                    shortest accept state and the wordnum of the longest
                    accept state */
  
-               while ( state && uc <= (U8*)(reginfo->strend) ) {
+               while ( state && uc <= (U8*)(loceol) ) {
                      U32 base = trie->states[ state ].trans.base;
                      UV uvc = 0;
                      U16 charid = 0;
@@ -5977,10 +6073,10 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
                     });
  
                     /* read a char and goto next state */
-                   if ( base && (foldlen || uc < (U8*)(reginfo->strend))) {
+                   if ( base && (foldlen || uc < (U8*)(loceol))) {
                         I32 offset;
                         REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc,
-                                             (U8 *) reginfo->strend, uscan,
+                                             (U8 *) loceol, uscan,
                                               len, uvc, charid, foldlen,
                                               foldbuf, uniflags);
                         charcount++;
@@ -6104,6 +6200,8 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
  
                     while (chars) {
                         if (utf8_target) {
+                            /* XXX This assumes the length is well-formed, as
+                             * does the UTF8SKIP below */
                             uvc = utf8n_to_uvchr((U8*)uc, UTF8_MAXLEN, &len,
                                                     uniflags);
                             uc += len;
@@ -6147,7 +6245,8 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
             });
  
             if ( ST.accepted > 1 || has_cutgroup || ST.jump ) {
-               PUSH_STATE_GOTO(TRIE_next, scan, (char*)uc);
+               PUSH_STATE_GOTO(TRIE_next, scan, (char*)uc, loceol,
+                                script_run_begin);
                 NOT_REACHED; /* NOTREACHED */
             }
             /* only one choice left - just continue */
@@ -6214,7 +6313,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
                       * is an invariant, but there are tests in the test suite
                       * dealing with (??{...}) which violate this) */
                     while (s < e) {
-                       if (l >= reginfo->strend
+                       if (   l >= loceol
                              || UTF8_IS_ABOVE_LATIN1(* (U8*) l))
                          {
                              sayNO;
@@ -6238,7 +6337,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
                 else {
                     /* The target is not utf8, the pattern is utf8. */
                     while (s < e) {
-                        if (l >= reginfo->strend
+                        if (   l >= loceol
                              || UTF8_IS_ABOVE_LATIN1(* (U8*) s))
                          {
                              sayNO;
@@ -6264,7 +6363,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
              else {
                  /* The target and the pattern have the same utf8ness. */
                  /* Inline the first character, for speed. */
-                if (reginfo->strend - locinput < ln
+                if (   loceol - locinput < ln
                      || UCHARAT(s) != nextchr
                      || (ln > 1 && memNE(s, locinput, ln)))
                  {
@@ -6360,7 +6459,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
               /* Either target or the pattern are utf8, or has the issue where
                * the fold lengths may differ. */
                 const char * const l = locinput;
-               char *e = reginfo->strend;
+               char *e = loceol;
  
                 if (! foldEQ_utf8_flags(l, &e, 0,  utf8_target,
                                          s, 0,  ln, is_utf8_pat,fold_utf8_flags))
@@ -6378,7 +6477,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
             {
                 sayNO;
             }
-           if (reginfo->strend - locinput < ln)
+           if (loceol - locinput < ln)
                 sayNO;
             if (ln > 1 && ! folder(locinput, s, ln))
                 sayNO;
@@ -6674,7 +6773,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
              /* FALLTHROUGH */
         case ANYOFD:  /*   /[abc]/d       */
         case ANYOF:  /*   /[abc]/       */
-            if (NEXTCHR_IS_EOS)
+            if (NEXTCHR_IS_EOS || locinput >= loceol)
                  sayNO;
             if (  (! utf8_target || UTF8_IS_INVARIANT(*locinput))
                 && ! (ANYOF_FLAGS(scan) & ~ ANYOF_MATCHES_ALL_ABOVE_BITMAP))
@@ -6685,7 +6784,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
                 locinput++;
              }
              else {
-               if (!reginclass(rex, scan, (U8*)locinput, (U8*)reginfo->strend,
+               if (!reginclass(rex, scan, (U8*)locinput, (U8*) loceol,
                                                                     utf8_target))
                  {
                     sayNO;
@@ -6695,14 +6794,20 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
             break;
  
          case ANYOFM:
-            if (NEXTCHR_IS_EOS || (UCHARAT(locinput) & FLAGS(scan)) != ARG(scan)) {
+            if (   NEXTCHR_IS_EOS
+                || (UCHARAT(locinput) & FLAGS(scan)) != ARG(scan)
+                || locinput >= loceol)
+            {
                  sayNO;
              }
              locinput++; /* ANYOFM is always single byte */
              break;
  
          case NANYOFM:
-            if (NEXTCHR_IS_EOS || (UCHARAT(locinput) & FLAGS(scan)) == ARG(scan)) {
+            if (   NEXTCHR_IS_EOS
+                || (UCHARAT(locinput) & FLAGS(scan)) == ARG(scan)
+                || locinput >= loceol)
+            {
                  sayNO;
              }
              goto increment_locinput;
@@ -6711,7 +6816,34 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
          case ANYOFH:
              if (   ! utf8_target
                  ||   NEXTCHR_IS_EOS
-               || ! reginclass(rex, scan, (U8*)locinput, (U8*)reginfo->strend,
+                ||   ANYOF_FLAGS(scan) > NATIVE_UTF8_TO_I8((U8) *locinput)
+               || ! reginclass(rex, scan, (U8*)locinput, (U8*) loceol,
+                                                                   utf8_target))
+            {
+                sayNO;
+            }
+            goto increment_locinput;
+            break;
+
+        case ANYOFHb:
+            if (   ! utf8_target
+                ||   NEXTCHR_IS_EOS
+                ||   ANYOF_FLAGS(scan) != (U8) *locinput
+               || ! reginclass(rex, scan, (U8*)locinput, (U8*) loceol,
+                                                                  utf8_target))
+            {
+                sayNO;
+            }
+            goto increment_locinput;
+            break;
+
+        case ANYOFHr:
+            if (   ! utf8_target
+                ||   NEXTCHR_IS_EOS
+                || ! inRANGE((U8) NATIVE_UTF8_TO_I8(*locinput),
+                             LOWEST_ANYOF_HRx_BYTE(ANYOF_FLAGS(scan)),
+                             HIGHEST_ANYOF_HRx_BYTE(ANYOF_FLAGS(scan)))
+               || ! reginclass(rex, scan, (U8*)locinput, (U8*) loceol,
                                                                     utf8_target))
              {
                  sayNO;
@@ -6728,7 +6860,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
  
          case POSIXL:    /* \w or [:punct:] etc. under /l */
              _CHECK_AND_WARN_PROBLEMATIC_LOCALE;
-            if (NEXTCHR_IS_EOS)
+            if (NEXTCHR_IS_EOS || locinput >= loceol)
                  sayNO;
  
              /* Use isFOO_lc() for characters within Latin1.  (Note that
@@ -6773,7 +6905,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
  
          case NPOSIXA:   /* \W or [:^punct:] etc. under /a */
  
-            if (NEXTCHR_IS_EOS) {
+            if (NEXTCHR_IS_EOS || locinput >= loceol) {
                  sayNO;
              }
  
@@ -6792,7 +6924,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
               * UTF-8, and also from NPOSIXA even in UTF-8 when the current
               * character is a single byte */
  
-            if (NEXTCHR_IS_EOS) {
+            if (NEXTCHR_IS_EOS || locinput >= loceol) {
                  sayNO;
              }
  
@@ -6815,7 +6947,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
  
          case POSIXU:    /* \w or [:punct:] etc. under /u */
            utf8_posix:
-            if (NEXTCHR_IS_EOS) {
+            if (NEXTCHR_IS_EOS || locinput >= loceol) {
                  sayNO;
              }
  
@@ -6890,13 +7022,13 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
                          }
                          break;
                  }
-                locinput += UTF8SKIP(locinput);
+                locinput += UTF8_SAFE_SKIP(locinput, reginfo->strend);
              }
              break;
  
         case CLUMP: /* Match \X: logical Unicode character.  This is defined as
                        a Unicode extended Grapheme Cluster */
-           if (NEXTCHR_IS_EOS)
+           if (NEXTCHR_IS_EOS || locinput >= loceol)
                 sayNO;
             if  (! utf8_target) {
  
@@ -6905,7 +7037,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
                 locinput++;         /* Match the . or CR */
                 if (nextchr == '\r' /* And if it was CR, and the next is LF,
                                        match the LF */
-                   && locinput < reginfo->strend
+                   && locinput <  loceol
                     && UCHARAT(locinput) == '\n')
                  {
                      locinput++;
@@ -6922,7 +7054,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
                   * current character.  (There is always a break at the
                   * end-of-input) */
                  locinput += UTF8SKIP(locinput);
-                while (locinput < reginfo->strend) {
+                while (locinput < loceol) {
                      GCB_enum cur_gcb = getGCB_VAL_UTF8((U8*) locinput,
                                                           (U8*) reginfo->strend);
                      if (isGCB(prev_gcb, cur_gcb,
@@ -6940,7 +7072,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
             }
             break;
              
-       case NREFFL:  /*  /\g{name}/il  */
+       case REFFLN:  /*  /\g{name}/il  */
         {   /* The capture buffer cases.  The ones beginning with N for the
                named buffers just convert to the equivalent numbered and
                pretend they were called as the corresponding numbered buffer
@@ -6960,28 +7092,28 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
             utf8_fold_flags = FOLDEQ_LOCALE;
             goto do_nref;
  
-       case NREFFA:  /*  /\g{name}/iaa  */
+       case REFFAN:  /*  /\g{name}/iaa  */
             folder = foldEQ_latin1;
             fold_array = PL_fold_latin1;
             type = REFFA;
             utf8_fold_flags = FOLDEQ_UTF8_NOMIX_ASCII;
             goto do_nref;
  
-       case NREFFU:  /*  /\g{name}/iu  */
+       case REFFUN:  /*  /\g{name}/iu  */
             folder = foldEQ_latin1;
             fold_array = PL_fold_latin1;
             type = REFFU;
             utf8_fold_flags = 0;
             goto do_nref;
  
-       case NREFF:  /*  /\g{name}/i  */
+       case REFFN:  /*  /\g{name}/i  */
             folder = foldEQ;
             fold_array = PL_fold;
             type = REFF;
             utf8_fold_flags = 0;
             goto do_nref;
  
-       case NREF:  /*  /\g{name}/   */
+       case REFN:  /*  /\g{name}/   */
             type = REF;
             folder = NULL;
             fold_array = NULL;
@@ -7044,11 +7176,11 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
             if (type != REF     /* REF can do byte comparison */
                 && (utf8_target || type == REFFU || type == REFFL))
             {
-               char * limit = reginfo->strend;
+               char * limit = loceol;
  
                 /* This call case insensitively compares the entire buffer
                     * at s, with the current input starting at locinput, but
-                    * not going off the end given by reginfo->strend, and
+                    * not going off the end given by loceol, and
                      * returns in <limit> upon success, how much of the
                      * current input was matched */
                 if (! foldEQ_utf8_flags(s, NULL, endref - ln, utf8_target,
@@ -7061,13 +7193,16 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
             }
  
             /* Not utf8:  Inline the first character, for speed. */
-           if (!NEXTCHR_IS_EOS &&
-                UCHARAT(s) != nextchr &&
-               (type == REF ||
-                UCHARAT(s) != fold_array[nextchr]))
+           if ( ! NEXTCHR_IS_EOS
+                && locinput < loceol
+                && UCHARAT(s) != nextchr
+                && (   type == REF
+                    || UCHARAT(s) != fold_array[nextchr]))
+            {
                 sayNO;
+            }
             ln = endref - ln;
-           if (locinput + ln > reginfo->strend)
+           if (locinput + ln > loceol)
                 sayNO;
             if (ln > 1 && (type == REF
                            ? memNE(s, locinput, ln)
@@ -7374,7 +7509,8 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
                  PL_curpm = PL_reg_curpm;
  
                 if (logical != 2) {
-                    PUSH_STATE_GOTO(EVAL_B, next, locinput);
+                    PUSH_STATE_GOTO(EVAL_B, next, locinput, loceol,
+                                    script_run_begin);
                     /* NOTREACHED */
                  }
             }
@@ -7474,7 +7610,8 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
                 ST.prev_eval = cur_eval;
                 cur_eval = st;
                 /* now continue from first node in postoned RE */
-               PUSH_YES_STATE_GOTO(EVAL_postponed_AB, startpoint, locinput);
+               PUSH_YES_STATE_GOTO(EVAL_postponed_AB, startpoint, locinput,
+                                    loceol, script_run_begin);
                 NOT_REACHED; /* NOTREACHED */
         }
  
@@ -7630,7 +7767,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
             sw = cBOOL(rex->lastparen >= n && rex->offs[n].end != -1);
             break;
  
-       case NGROUPP:  /*  (?(<name>))  */
+       case GROUPPN:  /*  (?(<name>))  */
             /* reg_check_named_buff_matched returns 0 for no match */
             sw = cBOOL(0 < reg_check_named_buff_matched(rex,scan));
             break;
@@ -7774,7 +7911,8 @@ NULL
             ST.count = -1;      /* this will be updated by WHILEM */
             ST.lastloc = NULL;  /* this will be updated by WHILEM */
  
-           PUSH_YES_STATE_GOTO(CURLYX_end, PREVOPER(next), locinput);
+           PUSH_YES_STATE_GOTO(CURLYX_end, PREVOPER(next), locinput, loceol,
+                                script_run_begin);
             NOT_REACHED; /* NOTREACHED */
         }
  
@@ -7822,7 +7960,8 @@ NULL
                 cur_curlyx->u.curlyx.lastloc = locinput;
                 REGCP_SET(ST.lastcp);
  
-               PUSH_STATE_GOTO(WHILEM_A_pre, A, locinput);
+               PUSH_STATE_GOTO(WHILEM_A_pre, A, locinput, loceol,
+                                script_run_begin);
                 NOT_REACHED; /* NOTREACHED */
             }
  
@@ -7930,7 +8069,7 @@ NULL
                 ST.save_curlyx = cur_curlyx;
                 cur_curlyx = cur_curlyx->u.curlyx.prev_curlyx;
                 PUSH_YES_STATE_GOTO(WHILEM_B_min, ST.save_curlyx->u.curlyx.B,
-                                    locinput);
+                                    locinput, loceol, script_run_begin);
                 NOT_REACHED; /* NOTREACHED */
             }
  
@@ -7941,7 +8080,8 @@ NULL
                              maxopenparen);
                 cur_curlyx->u.curlyx.lastloc = locinput;
                 REGCP_SET(ST.lastcp);
-               PUSH_STATE_GOTO(WHILEM_A_max, A, locinput);
+               PUSH_STATE_GOTO(WHILEM_A_max, A, locinput, loceol,
+                                script_run_begin);
                 NOT_REACHED; /* NOTREACHED */
             }
             goto do_whilem_B_max;
@@ -7993,7 +8133,7 @@ NULL
             ST.save_curlyx = cur_curlyx;
             cur_curlyx = cur_curlyx->u.curlyx.prev_curlyx;
             PUSH_YES_STATE_GOTO(WHILEM_B_max, ST.save_curlyx->u.curlyx.B,
-                                locinput);
+                                locinput, loceol, script_run_begin);
             NOT_REACHED; /* NOTREACHED */
  
         case WHILEM_B_min_fail: /* just failed to match B in a minimal match */
@@ -8024,7 +8164,7 @@ NULL
             REGCP_SET(ST.lastcp);
             PUSH_STATE_GOTO(WHILEM_A_min,
                 /*A*/ NEXTOPER(ST.save_curlyx->u.curlyx.me) + EXTRA_STEP_2ARGS,
-                locinput);
+                locinput, loceol, script_run_begin);
             NOT_REACHED; /* NOTREACHED */
  
  #undef  ST
@@ -8046,9 +8186,11 @@ NULL
  
             /* Now go into the branch */
             if (has_cutgroup) {
-               PUSH_YES_STATE_GOTO(BRANCH_next, scan, locinput);
+               PUSH_YES_STATE_GOTO(BRANCH_next, scan, locinput, loceol,
+                                    script_run_begin);
             } else {
-               PUSH_STATE_GOTO(BRANCH_next, scan, locinput);
+               PUSH_STATE_GOTO(BRANCH_next, scan, locinput, loceol,
+                                script_run_begin);
             }
             NOT_REACHED; /* NOTREACHED */
  
@@ -8056,7 +8198,8 @@ NULL
              sv_yes_mark = st->u.mark.mark_name = scan->flags
                  ? MUTABLE_SV(rexi->data->data[ ARG( scan ) ])
                  : NULL;
-            PUSH_STATE_GOTO(CUTGROUP_next, next, locinput);
+            PUSH_STATE_GOTO(CUTGROUP_next, next, locinput, loceol,
+                            script_run_begin);
              NOT_REACHED; /* NOTREACHED */
  
          case CUTGROUP_next_fail:
@@ -8133,7 +8276,8 @@ NULL
                 goto curlym_do_B;
  
           curlym_do_A: /* execute the A in /A{m,n}B/  */
-           PUSH_YES_STATE_GOTO(CURLYM_A, ST.A, locinput); /* match A */
+           PUSH_YES_STATE_GOTO(CURLYM_A, ST.A, locinput, loceol, /* match A */
+                                script_run_begin);
             NOT_REACHED; /* NOTREACHED */
  
         case CURLYM_A: /* we've just matched an A */
@@ -8203,8 +8347,15 @@ NULL
                 );
             if (! NEXTCHR_IS_EOS && ST.c1 != CHRTEST_VOID) {
                  if (! UTF8_IS_INVARIANT(nextchr) && utf8_target) {
-                    if (memNE(locinput, ST.c1_utf8, UTF8SKIP(locinput))
-                        && memNE(locinput, ST.c2_utf8, UTF8SKIP(locinput)))
+
+                           /* (We can use memEQ and memNE in this file without
+                            * having to worry about one being shorter than the
+                            * other, since the first byte of each gives the
+                            * length of the character) */
+                    if (   memNE(locinput, ST.c1_utf8, UTF8_SAFE_SKIP(locinput,
+                                                              reginfo->strend))
+                        && memNE(locinput, ST.c2_utf8, UTF8_SAFE_SKIP(locinput,
+                                                             reginfo->strend)))
                      {
                          /* simulate B failing */
                          DEBUG_OPTIMISE_r(
@@ -8250,7 +8401,8 @@ NULL
                 }
             }
             
-           PUSH_STATE_GOTO(CURLYM_B, ST.B, locinput); /* match B */
+           PUSH_STATE_GOTO(CURLYM_B, ST.B, locinput, loceol,   /* match B */
+                            script_run_begin);
             NOT_REACHED; /* NOTREACHED */
  
         case CURLYM_B_fail: /* just failed to match a B */
@@ -8313,7 +8465,7 @@ NULL
              if (EVAL_CLOSE_PAREN_IS_TRUE(cur_eval,(U32)ST.paren))
              {
                  char *li = locinput;
-                if (!regrepeat(rex, &li, scan, reginfo, 1))
+                if (!regrepeat(rex, &li, scan, loceol, reginfo, 1))
                     sayNO;
                  SET_locinput(li);
                  goto fake_end;
@@ -8369,7 +8521,7 @@ NULL
                  char *li = locinput;
                 minmod = 0;
                 if (ST.min &&
-                        regrepeat(rex, &li, ST.A, reginfo, ST.min)
+                        regrepeat(rex, &li, ST.A, loceol, reginfo, ST.min)
                              < ST.min)
                     sayNO;
                  SET_locinput(li);
@@ -8383,7 +8535,7 @@ NULL
                 /* set ST.maxpos to the furthest point along the
                  * string that could possibly match */
                 if  (ST.max == REG_INFTY) {
-                   ST.maxpos = reginfo->strend - 1;
+                   ST.maxpos = loceol - 1;
                     if (utf8_target)
                         while (UTF8_IS_CONTINUATION(*(U8*)ST.maxpos))
                             ST.maxpos--;
@@ -8391,13 +8543,13 @@ NULL
                 else if (utf8_target) {
                     int m = ST.max - ST.min;
                     for (ST.maxpos = locinput;
-                        m >0 && ST.maxpos < reginfo->strend; m--)
+                        m >0 && ST.maxpos <  loceol; m--)
                         ST.maxpos += UTF8SKIP(ST.maxpos);
                 }
                 else {
                     ST.maxpos = locinput + ST.max - ST.min;
-                   if (ST.maxpos >= reginfo->strend)
-                       ST.maxpos = reginfo->strend - 1;
+                   if (ST.maxpos >=  loceol)
+                       ST.maxpos =  loceol - 1;
                 }
                 goto curly_try_B_min_known;
  
@@ -8406,7 +8558,7 @@ NULL
                  /* avoid taking address of locinput, so it can remain
                   * a register var */
                  char *li = locinput;
-                ST.count = regrepeat(rex, &li, ST.A, reginfo, ST.max);
+                ST.count = regrepeat(rex, &li, ST.A, loceol, reginfo, ST.max);
                 if (ST.count < ST.min)
                     sayNO;
                  SET_locinput(li);
@@ -8439,7 +8591,7 @@ NULL
              if (ST.c1 == CHRTEST_VOID) {
                  /* failed -- move forward one */
                  char *li = locinput;
-                if (!regrepeat(rex, &li, ST.A, reginfo, 1)) {
+                if (!regrepeat(rex, &li, ST.A, loceol, reginfo, 1)) {
                      sayNO;
                  }
                  locinput = li;
@@ -8466,20 +8618,26 @@ NULL
                     n = (ST.oldloc == locinput) ? 0 : 1;
                     if (ST.c1 == ST.c2) {
                         /* set n to utf8_distance(oldloc, locinput) */
-                       while (locinput <= ST.maxpos
-                              && memNE(locinput, ST.c1_utf8, UTF8SKIP(locinput)))
+                       while (    locinput <= ST.maxpos
+                               &&  locinput < loceol
+                               &&  memNE(locinput, ST.c1_utf8,
+                                    UTF8_SAFE_SKIP(locinput, reginfo->strend)))
                          {
-                           locinput += UTF8SKIP(locinput);
+                           locinput += UTF8_SAFE_SKIP(locinput,
+                                                       reginfo->strend);
                             n++;
                         }
                     }
                     else {
                         /* set n to utf8_distance(oldloc, locinput) */
-                       while (locinput <= ST.maxpos
-                              && memNE(locinput, ST.c1_utf8, UTF8SKIP(locinput))
-                              && memNE(locinput, ST.c2_utf8, UTF8SKIP(locinput)))
+                       while (   locinput <= ST.maxpos
+                               && locinput < loceol
+                               && memNE(locinput, ST.c1_utf8,
+                                     UTF8_SAFE_SKIP(locinput, reginfo->strend))
+                               && memNE(locinput, ST.c2_utf8,
+                                    UTF8_SAFE_SKIP(locinput, reginfo->strend)))
                          {
-                           locinput += UTF8SKIP(locinput);
+                           locinput += UTF8_SAFE_SKIP(locinput, reginfo->strend);
                             n++;
                         }
                     }
@@ -8536,7 +8694,7 @@ NULL
                       * locinput matches */
                      char *li = ST.oldloc;
                     ST.count += n;
-                    if (regrepeat(rex, &li, ST.A, reginfo, n) < n)
+                    if (regrepeat(rex, &li, ST.A, loceol, reginfo, n) < n)
                         sayNO;
                      assert(n == REG_INFTY || locinput == li);
                 }
@@ -8544,34 +8702,36 @@ NULL
  
            curly_try_B_min:
              CURLY_SETPAREN(ST.paren, ST.count);
-            PUSH_STATE_GOTO(CURLY_B_min, ST.B, locinput);
+            PUSH_STATE_GOTO(CURLY_B_min, ST.B, locinput, loceol,
+                            script_run_begin);
             NOT_REACHED; /* NOTREACHED */
  
  
            curly_try_B_max:
             /* a successful greedy match: now try to match B */
             {
-               bool could_match = locinput < reginfo->strend;
+               bool could_match = locinput <  loceol;
  
                 /* If it could work, try it. */
                  if (ST.c1 != CHRTEST_VOID && could_match) {
                      if (! UTF8_IS_INVARIANT(UCHARAT(locinput)) && utf8_target)
                      {
-                        could_match = memEQ(locinput,
-                                            ST.c1_utf8,
-                                            UTF8SKIP(locinput))
-                                    || memEQ(locinput,
-                                             ST.c2_utf8,
-                                             UTF8SKIP(locinput));
+                        could_match =  memEQ(locinput, ST.c1_utf8,
+                                             UTF8_SAFE_SKIP(locinput,
+                                                            reginfo->strend))
+                                    || memEQ(locinput, ST.c2_utf8,
+                                             UTF8_SAFE_SKIP(locinput,
+                                                            reginfo->strend));
                      }
                      else {
-                        could_match = UCHARAT(locinput) == ST.c1
-                                      || UCHARAT(locinput) == ST.c2;
+                        could_match =   UCHARAT(locinput) == ST.c1
+                                     || UCHARAT(locinput) == ST.c2;
                      }
                  }
                  if (ST.c1 == CHRTEST_VOID || could_match) {
                     CURLY_SETPAREN(ST.paren, ST.count);
-                   PUSH_STATE_GOTO(CURLY_B_max, ST.B, locinput);
+                   PUSH_STATE_GOTO(CURLY_B_max, ST.B, locinput, loceol,
+                                    script_run_begin);
                     NOT_REACHED; /* NOTREACHED */
                 }
             }
@@ -8626,8 +8786,9 @@ NULL
  
                  SET_RECURSE_LOCINPUT("FAKE-END[after]", cur_eval->locinput);
  
-                PUSH_YES_STATE_GOTO(EVAL_postponed_AB, st->u.eval.prev_eval->u.eval.B,
-                                    locinput); /* match B */
+                PUSH_YES_STATE_GOTO(EVAL_postponed_AB,          /* match B */
+                                    st->u.eval.prev_eval->u.eval.B,
+                                    locinput, loceol, script_run_begin);
             }
  
             if (locinput < reginfo->till) {
@@ -8651,12 +8812,11 @@ NULL
  #undef  ST
  #define ST st->u.ifmatch
  
-        {
-            char *newstart;
-
         case SUSPEND:   /* (?>A) */
             ST.wanted = 1;
-           newstart = locinput;
+           ST.start = locinput;
+           ST.end = loceol;
+            ST.count = 1;
             goto do_ifmatch;    
  
         case UNLESSM:   /* -ve lookaround: (?!A), or with 'flags', (?<!A) */
@@ -8666,25 +8826,47 @@ NULL
         case IFMATCH:   /* +ve lookaround: (?=A), or with 'flags', (?<=A) */
             ST.wanted = 1;
           ifmatch_trivial_fail_test:
-           if (scan->flags) {
-               char * const s = HOPBACKc(locinput, scan->flags);
-               if (!s) {
-                   /* trivial fail */
-                   if (logical) {
-                       logical = 0;
-                       sw = 1 - cBOOL(ST.wanted);
-                   }
-                   else if (ST.wanted)
-                       sayNO;
-                   next = scan + ARG(scan);
-                   if (next == scan)
-                       next = NULL;
-                   break;
-               }
-               newstart = s;
+            ST.count = scan->next_off + 1; /* next_off repurposed to be
+                                              lookbehind count, requires
+                                              non-zero flags */
+           if (! scan->flags) {    /* 'flags' zero means lookahead */
+
+                /* Lookahead starts here and ends at the normal place */
+               ST.start = locinput;
+               ST.end = loceol;
+            }
+           else {
+                PERL_UINT_FAST8_T back_count = scan->flags;
+               char * s;
+
+                /* Lookbehind can look beyond the current position */
+               ST.end = loceol;
+
+                /* ... and starts at the first place in the input that is in
+                 * the range of the possible start positions */
+                for (; ST.count > 0; ST.count--, back_count--) {
+                    s = HOPBACKc(locinput, back_count);
+                    if (s) {
+                        ST.start = s;
+                        goto do_ifmatch;
+                    }
+                }
+
+                /* If the lookbehind doesn't start in the actual string, it is
+                 * a trivial match failure */
+                if (logical) {
+                    logical = 0;
+                    sw = 1 - cBOOL(ST.wanted);
+                }
+                else if (ST.wanted)
+                    sayNO;
+
+                /* Here, we didn't want it to match, so this is actually a
+                 * success */
+                next = scan + ARG(scan);
+                if (next == scan)
+                    next = NULL;
+                break;
             }
-           else
-               newstart = locinput;
  
           do_ifmatch:
             ST.me = scan;
@@ -8692,29 +8874,48 @@ NULL
             logical = 0; /* XXX: reset state of logical once it has been saved into ST */
             
             /* execute body of (?...A) */
-           PUSH_YES_STATE_GOTO(IFMATCH_A, NEXTOPER(NEXTOPER(scan)), newstart);
+           PUSH_YES_STATE_GOTO(IFMATCH_A, NEXTOPER(NEXTOPER(scan)), ST.start,
+                                ST.end, script_run_begin);
             NOT_REACHED; /* NOTREACHED */
-        }
+
+        {
+            bool matched;
  
         case IFMATCH_A_fail: /* body of (?...A) failed */
-           ST.wanted = !ST.wanted;
-           /* FALLTHROUGH */
+           if (! ST.logical && ST.count > 1) {
+
+                /* It isn't a real failure until we've tried all starting
+                 * positions.  Move to the next starting position and retry */
+                ST.count--;
+                ST.start = HOPc(ST.start, 1);
+                scan = ST.me;
+                logical = ST.logical;
+                goto do_ifmatch;
+            }
+
+            /* Here, all starting positions have been tried. */
+           matched = FALSE;
+           goto ifmatch_done;
  
         case IFMATCH_A: /* body of (?...A) succeeded */
-           if (ST.logical) {
-               sw = cBOOL(ST.wanted);
-           }
-           else if (!ST.wanted)
-               sayNO;
+           matched = TRUE;
+          ifmatch_done:
+            sw = matched == ST.wanted;
+           if (! ST.logical && !sw) {
+                sayNO;
+            }
  
             if (OP(ST.me) != SUSPEND) {
                  /* restore old position except for (?>...) */
                 locinput = st->locinput;
+                loceol = st->loceol;
+                script_run_begin = st->sr0;
             }
             scan = ST.me + ARG(ST.me);
             if (scan == ST.me)
                 scan = NULL;
             continue; /* execute B */
+        }
  
  #undef ST
  
@@ -8726,13 +8927,14 @@ NULL
             break;
  
         case COMMIT:  /*  (*COMMIT)  */
-           reginfo->cutpoint = reginfo->strend;
+           reginfo->cutpoint = loceol;
             /* FALLTHROUGH */
  
         case PRUNE:   /*  (*PRUNE)   */
              if (scan->flags)
                 sv_yes_mark = sv_commit = MUTABLE_SV(rexi->data->data[ ARG( scan ) ]);
-           PUSH_STATE_GOTO(COMMIT_next, next, locinput);
+           PUSH_STATE_GOTO(COMMIT_next, next, locinput, loceol,
+                            script_run_begin);
             NOT_REACHED; /* NOTREACHED */
  
         case COMMIT_next_fail:
@@ -8762,7 +8964,8 @@ NULL
                  = MUTABLE_SV(rexi->data->data[ ARG( scan ) ]);
              mark_state = st;
              ST.mark_loc = locinput;
-            PUSH_YES_STATE_GOTO(MARKPOINT_next, next, locinput);
+            PUSH_YES_STATE_GOTO(MARKPOINT_next, next, locinput, loceol,
+                                script_run_begin);
              NOT_REACHED; /* NOTREACHED */
  
          case MARKPOINT_next:
@@ -8795,7 +8998,8 @@ NULL
                  /* (*SKIP) : if we fail we cut here*/
                  ST.mark_name = NULL;
                  ST.mark_loc = locinput;
-                PUSH_STATE_GOTO(SKIP_next,next, locinput);
+                PUSH_STATE_GOTO(SKIP_next,next, locinput, loceol,
+                                script_run_begin);
              } else {
                  /* (*SKIP:NAME) : if there is a (*MARK:NAME) fail where it was, 
                     otherwise do nothing.  Meaning we need to scan 
@@ -8808,7 +9012,8 @@ NULL
                                  find ) ) 
                      {
                          ST.mark_name = find;
-                        PUSH_STATE_GOTO( SKIP_next, next, locinput);
+                        PUSH_STATE_GOTO( SKIP_next, next, locinput, loceol,
+                                         script_run_begin);
                      }
                      cur = cur->u.mark.prev_mark;
                  }
@@ -8837,7 +9042,7 @@ NULL
  #undef ST
  
          case LNBREAK: /* \R */
-            if ((n=is_LNBREAK_safe(locinput, reginfo->strend, utf8_target))) {
+            if ((n=is_LNBREAK_safe(locinput, loceol, utf8_target))) {
                  locinput += n;
              } else
                  sayNO;
@@ -8856,7 +9061,7 @@ NULL
                  locinput += PL_utf8skip[nextchr];
                  /* locinput is allowed to go 1 char off the end (signifying
                   * EOS), but not 2+ */
-                if (locinput > reginfo->strend)
+                if (locinput >  loceol)
                      sayNO;
              }
              else
@@ -8904,12 +9109,16 @@ NULL
              );
             depth++;
             st->locinput = locinput;
+           st->loceol = loceol;
+            st->sr0 = script_run_begin;
             newst = st+1; 
             if (newst >  SLAB_LAST(PL_regmatch_slab))
                 newst = S_push_slab(aTHX);
             PL_regmatch_state = newst;
  
             locinput = pushinput;
+            loceol = pusheol;
+            script_run_begin = pushsr0;
             st = newst;
             continue;
              /* NOTREACHED */
@@ -8962,8 +9171,11 @@ NULL
         yes_state = st->u.yes.prev_yes_state;
         PL_regmatch_state = st;
          
-        if (no_final)
+        if (no_final) {
              locinput= st->locinput;
+            loceol= st->loceol;
+            script_run_begin = st->sr0;
+        }
         state_num = st->resume_state + no_final;
         goto reenter_switch;
      }
@@ -9013,6 +9225,8 @@ NULL
         }
         PL_regmatch_state = st;
         locinput= st->locinput;
+       loceol= st->loceol;
+        script_run_begin = st->sr0;
  
         DEBUG_STATE_pp("pop");
         depth--;
@@ -9068,18 +9282,20 @@ NULL
   *             to point to the byte following the highest successful
   *             match.
   * p         - the regnode to be repeatedly matched against.
- * reginfo   - struct holding match state, such as strend
+ * loceol    - pointer to the end position beyond which we aren't supposed to
+ *             look.
+ * reginfo   - struct holding match state, such as utf8_target
   * max       - maximum number of things to match.
   * depth     - (for debugging) backtracking depth.
   */
  STATIC I32
  S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
-            regmatch_info *const reginfo, I32 max _pDEPTH)
+            char * loceol, regmatch_info *const reginfo, I32 max _pDEPTH)
  {
      dVAR;
      char *scan;     /* Pointer to current position in target string */
      I32 c;
-    char *loceol = reginfo->strend;   /* local version */
+    char *this_eol = loceol;   /* potentially adjusted version. */
      I32 hardcount = 0;  /* How many matches so far */
      bool utf8_target = reginfo->is_utf8_target;
      unsigned int to_complement = 0;  /* Invert the result? */
@@ -9097,15 +9313,15 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
      if (max == REG_INFTY)   /* This is a special marker to go to the platform's
                                 max */
         max = I32_MAX;
-    else if (! utf8_target && loceol - scan > max)
-       loceol = scan + max;
+    else if (! utf8_target && this_eol - scan > max)
+       this_eol = scan + max;
  
-    /* Here, for the case of a non-UTF-8 target we have adjusted <loceol> down
+    /* Here, for the case of a non-UTF-8 target we have adjusted <this_eol> down
       * to the maximum of how far we should go in it (leaving it set to the real
       * end, if the maximum permissible would take us beyond that).  This allows
-     * us to make the loop exit condition that we haven't gone past <loceol> to
+     * us to make the loop exit condition that we haven't gone past <this_eol> to
       * also mean that we haven't exceeded the max permissible count, saving a
-     * test each time through the loop.  But it assumes that the OP matches a
+     * test each time through the loops.  But it assumes that the OP matches a
       * single byte, which is true for most of the OPs below when applied to a
       * non-UTF-8 target.  Those relatively few OPs that don't have this
       * characteristic will have to compensate.
@@ -9113,39 +9329,39 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
       * There is no adjustment for UTF-8 targets, as the number of bytes per
       * character varies.  OPs will have to test both that the count is less
       * than the max permissible (using <hardcount> to keep track), and that we
-     * are still within the bounds of the string (using <loceol>.  A few OPs
+     * are still within the bounds of the string (using <this_eol>).  A few OPs
       * match a single byte no matter what the encoding.  They can omit the max
       * test if, for the UTF-8 case, they do the adjustment that was skipped
       * above.
       *
       * Thus, the code above sets things up for the common case; and exceptional
       * cases need extra work; the common case is to make sure <scan> doesn't
-     * go past <loceol>, and for UTF-8 to also use <hardcount> to make sure the
+     * go past <this_eol>, and for UTF-8 to also use <hardcount> to make sure the
       * count doesn't exceed the maximum permissible */
  
      switch (OP(p)) {
      case REG_ANY:
         if (utf8_target) {
-           while (scan < loceol && hardcount < max && *scan != '\n') {
+           while (scan < this_eol && hardcount < max && *scan != '\n') {
                 scan += UTF8SKIP(scan);
                 hardcount++;
             }
         } else {
-            scan = (char *) memchr(scan, '\n', loceol - scan);
+            scan = (char *) memchr(scan, '\n', this_eol - scan);
              if (! scan) {
-                scan = loceol;
+                scan = this_eol;
              }
         }
         break;
      case SANY:
          if (utf8_target) {
-           while (scan < loceol && hardcount < max) {
+           while (scan < this_eol && hardcount < max) {
                 scan += UTF8SKIP(scan);
                 hardcount++;
             }
         }
         else
-           scan = loceol;
+           scan = this_eol;
         break;
      case EXACTL:
          _CHECK_AND_WARN_PROBLEMATIC_LOCALE;
@@ -9170,12 +9386,12 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
           * can use UTF8_IS_INVARIANT() even if the pattern isn't UTF-8, as it's
           * true iff it doesn't matter if the argument is in UTF-8 or not */
          if (UTF8_IS_INVARIANT(c) || (! utf8_target && ! reginfo->is_utf8_pat)) {
-            if (utf8_target && loceol - scan > max) {
-                /* We didn't adjust <loceol> because is UTF-8, but ok to do so,
+            if (utf8_target && this_eol - scan > max) {
+                /* We didn't adjust <this_eol> because is UTF-8, but ok to do so,
                   * since here, to match at all, 1 char == 1 byte */
-                loceol = scan + max;
+                this_eol = scan + max;
              }
-            scan = (char *) find_span_end((U8 *) scan, (U8 *) loceol, (U8) c);
+            scan = (char *) find_span_end((U8 *) scan, (U8 *) this_eol, (U8) c);
         }
         else if (reginfo->is_utf8_pat) {
              if (utf8_target) {
@@ -9184,7 +9400,7 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
                  /* When both target and pattern are UTF-8, we have to do
                   * string EQ */
                  while (hardcount < max
-                       && scan < loceol
+                       && scan < this_eol
                         && (scan_char_len = UTF8SKIP(scan)) <= STR_LEN(p)
                         && memEQ(scan, STRING(p), scan_char_len))
                  {
@@ -9197,7 +9413,7 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
                  /* Target isn't utf8; convert the character in the UTF-8
                   * pattern to non-UTF8, and do a simple find */
                  c = EIGHT_BIT_UTF8_TO_NATIVE(c, *(STRING(p) + 1));
-                scan = (char *) find_span_end((U8 *) scan, (U8 *) loceol, (U8) c);
+                scan = (char *) find_span_end((U8 *) scan, (U8 *) this_eol, (U8) c);
              } /* else pattern char is above Latin1, can't possibly match the
                   non-UTF-8 target */
          }
@@ -9211,7 +9427,7 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
             U8 low = UTF8_TWO_BYTE_LO(c);
  
             while (hardcount < max
-                   && scan + 1 < loceol
+                   && scan + 1 < this_eol
                     && UCHARAT(scan) == high
                     && UCHARAT(scan + 1) == low)
             {
@@ -9277,7 +9493,7 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
          {
              if (c1 == CHRTEST_VOID) {
                  /* Use full Unicode fold matching */
-                char *tmpeol = reginfo->strend;
+                char *tmpeol = loceol;
                  STRLEN pat_len = reginfo->is_utf8_pat ? UTF8SKIP(STRING(p)) : 1;
                  while (hardcount < max
                          && foldEQ_utf8_flags(scan, &tmpeol, 0, utf8_target,
@@ -9285,33 +9501,36 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
                                               reginfo->is_utf8_pat, utf8_flags))
                  {
                      scan = tmpeol;
-                    tmpeol = reginfo->strend;
+                    tmpeol = loceol;
                      hardcount++;
                  }
              }
              else if (utf8_target) {
                  if (c1 == c2) {
-                    while (scan < loceol
+                    while (scan < this_eol
                             && hardcount < max
-                           && memEQ(scan, c1_utf8, UTF8SKIP(scan)))
+                           && memEQ(scan, c1_utf8, UTF8_SAFE_SKIP(scan,
+                                                                  loceol)))
                      {
-                        scan += UTF8SKIP(scan);
+                        scan += UTF8SKIP(c1_utf8);
                          hardcount++;
                      }
                  }
                  else {
-                    while (scan < loceol
+                    while (scan < this_eol
                             && hardcount < max
-                           && (memEQ(scan, c1_utf8, UTF8SKIP(scan))
-                               || memEQ(scan, c2_utf8, UTF8SKIP(scan))))
+                           && (   memEQ(scan, c1_utf8, UTF8_SAFE_SKIP(scan,
+                                                                     loceol))
+                               || memEQ(scan, c2_utf8, UTF8_SAFE_SKIP(scan,
+                                                                     loceol))))
                      {
-                        scan += UTF8SKIP(scan);
+                        scan += UTF8_SAFE_SKIP(scan, loceol);
                          hardcount++;
                      }
                  }
              }
              else if (c1 == c2) {
-                scan = (char *) find_span_end((U8 *) scan, (U8 *) loceol, (U8) c1);
+                scan = (char *) find_span_end((U8 *) scan, (U8 *) this_eol, (U8) c1);
              }
              else {
                  /* See comments in regmatch() CURLY_B_min_known_fail.  We avoid
@@ -9323,12 +9542,12 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
                      U8 c1_c2_mask = ~ c1_c2_bits_differing;
  
                      scan = (char *) find_span_end_mask((U8 *) scan,
-                                                       (U8 *) loceol,
+                                                       (U8 *) this_eol,
                                                         c1 & c1_c2_mask,
                                                         c1_c2_mask);
                  }
                  else {
-                    while (    scan < loceol
+                    while (    scan < this_eol
                             && (UCHARAT(scan) == c1 || UCHARAT(scan) == c2))
                      {
                          scan++;
@@ -9350,40 +9569,40 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
      case ANYOF:
         if (utf8_target) {
             while (hardcount < max
-                   && scan < loceol
-                  && reginclass(prog, p, (U8*)scan, (U8*) loceol, utf8_target))
+                   && scan < this_eol
+                  && reginclass(prog, p, (U8*)scan, (U8*) this_eol, utf8_target))
             {
                 scan += UTF8SKIP(scan);
                 hardcount++;
             }
         }
          else if (ANYOF_FLAGS(p) & ~ ANYOF_MATCHES_ALL_ABOVE_BITMAP) {
-           while (scan < loceol
+           while (scan < this_eol
                      && reginclass(prog, p, (U8*)scan, (U8*)scan+1, 0))
                 scan++;
          }
          else {
-           while (scan < loceol && ANYOF_BITMAP_TEST(p, *((U8*)scan)))
+           while (scan < this_eol && ANYOF_BITMAP_TEST(p, *((U8*)scan)))
                 scan++;
         }
         break;
  
      case ANYOFM:
-        if (utf8_target && loceol - scan > max) {
+        if (utf8_target && this_eol - scan > max) {
  
-            /* We didn't adjust <loceol> at the beginning of this routine
+            /* We didn't adjust <this_eol> at the beginning of this routine
               * because is UTF-8, but it is actually ok to do so, since here, to
               * match, 1 char == 1 byte. */
-            loceol = scan + max;
+            this_eol = scan + max;
          }
  
-        scan = (char *) find_span_end_mask((U8 *) scan, (U8 *) loceol, (U8) ARG(p), FLAGS(p));
+        scan = (char *) find_span_end_mask((U8 *) scan, (U8 *) this_eol, (U8) ARG(p), FLAGS(p));
          break;
  
      case NANYOFM:
         if (utf8_target) {
             while (     hardcount < max
-                   &&   scan < loceol
+                   &&   scan < this_eol
                    &&  (*scan & FLAGS(p)) != ARG(p))
             {
                 scan += UTF8SKIP(scan);
@@ -9391,18 +9610,52 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
             }
         }
          else {
-            scan = (char *) find_next_masked((U8 *) scan, (U8 *) loceol, (U8) ARG(p), FLAGS(p));
+            scan = (char *) find_next_masked((U8 *) scan, (U8 *) this_eol, (U8) ARG(p), FLAGS(p));
         }
          break;
  
      case ANYOFH:
-        if (utf8_target) while (   hardcount < max
-                                && scan < loceol
-                                && reginclass(prog, p, (U8*)scan, (U8*) loceol,
-                                                                  TRUE))
-        {
-            scan += UTF8SKIP(scan);
-            hardcount++;
+        if (utf8_target) {  /* ANYOFH only can match UTF-8 targets */
+            while (  hardcount < max
+                   && scan < this_eol
+                   && NATIVE_UTF8_TO_I8((U8) *scan) >= ANYOF_FLAGS(p)
+                   && reginclass(prog, p, (U8*)scan, (U8*) this_eol, TRUE))
+            {
+                scan += UTF8SKIP(scan);
+                hardcount++;
+            }
+        }
+        break;
+
+    case ANYOFHb:
+        if (utf8_target) {  /* ANYOFHb only can match UTF-8 targets */
+
+            /* we know the first byte must be the FLAGS field */
+            while (   hardcount < max
+                   && scan < this_eol
+                   && (U8) *scan == ANYOF_FLAGS(p)
+                   && reginclass(prog, p, (U8*)scan, (U8*) this_eol,
+                                                              TRUE))
+            {
+                scan += UTF8SKIP(scan);
+                hardcount++;
+            }
+        }
+        break;
+
+    case ANYOFHr:
+        if (utf8_target) {  /* ANYOFHr only can match UTF-8 targets */
+            while (  hardcount < max
+                   && scan < this_eol
+                   && inRANGE((U8) NATIVE_UTF8_TO_I8(*scan),
+                              LOWEST_ANYOF_HRx_BYTE(ANYOF_FLAGS(p)),
+                              HIGHEST_ANYOF_HRx_BYTE(ANYOF_FLAGS(p)))
+                   && NATIVE_UTF8_TO_I8((U8) *scan) >= ANYOF_FLAGS(p)
+                   && reginclass(prog, p, (U8*)scan, (U8*) this_eol, TRUE))
+            {
+                scan += UTF8SKIP(scan);
+                hardcount++;
+            }
          }
          break;
  
@@ -9415,16 +9668,16 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
      case POSIXL:
          _CHECK_AND_WARN_PROBLEMATIC_LOCALE;
         if (! utf8_target) {
-           while (scan < loceol && to_complement ^ cBOOL(isFOO_lc(FLAGS(p),
+           while (scan < this_eol && to_complement ^ cBOOL(isFOO_lc(FLAGS(p),
                                                                     *scan)))
              {
                 scan++;
              }
         } else {
-           while (hardcount < max && scan < loceol
+           while (hardcount < max && scan < this_eol
                     && to_complement ^ cBOOL(isFOO_utf8_lc(FLAGS(p),
                                                                    (U8 *) scan,
-                                                                  (U8 *) loceol)))
+                                                                  (U8 *) this_eol)))
              {
                  scan += UTF8SKIP(scan);
                 hardcount++;
@@ -9439,14 +9692,14 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
          /* FALLTHROUGH */
  
      case POSIXA:
-        if (utf8_target && loceol - scan > max) {
+        if (utf8_target && this_eol - scan > max) {
  
-            /* We didn't adjust <loceol> at the beginning of this routine
+            /* We didn't adjust <this_eol> at the beginning of this routine
               * because is UTF-8, but it is actually ok to do so, since here, to
               * match, 1 char == 1 byte. */
-            loceol = scan + max;
+            this_eol = scan + max;
          }
-        while (scan < loceol && _generic_isCC_A((U8) *scan, FLAGS(p))) {
+        while (scan < this_eol && _generic_isCC_A((U8) *scan, FLAGS(p))) {
             scan++;
         }
         break;
@@ -9460,7 +9713,7 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
  
      case NPOSIXA:
          if (! utf8_target) {
-            while (scan < loceol && ! _generic_isCC_A((U8) *scan, FLAGS(p))) {
+            while (scan < this_eol && ! _generic_isCC_A((U8) *scan, FLAGS(p))) {
                  scan++;
              }
          }
@@ -9468,8 +9721,8 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
  
              /* The complement of something that matches only ASCII matches all
               * non-ASCII, plus everything in ASCII that isn't in the class. */
-           while (hardcount < max && scan < loceol
-                   && (   ! isASCII_utf8_safe(scan, reginfo->strend)
+           while (hardcount < max && scan < this_eol
+                   && (   ! isASCII_utf8_safe(scan, loceol)
                         || ! _generic_isCC_A((U8) *scan, FLAGS(p))))
              {
                  scan += UTF8SKIP(scan);
@@ -9484,7 +9737,7 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
  
      case POSIXU:
         if (! utf8_target) {
-            while (scan < loceol && to_complement
+            while (scan < this_eol && to_complement
                                  ^ cBOOL(_generic_isCC((U8) *scan, FLAGS(p))))
              {
                  scan++;
@@ -9495,11 +9748,11 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
              classnum = (_char_class_number) FLAGS(p);
              switch (classnum) {
                  default:
-                    while (   hardcount < max && scan < loceol
+                    while (   hardcount < max && scan < this_eol
                             && to_complement ^ cBOOL(_invlist_contains_cp(
                                                PL_XPosix_ptrs[classnum],
                                                utf8_to_uvchr_buf((U8 *) scan,
-                                                                (U8 *) loceol,
+                                                                (U8 *) this_eol,
                                                                  NULL))))
                      {
                          scan += UTF8SKIP(scan);
@@ -9515,9 +9768,9 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
  
                  case _CC_ENUM_SPACE:
                      while (hardcount < max
-                           && scan < loceol
+                           && scan < this_eol
                             && (to_complement
-                               ^ cBOOL(isSPACE_utf8_safe(scan, loceol))))
+                               ^ cBOOL(isSPACE_utf8_safe(scan, this_eol))))
                      {
                          scan += UTF8SKIP(scan);
                          hardcount++;
@@ -9525,9 +9778,9 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
                      break;
                  case _CC_ENUM_BLANK:
                      while (hardcount < max
-                           && scan < loceol
+                           && scan < this_eol
                             && (to_complement
-                                ^ cBOOL(isBLANK_utf8_safe(scan, loceol))))
+                                ^ cBOOL(isBLANK_utf8_safe(scan, this_eol))))
                      {
                          scan += UTF8SKIP(scan);
                          hardcount++;
@@ -9535,9 +9788,9 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
                      break;
                  case _CC_ENUM_XDIGIT:
                      while (hardcount < max
-                           && scan < loceol
+                           && scan < this_eol
                             && (to_complement
-                               ^ cBOOL(isXDIGIT_utf8_safe(scan, loceol))))
+                               ^ cBOOL(isXDIGIT_utf8_safe(scan, this_eol))))
                      {
                          scan += UTF8SKIP(scan);
                          hardcount++;
@@ -9545,9 +9798,9 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
                      break;
                  case _CC_ENUM_VERTSPACE:
                      while (hardcount < max
-                           && scan < loceol
+                           && scan < this_eol
                             && (to_complement
-                               ^ cBOOL(isVERTWS_utf8_safe(scan, loceol))))
+                               ^ cBOOL(isVERTWS_utf8_safe(scan, this_eol))))
                      {
                          scan += UTF8SKIP(scan);
                          hardcount++;
@@ -9555,9 +9808,9 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
                      break;
                  case _CC_ENUM_CNTRL:
                      while (hardcount < max
-                           && scan < loceol
+                           && scan < this_eol
                             && (to_complement
-                               ^ cBOOL(isCNTRL_utf8_safe(scan, loceol))))
+                               ^ cBOOL(isCNTRL_utf8_safe(scan, this_eol))))
                      {
                          scan += UTF8SKIP(scan);
                          hardcount++;
@@ -9569,16 +9822,15 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
  
      case LNBREAK:
          if (utf8_target) {
-           while (hardcount < max && scan < loceol &&
-                    (c=is_LNBREAK_utf8_safe(scan, loceol))) {
+           while (hardcount < max && scan < this_eol &&
+                    (c=is_LNBREAK_utf8_safe(scan, this_eol))) {
                 scan += c;
                 hardcount++;
             }
         } else {
              /* LNBREAK can match one or two latin chars, which is ok, but we
               * have to use hardcount in this situation, and throw away the
-             * adjustment to <loceol> done before the switch statement */
-            loceol = reginfo->strend;
+             * adjustment to <this_eol> done before the switch statement */
             while (scan < loceol && (c=is_LNBREAK_latin1_safe(scan, loceol))) {
                 scan+=c;
                 hardcount++;
@@ -9650,7 +9902,9 @@ STATIC bool
  S_reginclass(pTHX_ regexp * const prog, const regnode * const n, const U8* const p, const U8* const p_end, const bool utf8_target)
  {
      dVAR;
-    const char flags = ANYOF_FLAGS(n);
+    const char flags = (inRANGE(OP(n), ANYOFH, ANYOFHr))
+                        ? 0
+                        : ANYOF_FLAGS(n);
      bool match = FALSE;
      UV c = *p;
  
@@ -9677,7 +9931,7 @@ S_reginclass(pTHX_ regexp * const prog, const regnode * const n, const U8* const
      }
  
      /* If this character is potentially in the bitmap, check it */
-    if (c < NUM_ANYOF_CODE_POINTS && OP(n) != ANYOFH) {
+    if (c < NUM_ANYOF_CODE_POINTS && ! inRANGE(OP(n), ANYOFH, ANYOFHb)) {
         if (ANYOF_BITMAP_TEST(n, c))
             match = TRUE;
         else if ((flags
@@ -9979,6 +10233,7 @@ S_setup_eval_state(pTHX_ regmatch_info *const reginfo)
      regmatch_info_aux_eval *eval_state = reginfo->info_aux_eval;
  
      eval_state->rex = rex;
+    eval_state->sv  = reginfo->sv;
  
      if (reginfo->sv) {
          /* Make $_ available to executed code. */
@@ -9986,6 +10241,8 @@ S_setup_eval_state(pTHX_ regmatch_info *const reginfo)
              SAVE_DEFSV;
              DEFSV_set(reginfo->sv);
          }
+        /* will be dec'd by S_cleanup_regmatch_info_aux */
+        SvREFCNT_inc_NN(reginfo->sv);
  
          if (!(mg = mg_find_mglob(reginfo->sv))) {
              /* prepare for quick setting of pos */
@@ -10077,6 +10334,7 @@ S_cleanup_regmatch_info_aux(pTHX_ void *arg)
          }
  
          PL_curpm = eval_state->curpm;
+        SvREFCNT_dec(eval_state->sv);
      }
  
      PL_regmatch_state = aux->old_regmatch_state;
@@ -10147,6 +10405,7 @@ S_to_byte_substr(pTHX_ regexp *prog)
             && !prog->substrs->data[i].substr) {
             SV* sv = newSVsv(prog->substrs->data[i].utf8_substr);
             if (! sv_utf8_downgrade(sv, TRUE)) {
+                SvREFCNT_dec_NN(sv);
                  return FALSE;
              }
              if (SvVALID(prog->substrs->data[i].utf8_substr)) {