Document cast NV to int macros; make helpers internal

[perl5.git] / regexec.c
diff --git a/regexec.c b/regexec.c

index 26825c7..91fb3d2 100644 (file)
--- a/regexec.c
+++ b/regexec.c
@@ -92,7 +92,7 @@ static const char utf8_locale_required[] =
  #ifdef DEBUGGING
  /* At least one required character in the target string is expressible only in
   * UTF-8. */
-static const char* const non_utf8_target_but_utf8_required
+static const char non_utf8_target_but_utf8_required[]
                  = "Can't match, because target string needs to be in UTF-8\n";
  #endif
  
@@ -218,7 +218,7 @@ S_regcppush(pTHX_ const regexp *rex, I32 parenfloor, U32 maxopenparen _pDEPTH)
      const UV total_elems = paren_elems_to_push + REGCP_OTHER_ELEMS;
      const UV elems_shifted = total_elems << SAVE_TIGHT_SHIFT;
      I32 p;
-    GET_RE_DEBUG_FLAGS_DECL;
+    DECLARE_AND_GET_RE_DEBUG_FLAGS;
  
      PERL_ARGS_ASSERT_REGCPPUSH;
  
@@ -328,7 +328,7 @@ S_regcppop(pTHX_ regexp *rex, U32 *maxopenparen_p _pDEPTH)
  {
      UV i;
      U32 paren;
-    GET_RE_DEBUG_FLAGS_DECL;
+    DECLARE_AND_GET_RE_DEBUG_FLAGS;
  
      PERL_ARGS_ASSERT_REGCPPOP;
  
@@ -422,7 +422,7 @@ Perl_isFOO_lc(pTHX_ const U8 classnum, const U8 character)
       * Ideally this could be replaced by a just an array of function pointers
       * to the C library functions that implement the macros this calls.
       * However, to compile, the precise function signatures are required, and
-     * these may vary from platform to to platform.  To avoid having to figure
+     * these may vary from platform to platform.  To avoid having to figure
       * out what those all are on each platform, I (khw) am using this method,
       * which adds an extra layer of function call overhead (unless the C
       * optimizer strips it away).  But we don't particularly care about
@@ -496,7 +496,6 @@ S_isFOO_utf8_lc(pTHX_ const U8 classnum, const U8* character, const U8* e)
       * rules, ignoring any locale.  So use the Unicode function if this class
       * requires an inversion list, and use the Unicode macro otherwise. */
  
-    dVAR;
  
      PERL_ARGS_ASSERT_ISFOO_UTF8_LC;
  
@@ -579,7 +578,7 @@ S_find_span_end(U8 * s, const U8 * send, const U8 span_byte)
              span_word |= span_word << 4;
  
              /* That reduces the problem to what this function solves */
-            return s + _variant_byte_number(span_word);
+            return s + variant_byte_number(span_word);
  
  #endif
  
@@ -657,7 +656,7 @@ S_find_next_masked(U8 * s, const U8 * send, const U8 byte, const U8 mask)
              masked &= PERL_VARIANTS_WORD_MASK;
  
              /* This reduces the problem to that solved by this function */
-            s += _variant_byte_number(masked);
+            s += variant_byte_number(masked);
              return s;
  
          } while (s + PERL_WORDSIZE <= send);
@@ -723,7 +722,7 @@ S_find_span_end_mask(U8 * s, const U8 * send, const U8 span_byte, const U8 mask)
              masked |= masked << 1;
              masked |= masked << 2;
              masked |= masked << 4;
-            return s + _variant_byte_number(masked);
+            return s + variant_byte_number(masked);
  
  #endif
  
@@ -859,7 +858,7 @@ Perl_re_intuit_start(pTHX_
      RXi_GET_DECL(prog,progi);
      regmatch_info reginfo_buf;  /* create some info to pass to find_byclass */
      regmatch_info *const reginfo = &reginfo_buf;
-    GET_RE_DEBUG_FLAGS_DECL;
+    DECLARE_AND_GET_RE_DEBUG_FLAGS;
  
      PERL_ARGS_ASSERT_RE_INTUIT_START;
      PERL_UNUSED_ARG(flags);
@@ -1173,8 +1172,8 @@ Perl_re_intuit_start(pTHX_
  
      /* now look for the 'other' substring if defined */
  
-    if (utf8_target ? prog->substrs->data[other_ix].utf8_substr
-                    : prog->substrs->data[other_ix].substr)
+    if (prog->substrs->data[other_ix].utf8_substr
+        || prog->substrs->data[other_ix].substr)
      {
         /* Take into account the "other" substring. */
          char *last, *last1;
@@ -1184,6 +1183,11 @@ Perl_re_intuit_start(pTHX_
  
        do_other_substr:
          other = &prog->substrs->data[other_ix];
+        if (!utf8_target && !other->substr) {
+            if (!to_byte_substr(prog)) {
+                NON_UTF8_TARGET_BUT_UTF8_REQUIRED(fail);
+            }
+        }
  
          /* if "other" is anchored:
           * we've previously found a floating substr starting at check_at.
@@ -1467,10 +1471,10 @@ Perl_re_intuit_start(pTHX_
          const U8* const str = (U8*)STRING(progi->regstclass);
  
          /* XXX this value could be pre-computed */
-        const int cl_l = (PL_regkind[OP(progi->regstclass)] == EXACT
+        const SSize_t cl_l = (PL_regkind[OP(progi->regstclass)] == EXACT
                     ?  (reginfo->is_utf8_pat
-                        ? utf8_distance(str + STR_LEN(progi->regstclass), str)
-                        : STR_LEN(progi->regstclass))
+                        ? (SSize_t)utf8_distance(str + STR_LEN(progi->regstclass), str)
+                        : (SSize_t)STR_LEN(progi->regstclass))
                     : 1);
         char * endpos;
          char *s;
@@ -1720,7 +1724,7 @@ STMT_START {
          } else {                                                                    \
              uvc = _toFOLD_utf8_flags( (const U8*) uc, uc_end, foldbuf, &foldlen,    \
                                                                              flags); \
-            len = UTF8SKIP(uc);                                                     \
+            len = UTF8_SAFE_SKIP(uc, uc_end);                                       \
              skiplen = UVCHR_SKIP( uvc );                                            \
              foldlen -= skiplen;                                                     \
              uscan = foldbuf + skiplen;                                              \
@@ -1782,7 +1786,9 @@ STMT_START {
      STMT_START {                                            \
          while (s < strend) {                                \
              CODE                                            \
-            s += ((UTF8) ? UTF8SKIP(s) : 1);                \
+            s += ((UTF8)                                    \
+                  ? UTF8_SAFE_SKIP(s, reginfo->strend)      \
+                  : 1);                                     \
          }                                                   \
      } STMT_END
  
@@ -1796,7 +1802,7 @@ STMT_START {
  #define REXEC_FBC_CLASS_SCAN_GUTS(UTF8, COND)                  \
      if (COND) {                                                \
          FBC_CHECK_AND_TRY                                      \
-        s += ((UTF8) ? UTF8SKIP(s) : 1);                       \
+        s += ((UTF8) ? UTF8_SAFE_SKIP(s, reginfo->strend) : 1);\
          previous_occurrence_end = s;                           \
      }                                                          \
      else {                                                     \
@@ -1815,12 +1821,13 @@ STMT_START {
   * of the one we're looking for.  Knowing that, we can see right away if the
   * next occurrence is adjacent to the previous.  When 'doevery' is FALSE, we
   * don't accept the 2nd and succeeding adjacent occurrences */
-#define FBC_CHECK_AND_TRY                                      \
-        if (   (   doevery                                     \
-                || s != previous_occurrence_end)               \
-            && (reginfo->intuit || regtry(reginfo, &s)))       \
-        {                                                      \
-            goto got_it;                                       \
+#define FBC_CHECK_AND_TRY                                           \
+        if (   (   doevery                                          \
+                || s != previous_occurrence_end)                    \
+            && (   reginfo->intuit                                  \
+                || (s <= reginfo->strend && regtry(reginfo, &s))))  \
+        {                                                           \
+            goto got_it;                                            \
          }
  
  
@@ -1839,6 +1846,28 @@ STMT_START {
          previous_occurrence_end = s;                        \
      }
  
+/* This differs from the above macros in that it is passed a single byte that
+ * is known to begin the next occurrence of the thing being looked for in 's'.
+ * It does a memchr to find the next occurrence of 'byte', before trying 'COND'
+ * at that position. */
+#define REXEC_FBC_FIND_NEXT_UTF8_BYTE_SCAN(byte, COND)      \
+    while (s < strend) {                                    \
+        s = (char *) memchr(s, byte, strend -s);            \
+        if (s == NULL) {                                    \
+            s = (char *) strend;                            \
+            break;                                          \
+        }                                                   \
+                                                            \
+        if (COND) {                                         \
+            FBC_CHECK_AND_TRY                               \
+            s += UTF8_SAFE_SKIP(s, reginfo->strend);        \
+            previous_occurrence_end = s;                    \
+        }                                                   \
+        else {                                              \
+            s += UTF8SKIP(s);                               \
+        }                                                   \
+    }
+
  /* The three macros below are slightly different versions of the same logic.
   *
   * The first is for /a and /aa when the target string is UTF-8.  This can only
@@ -1890,7 +1919,8 @@ STMT_START {
  
  /* Like FBC_UTF8_A, but TEST_UV is a macro which takes a UV as its input, and
   * TEST_UTF8 is a macro that for the same input code points returns identically
- * to TEST_UV, but takes a pointer to a UTF-8 encoded string instead */
+ * to TEST_UV, but takes a pointer to a UTF-8 encoded string instead (and an
+ * end pointer as well) */
  #define FBC_UTF8(TEST_UV, TEST_UTF8, IF_SUCCESS, IF_FAIL)                      \
      if (s == reginfo->strbeg) {                                                \
          tmp = '\n';                                                            \
@@ -1945,9 +1975,12 @@ STMT_START {
      }
  
  /* This is the macro to use when we want to see if something that looks like it
- * could match, actually does, and if so exits the loop */
-#define REXEC_FBC_TRYIT                            \
-    if ((reginfo->intuit || regtry(reginfo, &s)))  \
+ * could match, actually does, and if so exits the loop.  It needs to be used
+ * only for bounds checking macros, as it allows for matching beyond the end of
+ * string (which should be zero length without having to look at the string
+ * contents) */
+#define REXEC_FBC_TRYIT                                                     \
+    if (reginfo->intuit || (s <= reginfo->strend && regtry(reginfo, &s)))   \
          goto got_it
  
  /* The only difference between the BOUND and NBOUND cases is that
@@ -2065,7 +2098,6 @@ STATIC char *
  S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s, 
      const char *strend, regmatch_info *reginfo)
  {
-    dVAR;
  
      /* TRUE if x+ need not match at just the 1st pos of run of x's */
      const I32 doevery = (prog->intflags & PREGf_SKIP) == 0;
@@ -2129,21 +2161,89 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
          break;
  
      case ANYOFM:    /* ARG() is the base byte; FLAGS() the mask byte */
-        /* UTF-8ness doesn't matter, so use 0 */
+        /* UTF-8ness doesn't matter because this only matches UTF-8
+         * invariants, so use 0 */
          REXEC_FBC_FIND_NEXT_SCAN(0,
           (char *) find_next_masked((U8 *) s, (U8 *) strend,
                                     (U8) ARG(c), FLAGS(c)));
          break;
  
-    case NANYOFM:
-        REXEC_FBC_FIND_NEXT_SCAN(0,
+    case NANYOFM:   /* UTF-8ness does matter because this can match UTF-8
+                     * variants. */
+        REXEC_FBC_FIND_NEXT_SCAN(utf8_target,
           (char *) find_span_end_mask((U8 *) s, (U8 *) strend,
                                     (U8) ARG(c), FLAGS(c)));
          break;
  
      case ANYOFH:
-        if (utf8_target) REXEC_FBC_CLASS_SCAN(TRUE,
+        if (utf8_target) {  /* Can't possibly match a non-UTF-8 target */
+            REXEC_FBC_CLASS_SCAN(TRUE,
+                  (   (U8) NATIVE_UTF8_TO_I8(*s) >= ANYOF_FLAGS(c)
+                   && reginclass(prog, c, (U8*)s, (U8*) strend, utf8_target)));
+        }
+        break;
+
+    case ANYOFHb:
+        if (utf8_target) {  /* Can't possibly match a non-UTF-8 target */
+
+            /* We know what the first byte of any matched string should be */
+            U8 first_byte = FLAGS(c);
+
+            REXEC_FBC_FIND_NEXT_UTF8_BYTE_SCAN(first_byte,
                        reginclass(prog, c, (U8*)s, (U8*) strend, utf8_target));
+        }
+        break;
+
+    case ANYOFHr:
+        if (utf8_target) {  /* Can't possibly match a non-UTF-8 target */
+            REXEC_FBC_CLASS_SCAN(TRUE,
+                  (   inRANGE(NATIVE_UTF8_TO_I8(*s),
+                              LOWEST_ANYOF_HRx_BYTE(ANYOF_FLAGS(c)),
+                              HIGHEST_ANYOF_HRx_BYTE(ANYOF_FLAGS(c)))
+                   && reginclass(prog, c, (U8*)s, (U8*) strend, utf8_target)));
+        }
+        break;
+
+    case ANYOFHs:
+        if (utf8_target) {  /* Can't possibly match a non-UTF-8 target */
+            REXEC_FBC_CLASS_SCAN(TRUE,
+                  (   strend -s >= FLAGS(c)
+                   && memEQ(s, ((struct regnode_anyofhs *) c)->string, FLAGS(c))
+                   && reginclass(prog, c, (U8*)s, (U8*) strend, utf8_target)));
+        }
+        break;
+
+    case ANYOFR:
+        if (utf8_target) {
+            REXEC_FBC_CLASS_SCAN(TRUE,
+                  (   NATIVE_UTF8_TO_I8(*s) >= ANYOF_FLAGS(c)
+                   && withinCOUNT(utf8_to_uvchr_buf((U8 *) s,
+                                                    (U8 *) strend,
+                                                    NULL),
+                                  ANYOFRbase(c), ANYOFRdelta(c))));
+        }
+        else {
+            REXEC_FBC_CLASS_SCAN(0, withinCOUNT((U8) *s,
+                                               ANYOFRbase(c), ANYOFRdelta(c)));
+        }
+        break;
+
+    case ANYOFRb:
+        if (utf8_target) {
+
+            /* We know what the first byte of any matched string should be */
+            U8 first_byte = FLAGS(c);
+
+            REXEC_FBC_FIND_NEXT_UTF8_BYTE_SCAN(first_byte,
+                      withinCOUNT(utf8_to_uvchr_buf((U8 *) s,
+                                                    (U8 *) strend,
+                                                    NULL),
+                                  ANYOFRbase(c), ANYOFRdelta(c)));
+        }
+        else {
+            REXEC_FBC_CLASS_SCAN(0, withinCOUNT((U8) *s,
+                                               ANYOFRbase(c), ANYOFRdelta(c)));
+        }
          break;
  
      case EXACTFAA_NO_TRIE: /* This node only generated for non-utf8 patterns */
@@ -2207,7 +2307,7 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
                                               | FOLDEQ_S2_FOLDS_SANE;
              goto do_exactf_utf8;
  
-    case EXACTFU_ONLY8:
+    case EXACTFU_REQ8:
          if (! utf8_target) {
              break;
          }
@@ -2239,8 +2339,8 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
           * first character.  c2 is its fold.  This logic will not work for
           * Unicode semantics and the german sharp ss, which hence should
           * not be compiled into a node that gets here. */
-        pat_string = STRING(c);
-        ln  = STR_LEN(c);      /* length to match in octets/bytes */
+        pat_string = STRINGs(c);
+        ln  = STR_LENs(c);     /* length to match in octets/bytes */
  
          /* We know that we have to match at least 'ln' bytes (which is the
           * same as characters, since not utf8).  If we have to match 3
@@ -2315,8 +2415,8 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
          /* If one of the operands is in utf8, we can't use the simpler folding
           * above, due to the fact that many different characters can have the
           * same fold, or portion of a fold, or different- length fold */
-        pat_string = STRING(c);
-        ln  = STR_LEN(c);      /* length to match in octets/bytes */
+        pat_string = STRINGs(c);
+        ln  = STR_LENs(c);     /* length to match in octets/bytes */
          pat_end = pat_string + ln;
          lnc = is_utf8_pat       /* length to match in characters */
                  ? utf8_length((U8 *) pat_string, (U8 *) pat_end)
@@ -2355,7 +2455,7 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
              {
                  goto got_it;
              }
-            s += (utf8_target) ? UTF8SKIP(s) : 1;
+            s += (utf8_target) ? UTF8_SAFE_SKIP(s, reginfo->strend) : 1;
          }
          break;
      }
@@ -2439,7 +2539,7 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
                      }
  
                      /* Didn't match.  Try at the next position (if there is one) */
-                    s += (utf8_target) ? UTF8SKIP(s) : 1;
+                    s += (utf8_target) ? UTF8_SAFE_SKIP(s, reginfo->strend) : 1;
                      if (UNLIKELY(s >= reginfo->strend)) {
                          break;
                      }
@@ -2463,7 +2563,7 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
                              goto got_it;
                          }
                          before = after;
-                        s += UTF8SKIP(s);
+                        s += UTF8_SAFE_SKIP(s, reginfo->strend);
                      }
                  }
                  else {  /* Not utf8.  Everything is a GCB except between CR and
@@ -2481,7 +2581,9 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
  
                  /* And, since this is a bound, it can match after the final
                   * character in the string */
-                if ((reginfo->intuit || regtry(reginfo, &s))) {
+                if (   reginfo->intuit
+                    || (s <= reginfo->strend && regtry(reginfo, &s)))
+                {
                      goto got_it;
                  }
                  break;
@@ -2491,7 +2593,7 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
                      if (reginfo->intuit || regtry(reginfo, &s)) {
                          goto got_it;
                      }
-                    s += (utf8_target) ? UTF8SKIP(s) : 1;
+                    s += (utf8_target) ? UTF8_SAFE_SKIP(s, reginfo->strend) : 1;
                      if (UNLIKELY(s >= reginfo->strend)) {
                          break;
                      }
@@ -2515,7 +2617,7 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
                              goto got_it;
                          }
                          before = after;
-                        s += UTF8SKIP(s);
+                        s += UTF8_SAFE_SKIP(s, reginfo->strend);
                      }
                  }
                  else {  /* Not utf8. */
@@ -2537,7 +2639,9 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
                      }
                  }
  
-                if (reginfo->intuit || regtry(reginfo, &s)) {
+                if (   reginfo->intuit
+                    || (s <= reginfo->strend && regtry(reginfo, &s)))
+                {
                      goto got_it;
                  }
  
@@ -2548,7 +2652,7 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
                      if (reginfo->intuit || regtry(reginfo, &s)) {
                          goto got_it;
                      }
-                    s += (utf8_target) ? UTF8SKIP(s) : 1;
+                    s += (utf8_target) ? UTF8_SAFE_SKIP(s, reginfo->strend) : 1;
                      if (UNLIKELY(s >= reginfo->strend)) {
                          break;
                      }
@@ -2573,7 +2677,7 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
                              goto got_it;
                          }
                          before = after;
-                        s += UTF8SKIP(s);
+                        s += UTF8_SAFE_SKIP(s, reginfo->strend);
                      }
                  }
                  else {  /* Not utf8. */
@@ -2598,7 +2702,9 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
                  /* Here are at the final position in the target string.  The SB
                   * value is always true here, so matches, depending on other
                   * constraints */
-                if (reginfo->intuit || regtry(reginfo, &s)) {
+                if (   reginfo->intuit
+                    || (s <= reginfo->strend && regtry(reginfo, &s)))
+                {
                      goto got_it;
                  }
  
@@ -2609,7 +2715,7 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
                      if (reginfo->intuit || regtry(reginfo, &s)) {
                          goto got_it;
                      }
-                    s += (utf8_target) ? UTF8SKIP(s) : 1;
+                    s += (utf8_target) ? UTF8_SAFE_SKIP(s, reginfo->strend) : 1;
                      if (UNLIKELY(s >= reginfo->strend)) {
                          break;
                      }
@@ -2643,7 +2749,7 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
                          }
                          previous = before;
                          before = after;
-                        s += UTF8SKIP(s);
+                        s += UTF8_SAFE_SKIP(s, reginfo->strend);
                      }
                  }
                  else {  /* Not utf8. */
@@ -2668,7 +2774,9 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
                      }
                  }
  
-                if (reginfo->intuit || regtry(reginfo, &s)) {
+                if (   reginfo->intuit
+                    || (s <= reginfo->strend && regtry(reginfo, &s)))
+                {
                      goto got_it;
                  }
          }
@@ -2805,7 +2913,7 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
              U8 *bitmap=NULL;
  
  
-            GET_RE_DEBUG_FLAGS_DECL;
+            DECLARE_AND_GET_RE_DEBUG_FLAGS;
  
              /* We can't just allocate points here. We need to wrap it in
               * an SV so it gets freed properly if there is a croak while
@@ -2985,7 +3093,9 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
                          LEAVE;
                          goto got_it;
                      }
-                    s = HOPc(s,1);
+                    if (s < reginfo->strend) {
+                        s = HOPc(s,1);
+                    }
                      DEBUG_TRIE_EXECUTE_r({
                          Perl_re_printf( aTHX_ "Pattern failed. Looking for new start point...\n");
                      });
@@ -3186,7 +3296,7 @@ Perl_regexec_flags(pTHX_ REGEXP * const rx, char *stringarg, char *strend,
      regmatch_info *const reginfo = &reginfo_buf;
      regexp_paren_pair *swap = NULL;
      I32 oldsave;
-    GET_RE_DEBUG_FLAGS_DECL;
+    DECLARE_AND_GET_RE_DEBUG_FLAGS;
  
      PERL_ARGS_ASSERT_REGEXEC_FLAGS;
      PERL_UNUSED_ARG(data);
@@ -3240,7 +3350,7 @@ Perl_regexec_flags(pTHX_ REGEXP * const rx, char *stringarg, char *strend,
                  if (!startpos ||
                      ((flags & REXEC_FAIL_ON_UNDERFLOW) && startpos < stringarg))
                  {
-                    DEBUG_r(Perl_re_printf( aTHX_
+                    DEBUG_GPOS_r(Perl_re_printf( aTHX_
                              "fail: ganch-gofs before earliest possible start\n"));
                      return 0;
                  }
@@ -3259,8 +3369,8 @@ Perl_regexec_flags(pTHX_ REGEXP * const rx, char *stringarg, char *strend,
  
      minlen = prog->minlen;
      if ((startpos + minlen) > strend || startpos < strbeg) {
-        DEBUG_r(Perl_re_printf( aTHX_
-                    "Regex match can't succeed, so not even tried\n"));
+       DEBUG_EXECUTE_r(Perl_re_printf( aTHX_
+                        "Regex match can't succeed, so not even tried\n"));
          return 0;
      }
  
@@ -3305,7 +3415,7 @@ Perl_regexec_flags(pTHX_ REGEXP * const rx, char *stringarg, char *strend,
              RXp_MATCH_UTF8_set(prog, utf8_target);
              prog->offs[0].start = s - strbeg;
              prog->offs[0].end = utf8_target
-                ? (char*)utf8_hop((U8*)s, prog->minlenret) - strbeg
+                ? (char*)utf8_hop_forward((U8*)s, prog->minlenret, (U8 *) strend) - strbeg
                  : s - strbeg + prog->minlenret;
              if ( !(flags & REXEC_NOT_FIRST) )
                  S_reg_set_capture_string(aTHX_ rx,
@@ -3500,11 +3610,11 @@ Perl_regexec_flags(pTHX_ REGEXP * const rx, char *stringarg, char *strend,
                  to_utf8_substr(prog);
              }
              ch = SvPVX_const(prog->anchored_utf8)[0];
-           REXEC_FBC_SCAN(0,   /* 0=>not-utf8 */
+           REXEC_FBC_SCAN(1,   /* 1=>utf8 */
                 if (*s == ch) {
                     DEBUG_EXECUTE_r( did_match = 1 );
                     if (regtry(reginfo, &s)) goto got_it;
-                   s += UTF8SKIP(s);
+                   s += UTF8_SAFE_SKIP(s, strend);
                     while (s < strend && *s == ch)
                         s += UTF8SKIP(s);
                 }
@@ -3872,7 +3982,7 @@ S_regtry(pTHX_ regmatch_info *reginfo, char **startposp)
      U32 depth = 0; /* used by REGCP_SET */
  #endif
      RXi_GET_DECL(prog,progi);
-    GET_RE_DEBUG_FLAGS_DECL;
+    DECLARE_AND_GET_RE_DEBUG_FLAGS;
  
      PERL_ARGS_ASSERT_REGTRY;
  
@@ -4162,13 +4272,14 @@ S_setup_EXACTISH_ST_c1_c2(pTHX_ const regnode * const text_node, int *c1p,
       * to/from code points */
      bool utf8_has_been_setup = FALSE;
  
-    dVAR;
  
      U8 *pat = (U8*)STRING(text_node);
      U8 folded[UTF8_MAX_FOLD_CHAR_EXPAND * UTF8_MAXBYTES_CASE + 1] = { '\0' };
  
      if (   OP(text_node) == EXACT
-        || OP(text_node) == EXACT_ONLY8
+        || OP(text_node) == LEXACT
+        || OP(text_node) == EXACT_REQ8
+        || OP(text_node) == LEXACT_REQ8
          || OP(text_node) == EXACTL)
      {
  
@@ -4177,7 +4288,8 @@ S_setup_EXACTISH_ST_c1_c2(pTHX_ const regnode * const text_node, int *c1p,
           * copy the input to the output, avoiding finding the code point of
           * that character */
          if (!is_utf8_pat) {
-            assert(OP(text_node) != EXACT_ONLY8);
+            assert(   OP(text_node) != EXACT_REQ8
+                   && OP(text_node) != LEXACT_REQ8);
              c2 = c1 = *pat;
          }
          else if (utf8_target) {
@@ -4185,7 +4297,9 @@ S_setup_EXACTISH_ST_c1_c2(pTHX_ const regnode * const text_node, int *c1p,
              Copy(pat, c2_utf8, UTF8SKIP(pat), U8);
              utf8_has_been_setup = TRUE;
          }
-        else if (OP(text_node) == EXACT_ONLY8) {
+        else if (   OP(text_node) == EXACT_REQ8
+                 || OP(text_node) == LEXACT_REQ8)
+        {
              return FALSE;   /* Can only match UTF-8 target */
          }
          else {
@@ -4193,7 +4307,7 @@ S_setup_EXACTISH_ST_c1_c2(pTHX_ const regnode * const text_node, int *c1p,
          }
      }
      else { /* an EXACTFish node */
-        U8 *pat_end = pat + STR_LEN(text_node);
+        U8 *pat_end = pat + STR_LENs(text_node);
  
          /* An EXACTFL node has at least some characters unfolded, because what
           * they match is not known until now.  So, now is the time to fold
@@ -4275,8 +4389,8 @@ S_setup_EXACTISH_ST_c1_c2(pTHX_ const regnode * const text_node, int *c1p,
                  }
              }
              else if (c1 > 255) {
-                const unsigned int * remaining_folds;
-                unsigned int first_fold;
+                const U32 * remaining_folds;
+                U32 first_fold;
  
                  /* Look up what code points (besides c1) fold to c1;  e.g.,
                   * [ 'K', KELVIN_SIGN ] both fold to 'k'. */
@@ -4358,7 +4472,7 @@ S_setup_EXACTISH_ST_c1_c2(pTHX_ const regnode * const text_node, int *c1p,
                      case EXACTFU:
                          c2 = PL_fold_latin1[c1];
                          break;
-                    case EXACTFU_ONLY8:
+                    case EXACTFU_REQ8:
                          return FALSE;
                          NOT_REACHED; /* NOTREACHED */
  
@@ -4410,7 +4524,7 @@ STATIC bool
  S_isGCB(pTHX_ const GCB_enum before, const GCB_enum after, const U8 * const strbeg, const U8 * const curpos, const bool utf8_target)
  {
      /* returns a boolean indicating if there is a Grapheme Cluster Boundary
-     * between the inputs.  See http://www.unicode.org/reports/tr29/. */
+     * between the inputs.  See https://www.unicode.org/reports/tr29/. */
  
      PERL_ARGS_ASSERT_ISGCB;
  
@@ -4472,7 +4586,7 @@ S_isGCB(pTHX_ const GCB_enum before, const GCB_enum after, const U8 * const strb
                  }
                  while (prev == GCB_Extend);
  
-                return prev != GCB_XPG_XX;
+                return prev != GCB_ExtPict_XX;
              }
  
          default:
@@ -4490,7 +4604,6 @@ S_isGCB(pTHX_ const GCB_enum before, const GCB_enum after, const U8 * const strb
  STATIC GCB_enum
  S_backup_one_GCB(pTHX_ const U8 * const strbeg, U8 ** curpos, const bool utf8_target)
  {
-    dVAR;
      GCB_enum gcb;
  
      PERL_ARGS_ASSERT_BACKUP_ONE_GCB;
@@ -4768,7 +4881,6 @@ S_isLB(pTHX_ LB_enum before,
  STATIC LB_enum
  S_advance_one_LB(pTHX_ U8 ** curpos, const U8 * const strend, const bool utf8_target)
  {
-    dVAR;
  
      LB_enum lb;
  
@@ -4799,7 +4911,6 @@ S_advance_one_LB(pTHX_ U8 ** curpos, const U8 * const strend, const bool utf8_ta
  STATIC LB_enum
  S_backup_one_LB(pTHX_ const U8 * const strbeg, U8 ** curpos, const bool utf8_target)
  {
-    dVAR;
      LB_enum lb;
  
      PERL_ARGS_ASSERT_BACKUP_ONE_LB;
@@ -4847,7 +4958,7 @@ S_isSB(pTHX_ SB_enum before,
               const bool utf8_target)
  {
      /* returns a boolean indicating if there is a Sentence Boundary Break
-     * between the inputs.  See http://www.unicode.org/reports/tr29/ */
+     * between the inputs.  See https://www.unicode.org/reports/tr29/ */
  
      U8 * lpos = (U8 *) curpos;
      bool has_para_sep = FALSE;
@@ -5036,7 +5147,6 @@ S_isSB(pTHX_ SB_enum before,
  STATIC SB_enum
  S_advance_one_SB(pTHX_ U8 ** curpos, const U8 * const strend, const bool utf8_target)
  {
-    dVAR;
      SB_enum sb;
  
      PERL_ARGS_ASSERT_ADVANCE_ONE_SB;
@@ -5070,7 +5180,6 @@ S_advance_one_SB(pTHX_ U8 ** curpos, const U8 * const strend, const bool utf8_ta
  STATIC SB_enum
  S_backup_one_SB(pTHX_ const U8 * const strbeg, U8 ** curpos, const bool utf8_target)
  {
-    dVAR;
      SB_enum sb;
  
      PERL_ARGS_ASSERT_BACKUP_ONE_SB;
@@ -5307,7 +5416,6 @@ S_advance_one_WB(pTHX_ U8 ** curpos,
                         const bool utf8_target,
                         const bool skip_Extend_Format)
  {
-    dVAR;
      WB_enum wb;
  
      PERL_ARGS_ASSERT_ADVANCE_ONE_WB;
@@ -5345,7 +5453,6 @@ S_advance_one_WB(pTHX_ U8 ** curpos,
  STATIC WB_enum
  S_backup_one_WB(pTHX_ WB_enum * previous, const U8 * const strbeg, U8 ** curpos, const bool utf8_target)
  {
-    dVAR;
      WB_enum wb;
  
      PERL_ARGS_ASSERT_BACKUP_ONE_WB;
@@ -5465,16 +5572,20 @@ S_backup_one_WB(pTHX_ WB_enum * previous, const U8 * const strbeg, U8 ** curpos,
  
  /* push a new state then goto it */
  
-#define PUSH_STATE_GOTO(state, node, input) \
+#define PUSH_STATE_GOTO(state, node, input, eol, sr0)       \
      pushinput = input; \
+    pusheol = eol; \
+    pushsr0 = sr0; \
      scan = node; \
      st->resume_state = state; \
      goto push_state;
  
  /* push a new state with success backtracking, then goto it */
  
-#define PUSH_YES_STATE_GOTO(state, node, input) \
+#define PUSH_YES_STATE_GOTO(state, node, input, eol, sr0)   \
      pushinput = input; \
+    pusheol = eol;     \
+    pushsr0 = sr0; \
      scan = node; \
      st->resume_state = state; \
      goto push_yes_state;
@@ -5530,7 +5641,7 @@ the subpattern to be matched possibly multiple times, while B is the entire
  rest of the pattern. Variable and state names reflect this convention.
  
  The states in the main switch are the union of ops and failure/success of
-substates associated with with that op.  For example, IFMATCH is the op
+substates associated with that op.  For example, IFMATCH is the op
  that does lookahead assertions /(?=A)B/ and so the IFMATCH state means
  'execute IFMATCH'; while IFMATCH_A is a state saying that we have just
  successfully matched A and IFMATCH_A_fail is a state saying that we have
@@ -5595,8 +5706,8 @@ The topmost backtrack state, pointed to by st, is usually free. If you
  want to claim it, populate any ST.foo fields in it with values you wish to
  save, then do one of
  
-       PUSH_STATE_GOTO(resume_state, node, newinput);
-       PUSH_YES_STATE_GOTO(resume_state, node, newinput);
+       PUSH_STATE_GOTO(resume_state, node, newinput, new_eol, new_sr0);
+       PUSH_YES_STATE_GOTO(resume_state, node, newinput, new_eol, new_sr0);
  
  which sets that backtrack state's resume value to 'resume_state', pushes a
  new free entry to the top of the backtrack stack, then goes to 'node'.
@@ -5624,13 +5735,22 @@ allocated, and is never freed until interpreter destruction. When the slab
  is full, a new one is allocated and chained to the end. At exit from
  regmatch(), slabs allocated since entry are freed.
  
+In order to work with variable length lookbehinds, an upper limit is placed on
+lookbehinds which is set to where the match position is at the end of where the
+lookbehind would get to.  Nothing in the lookbehind should match above that,
+except we should be able to look beyond it for things like \b, which need the
+next character in the string to be able to determine if this is a boundary or
+not.  We also can't match the end of string/line unless we are also at the end
+of the entire string, so NEXTCHR_IS_EOS remains the same, and for those OPs
+that match a width, we have to add a condition that they are within the legal
+bounds of our window into the string.
+
  */
  
  /* returns -1 on failure, $+[0] on success */
  STATIC SSize_t
  S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
  {
-    dVAR;
      const bool utf8_target = reginfo->is_utf8_target;
      const U32 uniflags = UTF8_ALLOW_DEFAULT;
      REGEXP *rex_sv = reginfo->prog;
@@ -5645,7 +5765,10 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
      SSize_t ln = 0; /* len or last;  init to avoid compiler warning */
      SSize_t endref = 0; /* offset of end of backref when ln is start */
      char *locinput = startpos;
+    char *loceol = reginfo->strend;
      char *pushinput; /* where to continue after a PUSH */
+    char *pusheol;   /* where to stop matching (loceol) after a PUSH */
+    U8   *pushsr0;   /* save starting pos of script run */
      I32 nextchr;   /* is always set to UCHARAT(locinput), or -1 at EOS */
  
      bool result = 0;       /* return value of S_regmatch */
@@ -5707,7 +5830,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
  #endif
  
  #ifdef DEBUGGING
-    GET_RE_DEBUG_FLAGS_DECL;
+    DECLARE_AND_GET_RE_DEBUG_FLAGS;
  #endif
  
      /* protect against undef(*^R) */
@@ -5782,7 +5905,8 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
             /* update the startpoint */
             st->u.keeper.val = rex->offs[0].start;
             rex->offs[0].start = locinput - reginfo->strbeg;
-           PUSH_STATE_GOTO(KEEPS_next, next, locinput);
+           PUSH_STATE_GOTO(KEEPS_next, next, locinput, loceol,
+                            script_run_begin);
             NOT_REACHED; /* NOTREACHED */
  
         case KEEPS_next_fail:
@@ -5809,13 +5933,17 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
             break;
  
         case SANY: /*  /./s  */
-           if (NEXTCHR_IS_EOS)
+           if (NEXTCHR_IS_EOS || locinput >= loceol)
                 sayNO;
              goto increment_locinput;
  
         case REG_ANY: /*  /./  */
-           if ((NEXTCHR_IS_EOS) || nextchr == '\n')
+           if (   NEXTCHR_IS_EOS
+                || locinput >= loceol
+                || nextchr == '\n')
+            {
                 sayNO;
+            }
              goto increment_locinput;
  
  
@@ -5825,7 +5953,10 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
              /* In this case the charclass data is available inline so
                 we can fail fast without a lot of extra overhead. 
               */
-            if(!NEXTCHR_IS_EOS && !ANYOF_BITMAP_TEST(scan, nextchr)) {
+            if ( !   NEXTCHR_IS_EOS
+                &&   locinput < loceol
+                && ! ANYOF_BITMAP_TEST(scan, nextchr))
+            {
                  DEBUG_EXECUTE_r(
                      Perl_re_exec_indentf( aTHX_  "%sTRIE: failed to match trie start class...%s\n",
                                depth, PL_colors[4], PL_colors[5])
@@ -5904,7 +6035,9 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
                      }
                  }
                  if (   trie->bitmap
-                    && (NEXTCHR_IS_EOS || !TRIE_BITMAP_TEST(trie, nextchr)))
+                    && (     NEXTCHR_IS_EOS
+                        ||   locinput >= loceol
+                        || ! TRIE_BITMAP_TEST(trie, nextchr)))
                  {
                     if (trie->states[ state ].wordnum) {
                          DEBUG_EXECUTE_r(
@@ -5942,7 +6075,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
                    shortest accept state and the wordnum of the longest
                    accept state */
  
-               while ( state && uc <= (U8*)(reginfo->strend) ) {
+               while ( state && uc <= (U8*)(loceol) ) {
                      U32 base = trie->states[ state ].trans.base;
                      UV uvc = 0;
                      U16 charid = 0;
@@ -5977,10 +6110,10 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
                     });
  
                     /* read a char and goto next state */
-                   if ( base && (foldlen || uc < (U8*)(reginfo->strend))) {
+                   if ( base && (foldlen || uc < (U8*)(loceol))) {
                         I32 offset;
                         REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc,
-                                             (U8 *) reginfo->strend, uscan,
+                                             (U8 *) loceol, uscan,
                                               len, uvc, charid, foldlen,
                                               foldbuf, uniflags);
                         charcount++;
@@ -6104,6 +6237,8 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
  
                     while (chars) {
                         if (utf8_target) {
+                            /* XXX This assumes the length is well-formed, as
+                             * does the UTF8SKIP below */
                             uvc = utf8n_to_uvchr((U8*)uc, UTF8_MAXLEN, &len,
                                                     uniflags);
                             uc += len;
@@ -6147,7 +6282,8 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
             });
  
             if ( ST.accepted > 1 || has_cutgroup || ST.jump ) {
-               PUSH_STATE_GOTO(TRIE_next, scan, (char*)uc);
+               PUSH_STATE_GOTO(TRIE_next, scan, (char*)uc, loceol,
+                                script_run_begin);
                 NOT_REACHED; /* NOTREACHED */
             }
             /* only one choice left - just continue */
@@ -6175,6 +6311,20 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
          }
  #undef  ST
  
+       case LEXACT_REQ8:
+            if (! utf8_target) {
+                sayNO;
+            }
+            /* FALLTHROUGH */
+
+       case LEXACT:
+        {
+           char *s;
+
+           s = STRINGl(scan);
+           ln = STR_LENl(scan);
+            goto join_short_long_exact;
+
         case EXACTL:             /*  /abc/l       */
              _CHECK_AND_WARN_PROBLEMATIC_LOCALE;
  
@@ -6188,16 +6338,18 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
                  _CHECK_AND_OUTPUT_WIDE_LOCALE_UTF8_MSG(locinput, reginfo->strend);
              }
              goto do_exact;
-       case EXACT_ONLY8:
+       case EXACT_REQ8:
              if (! utf8_target) {
                  sayNO;
              }
              /* FALLTHROUGH */
-       case EXACT: {            /*  /abc/        */
-           char *s;
+
+       case EXACT:             /*  /abc/        */
            do_exact:
-           s = STRING(scan);
-           ln = STR_LEN(scan);
+           s = STRINGs(scan);
+           ln = STR_LENs(scan);
+
+          join_short_long_exact:
             if (utf8_target != is_utf8_pat) {
                 /* The target and the pattern have differing utf8ness. */
                 char *l = locinput;
@@ -6214,7 +6366,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
                       * is an invariant, but there are tests in the test suite
                       * dealing with (??{...}) which violate this) */
                     while (s < e) {
-                       if (l >= reginfo->strend
+                       if (   l >= loceol
                              || UTF8_IS_ABOVE_LATIN1(* (U8*) l))
                          {
                              sayNO;
@@ -6238,7 +6390,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
                 else {
                     /* The target is not utf8, the pattern is utf8. */
                     while (s < e) {
-                        if (l >= reginfo->strend
+                        if (   l >= loceol
                              || UTF8_IS_ABOVE_LATIN1(* (U8*) s))
                          {
                              sayNO;
@@ -6264,7 +6416,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
              else {
                  /* The target and the pattern have the same utf8ness. */
                  /* Inline the first character, for speed. */
-                if (reginfo->strend - locinput < ln
+                if (   loceol - locinput < ln
                      || UCHARAT(s) != nextchr
                      || (ln > 1 && memNE(s, locinput, ln)))
                  {
@@ -6300,7 +6452,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
             fold_array = PL_fold_latin1;
             goto do_exactf;
  
-        case EXACTFU_ONLY8:      /* /abc/iu with something in /abc/ > 255 */
+        case EXACTFU_REQ8:      /* /abc/iu with something in /abc/ > 255 */
              if (! utf8_target) {
                  sayNO;
              }
@@ -6349,8 +6501,8 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
             fold_utf8_flags = 0;
  
           do_exactf:
-           s = STRING(scan);
-           ln = STR_LEN(scan);
+           s = STRINGs(scan);
+           ln = STR_LENs(scan);
  
             if (   utf8_target
                  || is_utf8_pat
@@ -6360,7 +6512,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
               /* Either target or the pattern are utf8, or has the issue where
                * the fold lengths may differ. */
                 const char * const l = locinput;
-               char *e = reginfo->strend;
+               char *e = loceol;
  
                 if (! foldEQ_utf8_flags(l, &e, 0,  utf8_target,
                                          s, 0,  ln, is_utf8_pat,fold_utf8_flags))
@@ -6378,7 +6530,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
             {
                 sayNO;
             }
-           if (reginfo->strend - locinput < ln)
+           if (loceol - locinput < ln)
                 sayNO;
             if (ln > 1 && ! folder(locinput, s, ln))
                 sayNO;
@@ -6407,9 +6559,9 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
                 if (locinput == reginfo->strbeg)
                     b1 = isWORDCHAR_LC('\n');
                 else {
-                    b1 = isWORDCHAR_LC_utf8_safe(reghop3((U8*)locinput, -1,
-                                                        (U8*)(reginfo->strbeg)),
-                                                 (U8*)(reginfo->strend));
+                    U8 *p = reghop3((U8*)locinput, -1,
+                                    (U8*)(reginfo->strbeg));
+                    b1 = isWORDCHAR_LC_utf8_safe(p, (U8*)(reginfo->strend));
                 }
                  b2 = (NEXTCHR_IS_EOS)
                      ? isWORDCHAR_LC('\n')
@@ -6486,13 +6638,15 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
                      case TRADITIONAL_BOUND:
                      {
                          bool b1, b2;
-                        b1 = (locinput == reginfo->strbeg)
-                             ? 0 /* isWORDCHAR_L1('\n') */
-                             : isWORDCHAR_utf8_safe(
-                                               reghop3((U8*)locinput,
-                                                       -1,
-                                                       (U8*)(reginfo->strbeg)),
-                                                    (U8*) reginfo->strend);
+                        if (locinput == reginfo->strbeg) {
+                            b1 = 0 /* isWORDCHAR_L1('\n') */;
+                        }
+                        else {
+                            U8 *p = reghop3((U8*)locinput, -1,
+                                            (U8*)(reginfo->strbeg));
+
+                            b1 = isWORDCHAR_utf8_safe(p, (U8*) reginfo->strend);
+                        }
                          b2 = (NEXTCHR_IS_EOS)
                              ? 0 /* isWORDCHAR_L1('\n') */
                              : isWORDCHAR_utf8_safe((U8*)locinput,
@@ -6674,7 +6828,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
              /* FALLTHROUGH */
         case ANYOFD:  /*   /[abc]/d       */
         case ANYOF:  /*   /[abc]/       */
-            if (NEXTCHR_IS_EOS)
+            if (NEXTCHR_IS_EOS || locinput >= loceol)
                  sayNO;
             if (  (! utf8_target || UTF8_IS_INVARIANT(*locinput))
                 && ! (ANYOF_FLAGS(scan) & ~ ANYOF_MATCHES_ALL_ABOVE_BITMAP))
@@ -6685,7 +6839,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
                 locinput++;
              }
              else {
-               if (!reginclass(rex, scan, (U8*)locinput, (U8*)reginfo->strend,
+               if (!reginclass(rex, scan, (U8*)locinput, (U8*) loceol,
                                                                     utf8_target))
                  {
                     sayNO;
@@ -6695,14 +6849,20 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
             break;
  
          case ANYOFM:
-            if (NEXTCHR_IS_EOS || (UCHARAT(locinput) & FLAGS(scan)) != ARG(scan)) {
+            if (   NEXTCHR_IS_EOS
+                || (UCHARAT(locinput) & FLAGS(scan)) != ARG(scan)
+                || locinput >= loceol)
+            {
                  sayNO;
              }
              locinput++; /* ANYOFM is always single byte */
              break;
  
          case NANYOFM:
-            if (NEXTCHR_IS_EOS || (UCHARAT(locinput) & FLAGS(scan)) == ARG(scan)) {
+            if (   NEXTCHR_IS_EOS
+                || (UCHARAT(locinput) & FLAGS(scan)) == ARG(scan)
+                || locinput >= loceol)
+            {
                  sayNO;
              }
              goto increment_locinput;
@@ -6711,7 +6871,34 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
          case ANYOFH:
              if (   ! utf8_target
                  ||   NEXTCHR_IS_EOS
-               || ! reginclass(rex, scan, (U8*)locinput, (U8*)reginfo->strend,
+                ||   ANYOF_FLAGS(scan) > NATIVE_UTF8_TO_I8(*locinput)
+               || ! reginclass(rex, scan, (U8*)locinput, (U8*) loceol,
+                                                                   utf8_target))
+            {
+                sayNO;
+            }
+            goto increment_locinput;
+            break;
+
+        case ANYOFHb:
+            if (   ! utf8_target
+                ||   NEXTCHR_IS_EOS
+                ||   ANYOF_FLAGS(scan) != (U8) *locinput
+               || ! reginclass(rex, scan, (U8*)locinput, (U8*) loceol,
+                                                                  utf8_target))
+            {
+                sayNO;
+            }
+            goto increment_locinput;
+            break;
+
+        case ANYOFHr:
+            if (   ! utf8_target
+                ||   NEXTCHR_IS_EOS
+                || ! inRANGE((U8) NATIVE_UTF8_TO_I8(*locinput),
+                             LOWEST_ANYOF_HRx_BYTE(ANYOF_FLAGS(scan)),
+                             HIGHEST_ANYOF_HRx_BYTE(ANYOF_FLAGS(scan)))
+               || ! reginclass(rex, scan, (U8*)locinput, (U8*) loceol,
                                                                     utf8_target))
              {
                  sayNO;
@@ -6719,6 +6906,69 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
              goto increment_locinput;
              break;
  
+        case ANYOFHs:
+            if (   ! utf8_target
+                ||   NEXTCHR_IS_EOS
+                ||   loceol - locinput < FLAGS(scan)
+                ||   memNE(locinput, ((struct regnode_anyofhs *) scan)->string, FLAGS(scan))
+               || ! reginclass(rex, scan, (U8*)locinput, (U8*) loceol,
+                                                                   utf8_target))
+            {
+                sayNO;
+            }
+            goto increment_locinput;
+            break;
+
+        case ANYOFR:
+            if (NEXTCHR_IS_EOS) {
+                sayNO;
+            }
+
+            if (utf8_target) {
+                if (    ANYOF_FLAGS(scan) > NATIVE_UTF8_TO_I8(*locinput)
+                   || ! withinCOUNT(utf8_to_uvchr_buf((U8 *) locinput,
+                                                (U8 *) reginfo->strend,
+                                                NULL),
+                                    ANYOFRbase(scan), ANYOFRdelta(scan)))
+                {
+                    sayNO;
+                }
+            }
+            else {
+                if (! withinCOUNT((U8) *locinput,
+                                  ANYOFRbase(scan), ANYOFRdelta(scan)))
+                {
+                    sayNO;
+                }
+            }
+            goto increment_locinput;
+            break;
+
+        case ANYOFRb:
+            if (NEXTCHR_IS_EOS) {
+                sayNO;
+            }
+
+            if (utf8_target) {
+                if (     ANYOF_FLAGS(scan) != (U8) *locinput
+                    || ! withinCOUNT(utf8_to_uvchr_buf((U8 *) locinput,
+                                                (U8 *) reginfo->strend,
+                                                NULL),
+                                     ANYOFRbase(scan), ANYOFRdelta(scan)))
+                {
+                    sayNO;
+                }
+            }
+            else {
+                if (! withinCOUNT((U8) *locinput,
+                                  ANYOFRbase(scan), ANYOFRdelta(scan)))
+                {
+                    sayNO;
+                }
+            }
+            goto increment_locinput;
+            break;
+
          /* The argument (FLAGS) to all the POSIX node types is the class number
           * */
  
@@ -6728,7 +6978,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
  
          case POSIXL:    /* \w or [:punct:] etc. under /l */
              _CHECK_AND_WARN_PROBLEMATIC_LOCALE;
-            if (NEXTCHR_IS_EOS)
+            if (NEXTCHR_IS_EOS || locinput >= loceol)
                  sayNO;
  
              /* Use isFOO_lc() for characters within Latin1.  (Note that
@@ -6773,7 +7023,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
  
          case NPOSIXA:   /* \W or [:^punct:] etc. under /a */
  
-            if (NEXTCHR_IS_EOS) {
+            if (NEXTCHR_IS_EOS || locinput >= loceol) {
                  sayNO;
              }
  
@@ -6792,7 +7042,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
               * UTF-8, and also from NPOSIXA even in UTF-8 when the current
               * character is a single byte */
  
-            if (NEXTCHR_IS_EOS) {
+            if (NEXTCHR_IS_EOS || locinput >= loceol) {
                  sayNO;
              }
  
@@ -6815,7 +7065,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
  
          case POSIXU:    /* \w or [:punct:] etc. under /u */
            utf8_posix:
-            if (NEXTCHR_IS_EOS) {
+            if (NEXTCHR_IS_EOS || locinput >= loceol) {
                  sayNO;
              }
  
@@ -6890,13 +7140,13 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
                          }
                          break;
                  }
-                locinput += UTF8SKIP(locinput);
+                locinput += UTF8_SAFE_SKIP(locinput, reginfo->strend);
              }
              break;
  
         case CLUMP: /* Match \X: logical Unicode character.  This is defined as
                        a Unicode extended Grapheme Cluster */
-           if (NEXTCHR_IS_EOS)
+           if (NEXTCHR_IS_EOS || locinput >= loceol)
                 sayNO;
             if  (! utf8_target) {
  
@@ -6905,7 +7155,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
                 locinput++;         /* Match the . or CR */
                 if (nextchr == '\r' /* And if it was CR, and the next is LF,
                                        match the LF */
-                   && locinput < reginfo->strend
+                   && locinput <  loceol
                     && UCHARAT(locinput) == '\n')
                  {
                      locinput++;
@@ -6922,7 +7172,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
                   * current character.  (There is always a break at the
                   * end-of-input) */
                  locinput += UTF8SKIP(locinput);
-                while (locinput < reginfo->strend) {
+                while (locinput < loceol) {
                      GCB_enum cur_gcb = getGCB_VAL_UTF8((U8*) locinput,
                                                           (U8*) reginfo->strend);
                      if (isGCB(prev_gcb, cur_gcb,
@@ -6940,7 +7190,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
             }
             break;
              
-       case NREFFL:  /*  /\g{name}/il  */
+       case REFFLN:  /*  /\g{name}/il  */
         {   /* The capture buffer cases.  The ones ending in N for the
                named buffers just convert to the equivalent numbered and
                pretend they were called as the corresponding numbered buffer
@@ -6960,28 +7210,28 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
             utf8_fold_flags = FOLDEQ_LOCALE;
             goto do_nref;
  
-       case NREFFA:  /*  /\g{name}/iaa  */
+       case REFFAN:  /*  /\g{name}/iaa  */
             folder = foldEQ_latin1;
             fold_array = PL_fold_latin1;
             type = REFFA;
             utf8_fold_flags = FOLDEQ_UTF8_NOMIX_ASCII;
             goto do_nref;
  
-       case NREFFU:  /*  /\g{name}/iu  */
+       case REFFUN:  /*  /\g{name}/iu  */
             folder = foldEQ_latin1;
             fold_array = PL_fold_latin1;
             type = REFFU;
             utf8_fold_flags = 0;
             goto do_nref;
  
-       case NREFF:  /*  /\g{name}/i  */
+       case REFFN:  /*  /\g{name}/i  */
             folder = foldEQ;
             fold_array = PL_fold;
             type = REFF;
             utf8_fold_flags = 0;
             goto do_nref;
  
-       case NREF:  /*  /\g{name}/   */
+       case REFN:  /*  /\g{name}/   */
             type = REF;
             folder = NULL;
             fold_array = NULL;
@@ -7044,11 +7294,11 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
             if (type != REF     /* REF can do byte comparison */
                 && (utf8_target || type == REFFU || type == REFFL))
             {
-               char * limit = reginfo->strend;
+               char * limit = loceol;
  
                 /* This call case insensitively compares the entire buffer
                     * at s, with the current input starting at locinput, but
-                    * not going off the end given by reginfo->strend, and
+                    * not going off the end given by loceol, and
                      * returns in <limit> upon success, how much of the
                      * current input was matched */
                 if (! foldEQ_utf8_flags(s, NULL, endref - ln, utf8_target,
@@ -7061,13 +7311,16 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
             }
  
             /* Not utf8:  Inline the first character, for speed. */
-           if (!NEXTCHR_IS_EOS &&
-                UCHARAT(s) != nextchr &&
-               (type == REF ||
-                UCHARAT(s) != fold_array[nextchr]))
+           if ( ! NEXTCHR_IS_EOS
+                && locinput < loceol
+                && UCHARAT(s) != nextchr
+                && (   type == REF
+                    || UCHARAT(s) != fold_array[nextchr]))
+            {
                 sayNO;
+            }
             ln = endref - ln;
-           if (locinput + ln > reginfo->strend)
+           if (locinput + ln > loceol)
                 sayNO;
             if (ln > 1 && (type == REF
                            ? memNE(s, locinput, ln)
@@ -7127,7 +7380,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
                  rex->recurse_locinput[arg]= locinput;
  
                  DEBUG_r({
-                    GET_RE_DEBUG_FLAGS_DECL;
+                    DECLARE_AND_GET_RE_DEBUG_FLAGS;
                      DEBUG_STACK_r({
                          Perl_re_exec_indentf( aTHX_
                              "entering GOSUB, prev_recurse_locinput=%p recurse_locinput[%d]=%p\n",
@@ -7146,7 +7399,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
              /* NOTREACHED */
  
          case EVAL:  /*   /(?{...})B/   /(??{A})B/  and  /(?(?{...})X|Y)B/   */
-            if (cur_eval && cur_eval->locinput==locinput) {
+            if (logical == 2 && cur_eval && cur_eval->locinput==locinput) {
                 if ( ++nochange_depth > max_nochange_depth )
                      Perl_croak(aTHX_ "EVAL without pos change exceeded limit in regex");
              } else {
@@ -7374,7 +7627,8 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
                  PL_curpm = PL_reg_curpm;
  
                 if (logical != 2) {
-                    PUSH_STATE_GOTO(EVAL_B, next, locinput);
+                    PUSH_STATE_GOTO(EVAL_B, next, locinput, loceol,
+                                    script_run_begin);
                     /* NOTREACHED */
                  }
             }
@@ -7474,7 +7728,8 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
                 ST.prev_eval = cur_eval;
                 cur_eval = st;
                 /* now continue from first node in postponed RE */
-               PUSH_YES_STATE_GOTO(EVAL_postponed_AB, startpoint, locinput);
+               PUSH_YES_STATE_GOTO(EVAL_postponed_AB, startpoint, locinput,
+                                    loceol, script_run_begin);
                 NOT_REACHED; /* NOTREACHED */
         }
  
@@ -7630,7 +7885,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
             sw = cBOOL(rex->lastparen >= n && rex->offs[n].end != -1);
             break;
  
-       case NGROUPP:  /*  (?(<name>))  */
+       case GROUPPN:  /*  (?(<name>))  */
             /* reg_check_named_buff_matched returns 0 for no match */
             sw = cBOOL(0 < reg_check_named_buff_matched(rex,scan));
             break;
@@ -7774,7 +8029,8 @@ NULL
             ST.count = -1;      /* this will be updated by WHILEM */
             ST.lastloc = NULL;  /* this will be updated by WHILEM */
  
-           PUSH_YES_STATE_GOTO(CURLYX_end, PREVOPER(next), locinput);
+           PUSH_YES_STATE_GOTO(CURLYX_end, PREVOPER(next), locinput, loceol,
+                                script_run_begin);
             NOT_REACHED; /* NOTREACHED */
         }
  
@@ -7822,7 +8078,8 @@ NULL
                 cur_curlyx->u.curlyx.lastloc = locinput;
                 REGCP_SET(ST.lastcp);
  
-               PUSH_STATE_GOTO(WHILEM_A_pre, A, locinput);
+               PUSH_STATE_GOTO(WHILEM_A_pre, A, locinput, loceol,
+                                script_run_begin);
                 NOT_REACHED; /* NOTREACHED */
             }
  
@@ -7930,7 +8187,7 @@ NULL
                 ST.save_curlyx = cur_curlyx;
                 cur_curlyx = cur_curlyx->u.curlyx.prev_curlyx;
                 PUSH_YES_STATE_GOTO(WHILEM_B_min, ST.save_curlyx->u.curlyx.B,
-                                    locinput);
+                                    locinput, loceol, script_run_begin);
                 NOT_REACHED; /* NOTREACHED */
             }
  
@@ -7941,7 +8198,8 @@ NULL
                              maxopenparen);
                 cur_curlyx->u.curlyx.lastloc = locinput;
                 REGCP_SET(ST.lastcp);
-               PUSH_STATE_GOTO(WHILEM_A_max, A, locinput);
+               PUSH_STATE_GOTO(WHILEM_A_max, A, locinput, loceol,
+                                script_run_begin);
                 NOT_REACHED; /* NOTREACHED */
             }
             goto do_whilem_B_max;
@@ -7993,7 +8251,7 @@ NULL
             ST.save_curlyx = cur_curlyx;
             cur_curlyx = cur_curlyx->u.curlyx.prev_curlyx;
             PUSH_YES_STATE_GOTO(WHILEM_B_max, ST.save_curlyx->u.curlyx.B,
-                                locinput);
+                                locinput, loceol, script_run_begin);
             NOT_REACHED; /* NOTREACHED */
  
         case WHILEM_B_min_fail: /* just failed to match B in a minimal match */
@@ -8024,7 +8282,7 @@ NULL
             REGCP_SET(ST.lastcp);
             PUSH_STATE_GOTO(WHILEM_A_min,
                 /*A*/ NEXTOPER(ST.save_curlyx->u.curlyx.me) + EXTRA_STEP_2ARGS,
-                locinput);
+                locinput, loceol, script_run_begin);
             NOT_REACHED; /* NOTREACHED */
  
  #undef  ST
@@ -8046,9 +8304,11 @@ NULL
  
             /* Now go into the branch */
             if (has_cutgroup) {
-               PUSH_YES_STATE_GOTO(BRANCH_next, scan, locinput);
+               PUSH_YES_STATE_GOTO(BRANCH_next, scan, locinput, loceol,
+                                    script_run_begin);
             } else {
-               PUSH_STATE_GOTO(BRANCH_next, scan, locinput);
+               PUSH_STATE_GOTO(BRANCH_next, scan, locinput, loceol,
+                                script_run_begin);
             }
             NOT_REACHED; /* NOTREACHED */
  
@@ -8056,7 +8316,8 @@ NULL
              sv_yes_mark = st->u.mark.mark_name = scan->flags
                  ? MUTABLE_SV(rexi->data->data[ ARG( scan ) ])
                  : NULL;
-            PUSH_STATE_GOTO(CUTGROUP_next, next, locinput);
+            PUSH_STATE_GOTO(CUTGROUP_next, next, locinput, loceol,
+                            script_run_begin);
              NOT_REACHED; /* NOTREACHED */
  
          case CUTGROUP_next_fail:
@@ -8133,7 +8394,8 @@ NULL
                 goto curlym_do_B;
  
           curlym_do_A: /* execute the A in /A{m,n}B/  */
-           PUSH_YES_STATE_GOTO(CURLYM_A, ST.A, locinput); /* match A */
+           PUSH_YES_STATE_GOTO(CURLYM_A, ST.A, locinput, loceol, /* match A */
+                                script_run_begin);
             NOT_REACHED; /* NOTREACHED */
  
         case CURLYM_A: /* we've just matched an A */
@@ -8203,8 +8465,15 @@ NULL
                 );
             if (! NEXTCHR_IS_EOS && ST.c1 != CHRTEST_VOID) {
                  if (! UTF8_IS_INVARIANT(nextchr) && utf8_target) {
-                    if (memNE(locinput, ST.c1_utf8, UTF8SKIP(locinput))
-                        && memNE(locinput, ST.c2_utf8, UTF8SKIP(locinput)))
+
+                           /* (We can use memEQ and memNE in this file without
+                            * having to worry about one being shorter than the
+                            * other, since the first byte of each gives the
+                            * length of the character) */
+                    if (   memNE(locinput, ST.c1_utf8, UTF8_SAFE_SKIP(locinput,
+                                                              reginfo->strend))
+                        && memNE(locinput, ST.c2_utf8, UTF8_SAFE_SKIP(locinput,
+                                                             reginfo->strend)))
                      {
                          /* simulate B failing */
                          DEBUG_OPTIMISE_r(
@@ -8250,7 +8519,8 @@ NULL
                 }
             }
             
-           PUSH_STATE_GOTO(CURLYM_B, ST.B, locinput); /* match B */
+           PUSH_STATE_GOTO(CURLYM_B, ST.B, locinput, loceol,   /* match B */
+                            script_run_begin);
             NOT_REACHED; /* NOTREACHED */
  
         case CURLYM_B_fail: /* just failed to match a B */
@@ -8313,7 +8583,7 @@ NULL
              if (EVAL_CLOSE_PAREN_IS_TRUE(cur_eval,(U32)ST.paren))
              {
                  char *li = locinput;
-                if (!regrepeat(rex, &li, scan, reginfo, 1))
+                if (!regrepeat(rex, &li, scan, loceol, reginfo, 1))
                     sayNO;
                  SET_locinput(li);
                  goto fake_end;
@@ -8369,7 +8639,7 @@ NULL
                  char *li = locinput;
                 minmod = 0;
                 if (ST.min &&
-                        regrepeat(rex, &li, ST.A, reginfo, ST.min)
+                        regrepeat(rex, &li, ST.A, loceol, reginfo, ST.min)
                              < ST.min)
                     sayNO;
                  SET_locinput(li);
@@ -8383,7 +8653,7 @@ NULL
                 /* set ST.maxpos to the furthest point along the
                  * string that could possibly match */
                 if  (ST.max == REG_INFTY) {
-                   ST.maxpos = reginfo->strend - 1;
+                   ST.maxpos = loceol - 1;
                     if (utf8_target)
                         while (UTF8_IS_CONTINUATION(*(U8*)ST.maxpos))
                             ST.maxpos--;
@@ -8391,13 +8661,13 @@ NULL
                 else if (utf8_target) {
                     int m = ST.max - ST.min;
                     for (ST.maxpos = locinput;
-                        m >0 && ST.maxpos < reginfo->strend; m--)
+                        m >0 && ST.maxpos <  loceol; m--)
                         ST.maxpos += UTF8SKIP(ST.maxpos);
                 }
                 else {
                     ST.maxpos = locinput + ST.max - ST.min;
-                   if (ST.maxpos >= reginfo->strend)
-                       ST.maxpos = reginfo->strend - 1;
+                   if (ST.maxpos >=  loceol)
+                       ST.maxpos =  loceol - 1;
                 }
                 goto curly_try_B_min_known;
  
@@ -8406,7 +8676,7 @@ NULL
                  /* avoid taking address of locinput, so it can remain
                   * a register var */
                  char *li = locinput;
-                ST.count = regrepeat(rex, &li, ST.A, reginfo, ST.max);
+                ST.count = regrepeat(rex, &li, ST.A, loceol, reginfo, ST.max);
                 if (ST.count < ST.min)
                     sayNO;
                  SET_locinput(li);
@@ -8439,7 +8709,7 @@ NULL
              if (ST.c1 == CHRTEST_VOID) {
                  /* failed -- move forward one */
                  char *li = locinput;
-                if (!regrepeat(rex, &li, ST.A, reginfo, 1)) {
+                if (!regrepeat(rex, &li, ST.A, loceol, reginfo, 1)) {
                      sayNO;
                  }
                  locinput = li;
@@ -8466,20 +8736,26 @@ NULL
                     n = (ST.oldloc == locinput) ? 0 : 1;
                     if (ST.c1 == ST.c2) {
                         /* set n to utf8_distance(oldloc, locinput) */
-                       while (locinput <= ST.maxpos
-                              && memNE(locinput, ST.c1_utf8, UTF8SKIP(locinput)))
+                       while (    locinput <= ST.maxpos
+                               &&  locinput < loceol
+                               &&  memNE(locinput, ST.c1_utf8,
+                                    UTF8_SAFE_SKIP(locinput, reginfo->strend)))
                          {
-                           locinput += UTF8SKIP(locinput);
+                           locinput += UTF8_SAFE_SKIP(locinput,
+                                                       reginfo->strend);
                             n++;
                         }
                     }
                     else {
                         /* set n to utf8_distance(oldloc, locinput) */
-                       while (locinput <= ST.maxpos
-                              && memNE(locinput, ST.c1_utf8, UTF8SKIP(locinput))
-                              && memNE(locinput, ST.c2_utf8, UTF8SKIP(locinput)))
+                       while (   locinput <= ST.maxpos
+                               && locinput < loceol
+                               && memNE(locinput, ST.c1_utf8,
+                                     UTF8_SAFE_SKIP(locinput, reginfo->strend))
+                               && memNE(locinput, ST.c2_utf8,
+                                    UTF8_SAFE_SKIP(locinput, reginfo->strend)))
                          {
-                           locinput += UTF8SKIP(locinput);
+                           locinput += UTF8_SAFE_SKIP(locinput, reginfo->strend);
                             n++;
                         }
                     }
@@ -8536,7 +8812,7 @@ NULL
                       * locinput matches */
                      char *li = ST.oldloc;
                     ST.count += n;
-                    if (regrepeat(rex, &li, ST.A, reginfo, n) < n)
+                    if (regrepeat(rex, &li, ST.A, loceol, reginfo, n) < n)
                         sayNO;
                      assert(n == REG_INFTY || locinput == li);
                 }
@@ -8544,34 +8820,36 @@ NULL
  
            curly_try_B_min:
              CURLY_SETPAREN(ST.paren, ST.count);
-            PUSH_STATE_GOTO(CURLY_B_min, ST.B, locinput);
+            PUSH_STATE_GOTO(CURLY_B_min, ST.B, locinput, loceol,
+                            script_run_begin);
             NOT_REACHED; /* NOTREACHED */
  
  
            curly_try_B_max:
             /* a successful greedy match: now try to match B */
             {
-               bool could_match = locinput < reginfo->strend;
+               bool could_match = locinput <  loceol;
  
                 /* If it could work, try it. */
                  if (ST.c1 != CHRTEST_VOID && could_match) {
                      if (! UTF8_IS_INVARIANT(UCHARAT(locinput)) && utf8_target)
                      {
-                        could_match = memEQ(locinput,
-                                            ST.c1_utf8,
-                                            UTF8SKIP(locinput))
-                                    || memEQ(locinput,
-                                             ST.c2_utf8,
-                                             UTF8SKIP(locinput));
+                        could_match =  memEQ(locinput, ST.c1_utf8,
+                                             UTF8_SAFE_SKIP(locinput,
+                                                            reginfo->strend))
+                                    || memEQ(locinput, ST.c2_utf8,
+                                             UTF8_SAFE_SKIP(locinput,
+                                                            reginfo->strend));
                      }
                      else {
-                        could_match = UCHARAT(locinput) == ST.c1
-                                      || UCHARAT(locinput) == ST.c2;
+                        could_match =   UCHARAT(locinput) == ST.c1
+                                     || UCHARAT(locinput) == ST.c2;
                      }
                  }
                  if (ST.c1 == CHRTEST_VOID || could_match) {
                     CURLY_SETPAREN(ST.paren, ST.count);
-                   PUSH_STATE_GOTO(CURLY_B_max, ST.B, locinput);
+                   PUSH_STATE_GOTO(CURLY_B_max, ST.B, locinput, loceol,
+                                    script_run_begin);
                     NOT_REACHED; /* NOTREACHED */
                 }
             }
@@ -8626,8 +8904,9 @@ NULL
  
                  SET_RECURSE_LOCINPUT("FAKE-END[after]", cur_eval->locinput);
  
-                PUSH_YES_STATE_GOTO(EVAL_postponed_AB, st->u.eval.prev_eval->u.eval.B,
-                                    locinput); /* match B */
+                PUSH_YES_STATE_GOTO(EVAL_postponed_AB,          /* match B */
+                                    st->u.eval.prev_eval->u.eval.B,
+                                    locinput, loceol, script_run_begin);
             }
  
             if (locinput < reginfo->till) {
@@ -8651,12 +8930,11 @@ NULL
  #undef  ST
  #define ST st->u.ifmatch
  
-        {
-            char *newstart;
-
         case SUSPEND:   /* (?>A) */
             ST.wanted = 1;
-           newstart = locinput;
+           ST.start = locinput;
+           ST.end = loceol;
+            ST.count = 1;
             goto do_ifmatch;    
  
         case UNLESSM:   /* -ve lookaround: (?!A), or with 'flags', (?<!A) */
@@ -8666,25 +8944,47 @@ NULL
         case IFMATCH:   /* +ve lookaround: (?=A), or with 'flags', (?<=A) */
             ST.wanted = 1;
           ifmatch_trivial_fail_test:
-           if (scan->flags) {
-               char * const s = HOPBACKc(locinput, scan->flags);
-               if (!s) {
-                   /* trivial fail */
-                   if (logical) {
-                       logical = 0;
-                       sw = 1 - cBOOL(ST.wanted);
-                   }
-                   else if (ST.wanted)
-                       sayNO;
-                   next = scan + ARG(scan);
-                   if (next == scan)
-                       next = NULL;
-                   break;
-               }
-               newstart = s;
+            ST.count = scan->next_off + 1; /* next_off repurposed to be
+                                              lookbehind count, requires
+                                              non-zero flags */
+           if (! scan->flags) {    /* 'flags' zero means lookahead */
+
+                /* Lookahead starts here and ends at the normal place */
+               ST.start = locinput;
+               ST.end = loceol;
+            }
+           else {
+                PERL_UINT_FAST8_T back_count = scan->flags;
+               char * s;
+
+                /* Lookbehind can look beyond the current position */
+               ST.end = loceol;
+
+                /* ... and starts at the first place in the input that is in
+                 * the range of the possible start positions */
+                for (; ST.count > 0; ST.count--, back_count--) {
+                    s = HOPBACKc(locinput, back_count);
+                    if (s) {
+                        ST.start = s;
+                        goto do_ifmatch;
+                    }
+                }
+
+                /* If the lookbehind doesn't start in the actual string, it is a
+                 * trivial match failure */
+                if (logical) {
+                    logical = 0;
+                    sw = 1 - cBOOL(ST.wanted);
+                }
+                else if (ST.wanted)
+                    sayNO;
+
+                /* Here, we didn't want it to match, so this is actually a success */
+                next = scan + ARG(scan);
+                if (next == scan)
+                    next = NULL;
+                break;
             }
-           else
-               newstart = locinput;
  
           do_ifmatch:
             ST.me = scan;
@@ -8692,29 +8992,48 @@ NULL
             logical = 0; /* XXX: reset state of logical once it has been saved into ST */
             
             /* execute body of (?...A) */
-           PUSH_YES_STATE_GOTO(IFMATCH_A, NEXTOPER(NEXTOPER(scan)), newstart);
+           PUSH_YES_STATE_GOTO(IFMATCH_A, NEXTOPER(NEXTOPER(scan)), ST.start,
+                                ST.end, script_run_begin);
             NOT_REACHED; /* NOTREACHED */
-        }
+
+        {
+            bool matched;
  
         case IFMATCH_A_fail: /* body of (?...A) failed */
-           ST.wanted = !ST.wanted;
-           /* FALLTHROUGH */
+           if (! ST.logical && ST.count > 1) {
+
+                /* It isn't a real failure until we've tried all starting
+                 * positions.  Move to the next starting position and retry */
+                ST.count--;
+                ST.start = HOPc(ST.start, 1);
+                scan = ST.me;
+                logical = ST.logical;
+                goto do_ifmatch;
+            }
+
+            /* Here, all starting positions have been tried. */
+           matched = FALSE;
+           goto ifmatch_done;
  
         case IFMATCH_A: /* body of (?...A) succeeded */
-           if (ST.logical) {
-               sw = cBOOL(ST.wanted);
-           }
-           else if (!ST.wanted)
-               sayNO;
+           matched = TRUE;
+          ifmatch_done:
+            sw = matched == ST.wanted;
+           if (! ST.logical && !sw) {
+                sayNO;
+            }
  
             if (OP(ST.me) != SUSPEND) {
                  /* restore old position except for (?>...) */
                 locinput = st->locinput;
+                loceol = st->loceol;
+                script_run_begin = st->sr0;
             }
             scan = ST.me + ARG(ST.me);
             if (scan == ST.me)
                 scan = NULL;
             continue; /* execute B */
+        }
  
  #undef ST
  
@@ -8726,13 +9045,14 @@ NULL
             break;
  
         case COMMIT:  /*  (*COMMIT)  */
-           reginfo->cutpoint = reginfo->strend;
+           reginfo->cutpoint = loceol;
             /* FALLTHROUGH */
  
         case PRUNE:   /*  (*PRUNE)   */
              if (scan->flags)
                 sv_yes_mark = sv_commit = MUTABLE_SV(rexi->data->data[ ARG( scan ) ]);
-           PUSH_STATE_GOTO(COMMIT_next, next, locinput);
+           PUSH_STATE_GOTO(COMMIT_next, next, locinput, loceol,
+                            script_run_begin);
             NOT_REACHED; /* NOTREACHED */
  
         case COMMIT_next_fail:
@@ -8762,7 +9082,8 @@ NULL
                  = MUTABLE_SV(rexi->data->data[ ARG( scan ) ]);
              mark_state = st;
              ST.mark_loc = locinput;
-            PUSH_YES_STATE_GOTO(MARKPOINT_next, next, locinput);
+            PUSH_YES_STATE_GOTO(MARKPOINT_next, next, locinput, loceol,
+                                script_run_begin);
              NOT_REACHED; /* NOTREACHED */
  
          case MARKPOINT_next:
@@ -8795,7 +9116,8 @@ NULL
                  /* (*SKIP) : if we fail we cut here*/
                  ST.mark_name = NULL;
                  ST.mark_loc = locinput;
-                PUSH_STATE_GOTO(SKIP_next,next, locinput);
+                PUSH_STATE_GOTO(SKIP_next,next, locinput, loceol,
+                                script_run_begin);
              } else {
                  /* (*SKIP:NAME) : if there is a (*MARK:NAME) fail where it was, 
                     otherwise do nothing.  Meaning we need to scan 
@@ -8808,7 +9130,8 @@ NULL
                                  find ) ) 
                      {
                          ST.mark_name = find;
-                        PUSH_STATE_GOTO( SKIP_next, next, locinput);
+                        PUSH_STATE_GOTO( SKIP_next, next, locinput, loceol,
+                                         script_run_begin);
                      }
                      cur = cur->u.mark.prev_mark;
                  }
@@ -8837,7 +9160,7 @@ NULL
  #undef ST
  
          case LNBREAK: /* \R */
-            if ((n=is_LNBREAK_safe(locinput, reginfo->strend, utf8_target))) {
+            if ((n=is_LNBREAK_safe(locinput, loceol, utf8_target))) {
                  locinput += n;
              } else
                  sayNO;
@@ -8856,7 +9179,7 @@ NULL
                  locinput += PL_utf8skip[nextchr];
                  /* locinput is allowed to go 1 char off the end (signifying
                   * EOS), but not 2+ */
-                if (locinput > reginfo->strend)
+                if (locinput >  loceol)
                      sayNO;
              }
              else
@@ -8879,8 +9202,10 @@ NULL
         /* push a new regex state, then continue at scan  */
         {
             regmatch_state *newst;
+            DECLARE_AND_GET_RE_DEBUG_FLAGS;
  
-           DEBUG_STACK_r({
+            DEBUG_r( /* DEBUG_STACK_r */
+              if (DEBUG_v_TEST || RE_DEBUG_FLAG(RE_DEBUG_EXTRA_STACK)) {
                 regmatch_state *cur = st;
                 regmatch_state *curyes = yes_state;
                 U32 i;
@@ -8899,17 +9224,21 @@ NULL
                      if (curyes == cur)
                         curyes = cur->u.yes.prev_yes_state;
                  }
-            } else 
+            } else {
                  DEBUG_STATE_pp("push")
-            );
+            });
             depth++;
             st->locinput = locinput;
+           st->loceol = loceol;
+            st->sr0 = script_run_begin;
             newst = st+1; 
             if (newst >  SLAB_LAST(PL_regmatch_slab))
                 newst = S_push_slab(aTHX);
             PL_regmatch_state = newst;
  
             locinput = pushinput;
+            loceol = pusheol;
+            script_run_begin = pushsr0;
             st = newst;
             continue;
              /* NOTREACHED */
@@ -8962,8 +9291,11 @@ NULL
         yes_state = st->u.yes.prev_yes_state;
         PL_regmatch_state = st;
          
-        if (no_final)
+        if (no_final) {
              locinput= st->locinput;
+            loceol= st->loceol;
+            script_run_begin = st->sr0;
+        }
         state_num = st->resume_state + no_final;
         goto reenter_switch;
      }
@@ -9013,6 +9345,8 @@ NULL
         }
         PL_regmatch_state = st;
         locinput= st->locinput;
+       loceol= st->loceol;
+        script_run_begin = st->sr0;
  
         DEBUG_STATE_pp("pop");
         depth--;
@@ -9068,18 +9402,19 @@ NULL
   *             to point to the byte following the highest successful
   *             match.
   * p         - the regnode to be repeatedly matched against.
- * reginfo   - struct holding match state, such as strend
+ * loceol    - pointer to the end position beyond which we aren't supposed to
+ *             look.
+ * reginfo   - struct holding match state, such as utf8_target
   * max       - maximum number of things to match.
   * depth     - (for debugging) backtracking depth.
   */
  STATIC I32
  S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
-            regmatch_info *const reginfo, I32 max _pDEPTH)
+            char * loceol, regmatch_info *const reginfo, I32 max _pDEPTH)
  {
-    dVAR;
      char *scan;     /* Pointer to current position in target string */
      I32 c;
-    char *loceol = reginfo->strend;   /* local version */
+    char *this_eol = loceol;   /* potentially adjusted version. */
      I32 hardcount = 0;  /* How many matches so far */
      bool utf8_target = reginfo->is_utf8_target;
      unsigned int to_complement = 0;  /* Invert the result? */
@@ -9097,15 +9432,15 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
      if (max == REG_INFTY)   /* This is a special marker to go to the platform's
                                 max */
         max = I32_MAX;
-    else if (! utf8_target && loceol - scan > max)
-       loceol = scan + max;
+    else if (! utf8_target && this_eol - scan > max)
+       this_eol = scan + max;
  
-    /* Here, for the case of a non-UTF-8 target we have adjusted <loceol> down
+    /* Here, for the case of a non-UTF-8 target we have adjusted <this_eol> down
       * to the maximum of how far we should go in it (leaving it set to the real
       * end, if the maximum permissible would take us beyond that).  This allows
-     * us to make the loop exit condition that we haven't gone past <loceol> to
+     * us to make the loop exit condition that we haven't gone past <this_eol> to
       * also mean that we haven't exceeded the max permissible count, saving a
-     * test each time through the loop.  But it assumes that the OP matches a
+     * test each time through the loops.  But it assumes that the OP matches a
       * single byte, which is true for most of the OPs below when applied to a
       * non-UTF-8 target.  Those relatively few OPs that don't have this
       * characteristic will have to compensate.
@@ -9113,40 +9448,56 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
       * There is no adjustment for UTF-8 targets, as the number of bytes per
       * character varies.  OPs will have to test both that the count is less
       * than the max permissible (using <hardcount> to keep track), and that we
-     * are still within the bounds of the string (using <loceol>.  A few OPs
+     * are still within the bounds of the string (using <this_eol>).  A few OPs
       * match a single byte no matter what the encoding.  They can omit the max
       * test if, for the UTF-8 case, they do the adjustment that was skipped
       * above.
       *
       * Thus, the code above sets things up for the common case; and exceptional
       * cases need extra work; the common case is to make sure <scan> doesn't
-     * go past <loceol>, and for UTF-8 to also use <hardcount> to make sure the
+     * go past <this_eol>, and for UTF-8 to also use <hardcount> to make sure the
       * count doesn't exceed the maximum permissible */
  
      switch (OP(p)) {
      case REG_ANY:
         if (utf8_target) {
-           while (scan < loceol && hardcount < max && *scan != '\n') {
+           while (scan < this_eol && hardcount < max && *scan != '\n') {
                 scan += UTF8SKIP(scan);
                 hardcount++;
             }
         } else {
-            scan = (char *) memchr(scan, '\n', loceol - scan);
+            scan = (char *) memchr(scan, '\n', this_eol - scan);
              if (! scan) {
-                scan = loceol;
+                scan = this_eol;
              }
         }
         break;
      case SANY:
          if (utf8_target) {
-           while (scan < loceol && hardcount < max) {
+           while (scan < this_eol && hardcount < max) {
                 scan += UTF8SKIP(scan);
                 hardcount++;
             }
         }
         else
-           scan = loceol;
+           scan = this_eol;
         break;
+
+    case LEXACT_REQ8:
+        if (! utf8_target) {
+            break;
+        }
+        /* FALLTHROUGH */
+
+    case LEXACT:
+      {
+        U8 * string;
+        Size_t str_len;
+
+       string = (U8 *) STRINGl(p);
+        str_len = STR_LENl(p);
+        goto join_short_long_exact;
+
      case EXACTL:
          _CHECK_AND_WARN_PROBLEMATIC_LOCALE;
          if (utf8_target && UTF8_IS_ABOVE_LATIN1(*scan)) {
@@ -9154,28 +9505,32 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
          }
          goto do_exact;
  
-    case EXACT_ONLY8:
+    case EXACT_REQ8:
          if (! utf8_target) {
              break;
          }
          /* FALLTHROUGH */
      case EXACT:
        do_exact:
-        assert(STR_LEN(p) == reginfo->is_utf8_pat ? UTF8SKIP(STRING(p)) : 1);
+       string = (U8 *) STRINGs(p);
+        str_len = STR_LENs(p);
  
-       c = (U8)*STRING(p);
+      join_short_long_exact:
+        assert(str_len == reginfo->is_utf8_pat ? UTF8SKIP(string) : 1);
+
+       c = *string;
  
          /* Can use a simple find if the pattern char to match on is invariant
           * under UTF-8, or both target and pattern aren't UTF-8.  Note that we
           * can use UTF8_IS_INVARIANT() even if the pattern isn't UTF-8, as it's
           * true iff it doesn't matter if the argument is in UTF-8 or not */
          if (UTF8_IS_INVARIANT(c) || (! utf8_target && ! reginfo->is_utf8_pat)) {
-            if (utf8_target && loceol - scan > max) {
-                /* We didn't adjust <loceol> because is UTF-8, but ok to do so,
+            if (utf8_target && this_eol - scan > max) {
+                /* We didn't adjust <this_eol> because the target is UTF-8, but ok to do so,
                   * since here, to match at all, 1 char == 1 byte */
-                loceol = scan + max;
+                this_eol = scan + max;
              }
-            scan = (char *) find_span_end((U8 *) scan, (U8 *) loceol, (U8) c);
+            scan = (char *) find_span_end((U8 *) scan, (U8 *) this_eol, (U8) c);
         }
         else if (reginfo->is_utf8_pat) {
              if (utf8_target) {
@@ -9184,9 +9539,9 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
                  /* When both target and pattern are UTF-8, we have to do
                   * string EQ */
                  while (hardcount < max
-                       && scan < loceol
-                       && (scan_char_len = UTF8SKIP(scan)) <= STR_LEN(p)
-                       && memEQ(scan, STRING(p), scan_char_len))
+                       && scan < this_eol
+                       && (scan_char_len = UTF8SKIP(scan)) <= str_len
+                       && memEQ(scan, string, scan_char_len))
                  {
                      scan += scan_char_len;
                      hardcount++;
@@ -9196,8 +9551,8 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
  
                  /* Target isn't utf8; convert the character in the UTF-8
                   * pattern to non-UTF8, and do a simple find */
-                c = EIGHT_BIT_UTF8_TO_NATIVE(c, *(STRING(p) + 1));
-                scan = (char *) find_span_end((U8 *) scan, (U8 *) loceol, (U8) c);
+                c = EIGHT_BIT_UTF8_TO_NATIVE(c, *(string + 1));
+                scan = (char *) find_span_end((U8 *) scan, (U8 *) this_eol, (U8) c);
              } /* else pattern char is above Latin1, can't possibly match the
                   non-UTF-8 target */
          }
@@ -9211,7 +9566,7 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
             U8 low = UTF8_TWO_BYTE_LO(c);
  
             while (hardcount < max
-                   && scan + 1 < loceol
+                   && scan + 1 < this_eol
                     && UCHARAT(scan) == high
                     && UCHARAT(scan + 1) == low)
             {
@@ -9220,6 +9575,7 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
             }
         }
         break;
+      }
  
      case EXACTFAA_NO_TRIE: /* This node only generated for non-utf8 patterns */
          assert(! reginfo->is_utf8_pat);
@@ -9252,7 +9608,7 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
                                      | FOLDEQ_S2_FOLDS_SANE;
          goto do_exactf;
  
-    case EXACTFU_ONLY8:
+    case EXACTFU_REQ8:
          if (! utf8_target) {
              break;
          }
@@ -9270,48 +9626,51 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
          int c1, c2;
          U8 c1_utf8[UTF8_MAXBYTES+1], c2_utf8[UTF8_MAXBYTES+1];
  
-        assert(STR_LEN(p) == reginfo->is_utf8_pat ? UTF8SKIP(STRING(p)) : 1);
+        assert(STR_LENs(p) == reginfo->is_utf8_pat ? UTF8SKIP(STRINGs(p)) : 1);
  
          if (S_setup_EXACTISH_ST_c1_c2(aTHX_ p, &c1, c1_utf8, &c2, c2_utf8,
                                          reginfo))
          {
              if (c1 == CHRTEST_VOID) {
                  /* Use full Unicode fold matching */
-                char *tmpeol = reginfo->strend;
-                STRLEN pat_len = reginfo->is_utf8_pat ? UTF8SKIP(STRING(p)) : 1;
+                char *tmpeol = loceol;
+                STRLEN pat_len = reginfo->is_utf8_pat ? UTF8SKIP(STRINGs(p)) : 1;
                  while (hardcount < max
                          && foldEQ_utf8_flags(scan, &tmpeol, 0, utf8_target,
-                                             STRING(p), NULL, pat_len,
+                                             STRINGs(p), NULL, pat_len,
                                               reginfo->is_utf8_pat, utf8_flags))
                  {
                      scan = tmpeol;
-                    tmpeol = reginfo->strend;
+                    tmpeol = loceol;
                      hardcount++;
                  }
              }
              else if (utf8_target) {
                  if (c1 == c2) {
-                    while (scan < loceol
+                    while (scan < this_eol
                             && hardcount < max
-                           && memEQ(scan, c1_utf8, UTF8SKIP(scan)))
+                           && memEQ(scan, c1_utf8, UTF8_SAFE_SKIP(scan,
+                                                                  loceol)))
                      {
-                        scan += UTF8SKIP(scan);
+                        scan += UTF8SKIP(c1_utf8);
                          hardcount++;
                      }
                  }
                  else {
-                    while (scan < loceol
+                    while (scan < this_eol
                             && hardcount < max
-                           && (memEQ(scan, c1_utf8, UTF8SKIP(scan))
-                               || memEQ(scan, c2_utf8, UTF8SKIP(scan))))
+                           && (   memEQ(scan, c1_utf8, UTF8_SAFE_SKIP(scan,
+                                                                     loceol))
+                               || memEQ(scan, c2_utf8, UTF8_SAFE_SKIP(scan,
+                                                                     loceol))))
                      {
-                        scan += UTF8SKIP(scan);
+                        scan += UTF8_SAFE_SKIP(scan, loceol);
                          hardcount++;
                      }
                  }
              }
              else if (c1 == c2) {
-                scan = (char *) find_span_end((U8 *) scan, (U8 *) loceol, (U8) c1);
+                scan = (char *) find_span_end((U8 *) scan, (U8 *) this_eol, (U8) c1);
              }
              else {
                  /* See comments in regmatch() CURLY_B_min_known_fail.  We avoid
@@ -9323,12 +9682,12 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
                      U8 c1_c2_mask = ~ c1_c2_bits_differing;
  
                      scan = (char *) find_span_end_mask((U8 *) scan,
-                                                       (U8 *) loceol,
+                                                       (U8 *) this_eol,
                                                         c1 & c1_c2_mask,
                                                         c1_c2_mask);
                  }
                  else {
-                    while (    scan < loceol
+                    while (    scan < this_eol
                             && (UCHARAT(scan) == c1 || UCHARAT(scan) == c2))
                      {
                          scan++;
@@ -9350,40 +9709,40 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
      case ANYOF:
         if (utf8_target) {
             while (hardcount < max
-                   && scan < loceol
-                  && reginclass(prog, p, (U8*)scan, (U8*) loceol, utf8_target))
+                   && scan < this_eol
+                  && reginclass(prog, p, (U8*)scan, (U8*) this_eol, utf8_target))
             {
                 scan += UTF8SKIP(scan);
                 hardcount++;
             }
         }
          else if (ANYOF_FLAGS(p) & ~ ANYOF_MATCHES_ALL_ABOVE_BITMAP) {
-           while (scan < loceol
+           while (scan < this_eol
                      && reginclass(prog, p, (U8*)scan, (U8*)scan+1, 0))
                 scan++;
          }
          else {
-           while (scan < loceol && ANYOF_BITMAP_TEST(p, *((U8*)scan)))
+           while (scan < this_eol && ANYOF_BITMAP_TEST(p, *((U8*)scan)))
                 scan++;
         }
         break;
  
      case ANYOFM:
-        if (utf8_target && loceol - scan > max) {
+        if (utf8_target && this_eol - scan > max) {
  
-            /* We didn't adjust <loceol> at the beginning of this routine
+            /* We didn't adjust <this_eol> at the beginning of this routine
               * because the target is UTF-8, but it is actually ok to do so,
               * since here, to match, 1 char == 1 byte. */
-            loceol = scan + max;
+            this_eol = scan + max;
          }
  
-        scan = (char *) find_span_end_mask((U8 *) scan, (U8 *) loceol, (U8) ARG(p), FLAGS(p));
+        scan = (char *) find_span_end_mask((U8 *) scan, (U8 *) this_eol, (U8) ARG(p), FLAGS(p));
          break;
  
      case NANYOFM:
         if (utf8_target) {
             while (     hardcount < max
-                   &&   scan < loceol
+                   &&   scan < this_eol
                    &&  (*scan & FLAGS(p)) != ARG(p))
             {
                 scan += UTF8SKIP(scan);
@@ -9391,18 +9750,115 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
             }
         }
          else {
-            scan = (char *) find_next_masked((U8 *) scan, (U8 *) loceol, (U8) ARG(p), FLAGS(p));
+            scan = (char *) find_next_masked((U8 *) scan, (U8 *) this_eol, (U8) ARG(p), FLAGS(p));
         }
          break;
  
      case ANYOFH:
-        if (utf8_target) while (   hardcount < max
-                                && scan < loceol
-                                && reginclass(prog, p, (U8*)scan, (U8*) loceol,
-                                                                  TRUE))
-        {
-            scan += UTF8SKIP(scan);
-            hardcount++;
+        if (utf8_target) {  /* ANYOFH only can match UTF-8 targets */
+            while (  hardcount < max
+                   && scan < this_eol
+                   && NATIVE_UTF8_TO_I8(*scan) >= ANYOF_FLAGS(p)
+                   && reginclass(prog, p, (U8*)scan, (U8*) this_eol, TRUE))
+            {
+                scan += UTF8SKIP(scan);
+                hardcount++;
+            }
+        }
+        break;
+
+    case ANYOFHb:
+        if (utf8_target) {  /* ANYOFHb only can match UTF-8 targets */
+
+            /* we know the first byte must be the FLAGS field */
+            while (   hardcount < max
+                   && scan < this_eol
+                   && (U8) *scan == ANYOF_FLAGS(p)
+                   && reginclass(prog, p, (U8*)scan, (U8*) this_eol,
+                                                              TRUE))
+            {
+                scan += UTF8SKIP(scan);
+                hardcount++;
+            }
+        }
+        break;
+
+    case ANYOFHr:
+        if (utf8_target) {  /* ANYOFHr only can match UTF-8 targets */
+            while (  hardcount < max
+                   && scan < this_eol
+                   && inRANGE(NATIVE_UTF8_TO_I8(*scan),
+                              LOWEST_ANYOF_HRx_BYTE(ANYOF_FLAGS(p)),
+                              HIGHEST_ANYOF_HRx_BYTE(ANYOF_FLAGS(p)))
+                   && NATIVE_UTF8_TO_I8(*scan) >= ANYOF_FLAGS(p)
+                   && reginclass(prog, p, (U8*)scan, (U8*) this_eol, TRUE))
+            {
+                scan += UTF8SKIP(scan);
+                hardcount++;
+            }
+        }
+        break;
+
+    case ANYOFHs:
+        if (utf8_target) {  /* ANYOFHs only can match UTF-8 targets */
+            while (   hardcount < max
+                   && scan + FLAGS(p) < this_eol
+                   && memEQ(scan, ((struct regnode_anyofhs *) p)->string, FLAGS(p))
+                   && reginclass(prog, p, (U8*)scan, (U8*) this_eol, TRUE))
+            {
+                scan += UTF8SKIP(scan);
+                hardcount++;
+            }
+        }
+        break;
+
+    case ANYOFR:
+        if (utf8_target) {
+            while (   hardcount < max
+                   && scan < this_eol
+                   && NATIVE_UTF8_TO_I8(*scan) >= ANYOF_FLAGS(p)
+                   && withinCOUNT(utf8_to_uvchr_buf((U8 *) scan,
+                                                (U8 *) this_eol,
+                                                NULL),
+                                  ANYOFRbase(p), ANYOFRdelta(p)))
+            {
+                scan += UTF8SKIP(scan);
+                hardcount++;
+            }
+        }
+        else {
+            while (   hardcount < max
+                   && scan < this_eol
+                   && withinCOUNT((U8) *scan, ANYOFRbase(p), ANYOFRdelta(p)))
+            {
+                scan++;
+                hardcount++;
+            }
+        }
+        break;
+
+    case ANYOFRb:
+        if (utf8_target) {
+            while (   hardcount < max
+                   && scan < this_eol
+                   && (U8) *scan == ANYOF_FLAGS(p)
+                   && withinCOUNT(utf8_to_uvchr_buf((U8 *) scan,
+                                                (U8 *) this_eol,
+                                                NULL),
+                                  ANYOFRbase(p), ANYOFRdelta(p)))
+            {
+                scan += UTF8SKIP(scan);
+                hardcount++;
+            }
+        }
+        else {
+            while (   hardcount < max
+                   && scan < this_eol
+                   && withinCOUNT((U8) *scan, ANYOFRbase(p), ANYOFRdelta(p)))
+            {
+                scan++;
+                hardcount++;
+            }
          }
          break;
  
@@ -9415,16 +9871,16 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
      case POSIXL:
          _CHECK_AND_WARN_PROBLEMATIC_LOCALE;
         if (! utf8_target) {
-           while (scan < loceol && to_complement ^ cBOOL(isFOO_lc(FLAGS(p),
+           while (scan < this_eol && to_complement ^ cBOOL(isFOO_lc(FLAGS(p),
                                                                     *scan)))
              {
                 scan++;
              }
         } else {
-           while (hardcount < max && scan < loceol
+           while (hardcount < max && scan < this_eol
                     && to_complement ^ cBOOL(isFOO_utf8_lc(FLAGS(p),
                                                                    (U8 *) scan,
-                                                                  (U8 *) loceol)))
+                                                                  (U8 *) this_eol)))
              {
                  scan += UTF8SKIP(scan);
                 hardcount++;
@@ -9439,14 +9895,14 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
          /* FALLTHROUGH */
  
      case POSIXA:
-        if (utf8_target && loceol - scan > max) {
+        if (utf8_target && this_eol - scan > max) {
  
-            /* We didn't adjust <loceol> at the beginning of this routine
+            /* We didn't adjust <this_eol> at the beginning of this routine
               * because the target is UTF-8, but it is actually ok to do so,
               * since here, to match, 1 char == 1 byte. */
-            loceol = scan + max;
+            this_eol = scan + max;
          }
-        while (scan < loceol && _generic_isCC_A((U8) *scan, FLAGS(p))) {
+        while (scan < this_eol && _generic_isCC_A((U8) *scan, FLAGS(p))) {
             scan++;
         }
         break;
@@ -9460,7 +9916,7 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
  
      case NPOSIXA:
          if (! utf8_target) {
-            while (scan < loceol && ! _generic_isCC_A((U8) *scan, FLAGS(p))) {
+            while (scan < this_eol && ! _generic_isCC_A((U8) *scan, FLAGS(p))) {
                  scan++;
              }
          }
@@ -9468,8 +9924,8 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
  
              /* The complement of something that matches only ASCII matches all
               * non-ASCII, plus everything in ASCII that isn't in the class. */
-           while (hardcount < max && scan < loceol
-                   && (   ! isASCII_utf8_safe(scan, reginfo->strend)
+           while (hardcount < max && scan < this_eol
+                   && (   ! isASCII_utf8_safe(scan, loceol)
                         || ! _generic_isCC_A((U8) *scan, FLAGS(p))))
              {
                  scan += UTF8SKIP(scan);
@@ -9484,7 +9940,7 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
  
      case POSIXU:
         if (! utf8_target) {
-            while (scan < loceol && to_complement
+            while (scan < this_eol && to_complement
                                  ^ cBOOL(_generic_isCC((U8) *scan, FLAGS(p))))
              {
                  scan++;
@@ -9495,11 +9951,11 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
              classnum = (_char_class_number) FLAGS(p);
              switch (classnum) {
                  default:
-                    while (   hardcount < max && scan < loceol
+                    while (   hardcount < max && scan < this_eol
                             && to_complement ^ cBOOL(_invlist_contains_cp(
                                                PL_XPosix_ptrs[classnum],
                                                utf8_to_uvchr_buf((U8 *) scan,
-                                                                (U8 *) loceol,
+                                                                (U8 *) this_eol,
                                                                  NULL))))
                      {
                          scan += UTF8SKIP(scan);
@@ -9515,9 +9971,9 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
  
                  case _CC_ENUM_SPACE:
                      while (hardcount < max
-                           && scan < loceol
+                           && scan < this_eol
                             && (to_complement
-                               ^ cBOOL(isSPACE_utf8_safe(scan, loceol))))
+                               ^ cBOOL(isSPACE_utf8_safe(scan, this_eol))))
                      {
                          scan += UTF8SKIP(scan);
                          hardcount++;
@@ -9525,9 +9981,9 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
                      break;
                  case _CC_ENUM_BLANK:
                      while (hardcount < max
-                           && scan < loceol
+                           && scan < this_eol
                             && (to_complement
-                                ^ cBOOL(isBLANK_utf8_safe(scan, loceol))))
+                                ^ cBOOL(isBLANK_utf8_safe(scan, this_eol))))
                      {
                          scan += UTF8SKIP(scan);
                          hardcount++;
@@ -9535,9 +9991,9 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
                      break;
                  case _CC_ENUM_XDIGIT:
                      while (hardcount < max
-                           && scan < loceol
+                           && scan < this_eol
                             && (to_complement
-                               ^ cBOOL(isXDIGIT_utf8_safe(scan, loceol))))
+                               ^ cBOOL(isXDIGIT_utf8_safe(scan, this_eol))))
                      {
                          scan += UTF8SKIP(scan);
                          hardcount++;
@@ -9545,9 +10001,9 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
                      break;
                  case _CC_ENUM_VERTSPACE:
                      while (hardcount < max
-                           && scan < loceol
+                           && scan < this_eol
                             && (to_complement
-                               ^ cBOOL(isVERTWS_utf8_safe(scan, loceol))))
+                               ^ cBOOL(isVERTWS_utf8_safe(scan, this_eol))))
                      {
                          scan += UTF8SKIP(scan);
                          hardcount++;
@@ -9555,9 +10011,9 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
                      break;
                  case _CC_ENUM_CNTRL:
                      while (hardcount < max
-                           && scan < loceol
+                           && scan < this_eol
                             && (to_complement
-                               ^ cBOOL(isCNTRL_utf8_safe(scan, loceol))))
+                               ^ cBOOL(isCNTRL_utf8_safe(scan, this_eol))))
                      {
                          scan += UTF8SKIP(scan);
                          hardcount++;
@@ -9569,16 +10025,15 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
  
      case LNBREAK:
          if (utf8_target) {
-           while (hardcount < max && scan < loceol &&
-                    (c=is_LNBREAK_utf8_safe(scan, loceol))) {
+           while (hardcount < max && scan < this_eol &&
+                    (c=is_LNBREAK_utf8_safe(scan, this_eol))) {
                 scan += c;
                 hardcount++;
             }
         } else {
              /* LNBREAK can match one or two latin chars, which is ok, but we
               * have to use hardcount in this situation, and throw away the
-             * adjustment to <loceol> done before the switch statement */
-            loceol = reginfo->strend;
+             * adjustment to <this_eol> done before the switch statement */
             while (scan < loceol && (c=is_LNBREAK_latin1_safe(scan, loceol))) {
                 scan+=c;
                 hardcount++;
@@ -9618,7 +10073,7 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
      *startposp = scan;
  
      DEBUG_r({
-       GET_RE_DEBUG_FLAGS_DECL;
+       DECLARE_AND_GET_RE_DEBUG_FLAGS;
         DEBUG_EXECUTE_r({
             SV * const prop = sv_newmortal();
              regprop(prog, prop, p, reginfo, NULL);
@@ -9649,8 +10104,9 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
  STATIC bool
  S_reginclass(pTHX_ regexp * const prog, const regnode * const n, const U8* const p, const U8* const p_end, const bool utf8_target)
  {
-    dVAR;
-    const char flags = ANYOF_FLAGS(n);
+    const char flags = (inRANGE(OP(n), ANYOFH, ANYOFHs))
+                        ? 0
+                        : ANYOF_FLAGS(n);
      bool match = FALSE;
      UV c = *p;
  
@@ -9677,7 +10133,7 @@ S_reginclass(pTHX_ regexp * const prog, const regnode * const n, const U8* const
      }
  
      /* If this character is potentially in the bitmap, check it */
-    if (c < NUM_ANYOF_CODE_POINTS && OP(n) != ANYOFH) {
+    if (c < NUM_ANYOF_CODE_POINTS && ! inRANGE(OP(n), ANYOFH, ANYOFHb)) {
         if (ANYOF_BITMAP_TEST(n, c))
             match = TRUE;
         else if ((flags
@@ -9690,7 +10146,7 @@ S_reginclass(pTHX_ regexp * const prog, const regnode * const n, const U8* const
         }
         else if (flags & ANYOF_LOCALE_FLAGS) {
             if (  (flags & ANYOFL_FOLD)
-                && c < sizeof(PL_fold_locale)
+                && c < 256
                 && ANYOF_BITMAP_TEST(n, PL_fold_locale[c]))
              {
                  match = TRUE;
@@ -9778,8 +10234,14 @@ S_reginclass(pTHX_ regexp * const prog, const regnode * const n, const U8* const
                           && IN_UTF8_CTYPE_LOCALE)))
          {
              SV* only_utf8_locale = NULL;
-           SV * const definition = _get_regclass_nonbitmap_data(prog, n, TRUE,
-                                                   0, &only_utf8_locale, NULL);
+           SV * const definition =
+#if !defined(PERL_IN_XSUB_RE) || defined(PLUGGABLE_RE_EXTENSION)
+                get_regclass_nonbitmap_data(prog, n, TRUE, 0,
+                                            &only_utf8_locale, NULL);
+#else
+                get_re_gclass_nonbitmap_data(prog, n, TRUE, 0,
+                                             &only_utf8_locale, NULL);
+#endif
             if (definition) {
                  U8 utf8_buffer[2];
                 U8 * utf8_p;
@@ -9979,6 +10441,7 @@ S_setup_eval_state(pTHX_ regmatch_info *const reginfo)
      regmatch_info_aux_eval *eval_state = reginfo->info_aux_eval;
  
      eval_state->rex = rex;
+    eval_state->sv  = reginfo->sv;
  
      if (reginfo->sv) {
          /* Make $_ available to executed code. */
@@ -9986,6 +10449,8 @@ S_setup_eval_state(pTHX_ regmatch_info *const reginfo)
              SAVE_DEFSV;
              DEFSV_set(reginfo->sv);
          }
+        /* will be dec'd by S_cleanup_regmatch_info_aux */
+        SvREFCNT_inc_NN(reginfo->sv);
  
          if (!(mg = mg_find_mglob(reginfo->sv))) {
              /* prepare for quick setting of pos */
@@ -10011,7 +10476,7 @@ S_setup_eval_state(pTHX_ regmatch_info *const reginfo)
              /* this regexp is also owned by the new PL_reg_curpm, which
                 will try to free it.  */
              av_push(PL_regex_padav, repointer);
-            PL_reg_curpm->op_pmoffset = av_tindex(PL_regex_padav);
+            PL_reg_curpm->op_pmoffset = av_top_index(PL_regex_padav);
              PL_regex_pad = AvARRAY(PL_regex_padav);
          }
  #endif
@@ -10077,6 +10542,7 @@ S_cleanup_regmatch_info_aux(pTHX_ void *arg)
          }
  
          PL_curpm = eval_state->curpm;
+        SvREFCNT_dec(eval_state->sv);
      }
  
      PL_regmatch_state = aux->old_regmatch_state;
@@ -10147,6 +10613,7 @@ S_to_byte_substr(pTHX_ regexp *prog)
             && !prog->substrs->data[i].substr) {
             SV* sv = newSVsv(prog->substrs->data[i].utf8_substr);
             if (! sv_utf8_downgrade(sv, TRUE)) {
+                SvREFCNT_dec_NN(sv);
                  return FALSE;
              }
              if (SvVALID(prog->substrs->data[i].utf8_substr)) {
@@ -10170,23 +10637,22 @@ S_to_byte_substr(pTHX_ regexp *prog)
  #ifndef PERL_IN_XSUB_RE
  
  bool
-Perl__is_grapheme(pTHX_ const U8 * strbeg, const U8 * s, const U8 * strend, const UV cp)
+Perl_is_grapheme(pTHX_ const U8 * strbeg, const U8 * s, const U8 * strend, const UV cp)
  {
      /* Temporary helper function for toke.c.  Verify that the code point 'cp'
       * is a stand-alone grapheme.  The UTF-8 for 'cp' begins at position 's' in
       * the larger string bounded by 'strbeg' and 'strend'.
       *
-     * 'cp' needs to be assigned (if not a future version of the Unicode
+     * 'cp' needs to be assigned (if not, a future version of the Unicode
       * Standard could make it something that combines with adjacent characters,
       * so code using it would then break), and there has to be a GCB break
       * before and after the character. */
  
-    dVAR;
  
      GCB_enum cp_gcb_val, prev_cp_gcb_val, next_cp_gcb_val;
      const U8 * prev_cp_start;
  
-    PERL_ARGS_ASSERT__IS_GRAPHEME;
+    PERL_ARGS_ASSERT_IS_GRAPHEME;
  
      if (   UNLIKELY(UNICODE_IS_SUPER(cp))
          || UNLIKELY(UNICODE_IS_NONCHAR(cp)))
@@ -10234,7 +10700,7 @@ Perl__is_grapheme(pTHX_ const U8 * strbeg, const U8 * s, const U8 * strend, cons
  }
  
  /*
-=head1 Unicode Support
+=for apidoc_section Unicode Support
  
  =for apidoc isSCRIPT_RUN
  
@@ -10303,7 +10769,6 @@ Perl_isSCRIPT_RUN(pTHX_ const U8 * s, const U8 * send, const bool utf8_target)
       * characters for at least one language in the Unicode Common Locale Data
       * Repository [CLDR]. */
  
-    dVAR;
  
      /* Things that match /\d/u */
      SV * decimals_invlist = PL_XPosix_ptrs[_CC_DIGIT];
@@ -10392,10 +10857,7 @@ Perl_isSCRIPT_RUN(pTHX_ const U8 * s, const U8 * send, const bool utf8_target)
          /* If it is within the range [+0 .. +9] of the script's zero, it also
           * is a digit in that script.  We can skip the rest of this code for
           * this character. */
-        if (UNLIKELY(   zero_of_run
-                     && cp >= zero_of_run
-                     && cp - zero_of_run <= 9))
-        {
+        if (UNLIKELY(zero_of_run && withinCOUNT(cp, zero_of_run, 9))) {
              continue;
          }
  
@@ -10616,7 +11078,7 @@ Perl_isSCRIPT_RUN(pTHX_ const U8 * s, const U8 * send, const bool utf8_target)
           * several scripts, and the intersection is not empty.  However, if the
           * character is a decimal digit, it could still mean failure if it is
           * from the wrong sequence of 10.  So, we need to look at if it's a
-         * digit.  We've already handled the 10 decimal digits, and the next
+         * digit.  We've already handled the 10 digits [0-9], and the next
           * lowest one is this one: */
          if (cp < FIRST_NON_ASCII_DECIMAL_DIGIT) {
              continue;   /* Not a digit; this character is part of the run */
@@ -10628,9 +11090,7 @@ Perl_isSCRIPT_RUN(pTHX_ const U8 * s, const U8 * send, const bool utf8_target)
          if (   script_of_char >= 0
              && (zero_of_char = script_zeros[script_of_char]))
          {
-            if (   cp < zero_of_char
-                || cp > zero_of_char + 9)
-            {
+            if (! withinCOUNT(cp, zero_of_char, 9)) {
                  continue;   /* Not a digit; this character is part of the run
                               */
              }