Various updates and fixes to some of the SysV IPC ops and their tests

[perl5.git] / regexec.c
diff --git a/regexec.c b/regexec.c

index e00583a..f3edc3a 100644 (file)
--- a/regexec.c
+++ b/regexec.c
@@ -83,16 +83,33 @@
  #include "invlist_inline.h"
  #include "unicode_constants.h"
  
-#define B_ON_NON_UTF8_LOCALE_IS_WRONG            \
- "Use of \\b{} or \\B{} for non-UTF-8 locale is wrong.  Assuming a UTF-8 locale"
+static const char b_utf8_locale_required[] =
+ "Use of \\b{} or \\B{} for non-UTF-8 locale is wrong."
+                                                "  Assuming a UTF-8 locale";
+
+#define CHECK_AND_WARN_NON_UTF8_CTYPE_LOCALE_IN_BOUND                       \
+    STMT_START {                                                            \
+        if (! IN_UTF8_CTYPE_LOCALE) {                                       \
+          Perl_ck_warner(aTHX_ packWARN(WARN_LOCALE),                       \
+                                                b_utf8_locale_required);    \
+        }                                                                   \
+    } STMT_END
  
-static const char utf8_locale_required[] =
+static const char sets_utf8_locale_required[] =
        "Use of (?[ ]) for non-UTF-8 locale is wrong.  Assuming a UTF-8 locale";
  
+#define CHECK_AND_WARN_NON_UTF8_CTYPE_LOCALE_IN_SETS(n)                     \
+    STMT_START {                                                            \
+        if (! IN_UTF8_CTYPE_LOCALE && ANYOFL_UTF8_LOCALE_REQD(FLAGS(n))) {  \
+          Perl_ck_warner(aTHX_ packWARN(WARN_LOCALE),                       \
+                                             sets_utf8_locale_required);    \
+        }                                                                   \
+    } STMT_END
+
  #ifdef DEBUGGING
  /* At least one required character in the target string is expressible only in
   * UTF-8. */
-static const char* const non_utf8_target_but_utf8_required
+static const char non_utf8_target_but_utf8_required[]
                  = "Can't match, because target string needs to be in UTF-8\n";
  #endif
  
@@ -101,8 +118,6 @@ static const char* const non_utf8_target_but_utf8_required
      goto target;                                                         \
  } STMT_END
  
-#define HAS_NONLATIN1_FOLD_CLOSURE(i) _HAS_NONLATIN1_FOLD_CLOSURE_ONLY_FOR_USE_BY_REGCOMP_DOT_C_AND_REGEXEC_DOT_C(i)
-
  #ifndef STATIC
  #define        STATIC  static
  #endif
@@ -152,16 +167,6 @@ static const char* const non_utf8_target_but_utf8_required
      : (U8*)(pos + off))
  #define HOP4c(pos,off,llim, rlim) ((char*)HOP4(pos,off,llim, rlim))
  
-#define NEXTCHR_EOS -10 /* nextchr has fallen off the end */
-#define NEXTCHR_IS_EOS (nextchr < 0)
-
-#define SET_nextchr \
-    nextchr = ((locinput < reginfo->strend) ? UCHARAT(locinput) : NEXTCHR_EOS)
-
-#define SET_locinput(p) \
-    locinput = (p);  \
-    SET_nextchr
-
  #define PLACEHOLDER    /* Something for the preprocessor to grab onto */
  /* TODO: Combine JUMPABLE and HAS_TEXT to cache OP(rn) */
  
@@ -228,7 +233,7 @@ S_regcppush(pTHX_ const regexp *rex, I32 parenfloor, U32 maxopenparen _pDEPTH)
      const UV total_elems = paren_elems_to_push + REGCP_OTHER_ELEMS;
      const UV elems_shifted = total_elems << SAVE_TIGHT_SHIFT;
      I32 p;
-    GET_RE_DEBUG_FLAGS_DECL;
+    DECLARE_AND_GET_RE_DEBUG_FLAGS;
  
      PERL_ARGS_ASSERT_REGCPPUSH;
  
@@ -338,7 +343,7 @@ S_regcppop(pTHX_ regexp *rex, U32 *maxopenparen_p _pDEPTH)
  {
      UV i;
      U32 paren;
-    GET_RE_DEBUG_FLAGS_DECL;
+    DECLARE_AND_GET_RE_DEBUG_FLAGS;
  
      PERL_ARGS_ASSERT_REGCPPOP;
  
@@ -432,7 +437,7 @@ Perl_isFOO_lc(pTHX_ const U8 classnum, const U8 character)
       * Ideally this could be replaced by a just an array of function pointers
       * to the C library functions that implement the macros this calls.
       * However, to compile, the precise function signatures are required, and
-     * these may vary from platform to to platform.  To avoid having to figure
+     * these may vary from platform to platform.  To avoid having to figure
       * out what those all are on each platform, I (khw) am using this method,
       * which adds an extra layer of function call overhead (unless the C
       * optimizer strips it away).  But we don't particularly care about
@@ -506,7 +511,6 @@ S_isFOO_utf8_lc(pTHX_ const U8 classnum, const U8* character, const U8* e)
       * rules, ignoring any locale.  So use the Unicode function if this class
       * requires an inversion list, and use the Unicode macro otherwise. */
  
-    dVAR;
  
      PERL_ARGS_ASSERT_ISFOO_UTF8_LC;
  
@@ -589,7 +593,7 @@ S_find_span_end(U8 * s, const U8 * send, const U8 span_byte)
              span_word |= span_word << 4;
  
              /* That reduces the problem to what this function solves */
-            return s + _variant_byte_number(span_word);
+            return s + variant_byte_number(span_word);
  
  #endif
  
@@ -667,7 +671,7 @@ S_find_next_masked(U8 * s, const U8 * send, const U8 byte, const U8 mask)
              masked &= PERL_VARIANTS_WORD_MASK;
  
              /* This reduces the problem to that solved by this function */
-            s += _variant_byte_number(masked);
+            s += variant_byte_number(masked);
              return s;
  
          } while (s + PERL_WORDSIZE <= send);
@@ -733,7 +737,7 @@ S_find_span_end_mask(U8 * s, const U8 * send, const U8 span_byte, const U8 mask)
              masked |= masked << 1;
              masked |= masked << 2;
              masked |= masked << 4;
-            return s + _variant_byte_number(masked);
+            return s + variant_byte_number(masked);
  
  #endif
  
@@ -869,7 +873,7 @@ Perl_re_intuit_start(pTHX_
      RXi_GET_DECL(prog,progi);
      regmatch_info reginfo_buf;  /* create some info to pass to find_byclass */
      regmatch_info *const reginfo = &reginfo_buf;
-    GET_RE_DEBUG_FLAGS_DECL;
+    DECLARE_AND_GET_RE_DEBUG_FLAGS;
  
      PERL_ARGS_ASSERT_RE_INTUIT_START;
      PERL_UNUSED_ARG(flags);
@@ -1183,8 +1187,8 @@ Perl_re_intuit_start(pTHX_
  
      /* now look for the 'other' substring if defined */
  
-    if (utf8_target ? prog->substrs->data[other_ix].utf8_substr
-                    : prog->substrs->data[other_ix].substr)
+    if (prog->substrs->data[other_ix].utf8_substr
+        || prog->substrs->data[other_ix].substr)
      {
         /* Take into account the "other" substring. */
          char *last, *last1;
@@ -1194,6 +1198,11 @@ Perl_re_intuit_start(pTHX_
  
        do_other_substr:
          other = &prog->substrs->data[other_ix];
+        if (!utf8_target && !other->substr) {
+            if (!to_byte_substr(prog)) {
+                NON_UTF8_TARGET_BUT_UTF8_REQUIRED(fail);
+            }
+        }
  
          /* if "other" is anchored:
           * we've previously found a floating substr starting at check_at.
@@ -1477,10 +1486,10 @@ Perl_re_intuit_start(pTHX_
          const U8* const str = (U8*)STRING(progi->regstclass);
  
          /* XXX this value could be pre-computed */
-        const int cl_l = (PL_regkind[OP(progi->regstclass)] == EXACT
+        const SSize_t cl_l = (PL_regkind[OP(progi->regstclass)] == EXACT
                     ?  (reginfo->is_utf8_pat
-                        ? utf8_distance(str + STR_LEN(progi->regstclass), str)
-                        : STR_LEN(progi->regstclass))
+                        ? (SSize_t)utf8_distance(str + STR_LEN(progi->regstclass), str)
+                        : (SSize_t)STR_LEN(progi->regstclass))
                     : 1);
         char * endpos;
          char *s;
@@ -1730,7 +1739,7 @@ STMT_START {
          } else {                                                                    \
              uvc = _toFOLD_utf8_flags( (const U8*) uc, uc_end, foldbuf, &foldlen,    \
                                                                              flags); \
-            len = UTF8SKIP(uc);                                                     \
+            len = UTF8_SAFE_SKIP(uc, uc_end);                                       \
              skiplen = UVCHR_SKIP( uvc );                                            \
              foldlen -= skiplen;                                                     \
              uscan = foldbuf + skiplen;                                              \
@@ -1760,7 +1769,7 @@ STMT_START {
      case trie_utf8l:                                                                \
          _CHECK_AND_WARN_PROBLEMATIC_LOCALE;                                         \
          if (utf8_target && UTF8_IS_ABOVE_LATIN1(*uc)) {                             \
-            _CHECK_AND_OUTPUT_WIDE_LOCALE_UTF8_MSG(uc, uc + UTF8SKIP(uc));          \
+            _CHECK_AND_OUTPUT_WIDE_LOCALE_UTF8_MSG(uc, uc_end);                     \
          }                                                                           \
          /* FALLTHROUGH */                                                           \
      case trie_utf8:                                                                 \
@@ -1788,56 +1797,86 @@ STMT_START {
      dump_exec_pos(li,s,(reginfo->strend),(reginfo->strbeg), \
                  startpos, doutf8, depth)
  
-#define REXEC_FBC_SCAN(UTF8, CODE)                          \
+#define REXEC_FBC_UTF8_SCAN(CODE)                           \
      STMT_START {                                            \
          while (s < strend) {                                \
              CODE                                            \
-            s += ((UTF8) ? UTF8SKIP(s) : 1);                \
+            s += UTF8_SAFE_SKIP(s, reginfo->strend);        \
          }                                                   \
      } STMT_END
  
-#define REXEC_FBC_CLASS_SCAN(UTF8, COND)                    \
+#define REXEC_FBC_NON_UTF8_SCAN(CODE)                       \
      STMT_START {                                            \
          while (s < strend) {                                \
-            REXEC_FBC_CLASS_SCAN_GUTS(UTF8, COND)           \
+            CODE                                            \
+            s++;                                            \
+        }                                                   \
+    } STMT_END
+
+#define REXEC_FBC_UTF8_CLASS_SCAN(COND)                     \
+    STMT_START {                                            \
+        while (s < strend) {                                \
+            REXEC_FBC_UTF8_CLASS_SCAN_GUTS(COND)            \
+        }                                                   \
+    } STMT_END
+
+#define REXEC_FBC_NON_UTF8_CLASS_SCAN(COND)                 \
+    STMT_START {                                            \
+        while (s < strend) {                                \
+            REXEC_FBC_NON_UTF8_CLASS_SCAN_GUTS(COND)        \
          }                                                   \
      } STMT_END
  
-#define REXEC_FBC_CLASS_SCAN_GUTS(UTF8, COND)                  \
+#define REXEC_FBC_UTF8_CLASS_SCAN_GUTS(COND)                   \
      if (COND) {                                                \
          FBC_CHECK_AND_TRY                                      \
-        s += ((UTF8) ? UTF8SKIP(s) : 1);                       \
+        s += UTF8_SAFE_SKIP(s, reginfo->strend);               \
          previous_occurrence_end = s;                           \
      }                                                          \
      else {                                                     \
-        s += ((UTF8) ? UTF8SKIP(s) : 1);                       \
+        s += UTF8SKIP(s);                                      \
      }
  
-#define REXEC_FBC_CSCAN(CONDUTF8,COND)                         \
-    if (utf8_target) {                                         \
-       REXEC_FBC_CLASS_SCAN(1, CONDUTF8);                     \
+#define REXEC_FBC_NON_UTF8_CLASS_SCAN_GUTS(COND)               \
+    if (COND) {                                                \
+        FBC_CHECK_AND_TRY                                      \
+        s++;                                                   \
+        previous_occurrence_end = s;                           \
      }                                                          \
      else {                                                     \
-       REXEC_FBC_CLASS_SCAN(0, COND);                         \
+        s++;                                                   \
      }
  
  /* We keep track of where the next character should start after an occurrence
   * of the one we're looking for.  Knowing that, we can see right away if the
   * next occurrence is adjacent to the previous.  When 'doevery' is FALSE, we
   * don't accept the 2nd and succeeding adjacent occurrences */
-#define FBC_CHECK_AND_TRY                                      \
-        if (   (   doevery                                     \
-                || s != previous_occurrence_end)               \
-            && (reginfo->intuit || regtry(reginfo, &s)))       \
-        {                                                      \
-            goto got_it;                                       \
+#define FBC_CHECK_AND_TRY                                           \
+        if (   (   doevery                                          \
+                || s != previous_occurrence_end)                    \
+            && (   reginfo->intuit                                  \
+                || (s <= reginfo->strend && regtry(reginfo, &s))))  \
+        {                                                           \
+            goto got_it;                                            \
          }
  
  
-/* This differs from the above macros in that it calls a function which returns
- * the next occurrence of the thing being looked for in 's'; and 'strend' if
- * there is no such occurrence. */
-#define REXEC_FBC_FIND_NEXT_SCAN(UTF8, f)                   \
+/* These differ from the above macros in that they call a function which
+ * returns the next occurrence of the thing being looked for in 's'; and
+ * 'strend' if there is no such occurrence. */
+#define REXEC_FBC_UTF8_FIND_NEXT_SCAN(f)                    \
+    while (s < strend) {                                    \
+        s = (f);                                            \
+        if (s >= strend) {                                  \
+            break;                                          \
+        }                                                   \
+                                                            \
+        FBC_CHECK_AND_TRY                                   \
+        s += UTF8SKIP(s);                                   \
+        previous_occurrence_end = s;                        \
+    }
+
+#define REXEC_FBC_NON_UTF8_FIND_NEXT_SCAN(f)                \
      while (s < strend) {                                    \
          s = (f);                                            \
          if (s >= strend) {                                  \
@@ -1845,20 +1884,42 @@ STMT_START {
          }                                                   \
                                                              \
          FBC_CHECK_AND_TRY                                   \
-        s += (UTF8) ? UTF8SKIP(s) : 1;                      \
+        s++;                                                \
          previous_occurrence_end = s;                        \
      }
  
-/* The three macros below are slightly different versions of the same logic.
+/* This differs from the above macros in that it is passed a single byte that
+ * is known to begin the next occurrence of the thing being looked for in 's'.
+ * It does a memchr to find the next occurrence of 'byte', before trying 'COND'
+ * at that position. */
+#define REXEC_FBC_FIND_NEXT_UTF8_BYTE_SCAN(byte, COND)      \
+    while (s < strend) {                                    \
+        s = (char *) memchr(s, byte, strend -s);            \
+        if (s == NULL) {                                    \
+            s = (char *) strend;                            \
+            break;                                          \
+        }                                                   \
+                                                            \
+        if (COND) {                                         \
+            FBC_CHECK_AND_TRY                               \
+            s += UTF8_SAFE_SKIP(s, reginfo->strend);        \
+            previous_occurrence_end = s;                    \
+        }                                                   \
+        else {                                              \
+            s += UTF8SKIP(s);                               \
+        }                                                   \
+    }
+
+/* The four macros below are slightly different versions of the same logic.
   *
   * The first is for /a and /aa when the target string is UTF-8.  This can only
- * match ascii, but it must advance based on UTF-8.   The other two handle the
- * non-UTF-8 and the more generic UTF-8 cases.   In all three, we are looking
- * for the boundary (or non-boundary) between a word and non-word character.
- * The utf8 and non-utf8 cases have the same logic, but the details must be
- * different.  Find the "wordness" of the character just prior to this one, and
- * compare it with the wordness of this one.  If they differ, we have a
- * boundary.  At the beginning of the string, pretend that the previous
+ * match ascii, but it must advance based on UTF-8.   The other three handle
+ * the non-UTF-8 and the more generic UTF-8 cases.   In all four, we are
+ * looking for the boundary (or non-boundary) between a word and non-word
+ * character.  The utf8 and non-utf8 cases have the same logic, but the details
+ * must be different.  Find the "wordness" of the character just prior to this
+ * one, and compare it with the wordness of this one.  If they differ, we have
+ * a boundary.  At the beginning of the string, pretend that the previous
   * character was a new-line.
   *
   * All these macros uncleanly have side-effects with each other and outside
@@ -1876,8 +1937,8 @@ STMT_START {
   * see if this tentative match actually works, and if so, to quit the loop
   * here.  And vice-versa if we are looking for a non-boundary.
   *
- * 'tmp' below in the next three macros in the REXEC_FBC_SCAN and
- * REXEC_FBC_SCAN loops is a loop invariant, a bool giving the return of
+ * 'tmp' below in the next four macros in the REXEC_FBC_UTF8_SCAN and
+ * REXEC_FBC_NON_UTF8_SCAN loops is a loop invariant, a bool: the return of
   * TEST_NON_UTF8(s-1).  To see this, note that that's what it is defined to be
   * at entry to the loop, and to get to the IF_FAIL branch, tmp must equal
   * TEST_NON_UTF8(s), and in the opposite branch, IF_SUCCESS, tmp is that
@@ -1888,7 +1949,7 @@ STMT_START {
  #define FBC_UTF8_A(TEST_NON_UTF8, IF_SUCCESS, IF_FAIL)                         \
      tmp = (s != reginfo->strbeg) ? UCHARAT(s - 1) : '\n';                      \
      tmp = TEST_NON_UTF8(tmp);                                                  \
-    REXEC_FBC_SCAN(1,  /* 1=>is-utf8; advances s while s < strend */           \
+    REXEC_FBC_UTF8_SCAN( /* advances s while s < strend */                     \
          if (tmp == ! TEST_NON_UTF8((U8) *s)) {                                 \
              tmp = !tmp;                                                        \
              IF_SUCCESS; /* Is a boundary if values for s-1 and s differ */     \
@@ -1900,7 +1961,8 @@ STMT_START {
  
  /* Like FBC_UTF8_A, but TEST_UV is a macro which takes a UV as its input, and
   * TEST_UTF8 is a macro that for the same input code points returns identically
- * to TEST_UV, but takes a pointer to a UTF-8 encoded string instead */
+ * to TEST_UV, but takes a pointer to a UTF-8 encoded string instead (and an
+ * end pointer as well) */
  #define FBC_UTF8(TEST_UV, TEST_UTF8, IF_SUCCESS, IF_FAIL)                      \
      if (s == reginfo->strbeg) {                                                \
          tmp = '\n';                                                            \
@@ -1911,7 +1973,7 @@ STMT_START {
                                                         0, UTF8_ALLOW_DEFAULT); \
      }                                                                          \
      tmp = TEST_UV(tmp);                                                        \
-    REXEC_FBC_SCAN(1,  /* 1=>is-utf8; advances s while s < strend */           \
+    REXEC_FBC_UTF8_SCAN(/* advances s while s < strend */                      \
          if (tmp == ! (TEST_UTF8((U8 *) s, (U8 *) reginfo->strend))) {          \
              tmp = !tmp;                                                        \
              IF_SUCCESS;                                                        \
@@ -1921,32 +1983,14 @@ STMT_START {
          }                                                                      \
      );
  
-/* Like the above two macros.  UTF8_CODE is the complete code for handling
- * UTF-8.  Common to the BOUND and NBOUND cases, set-up by the FBC_BOUND, etc
- * macros below */
-#define FBC_BOUND_COMMON(UTF8_CODE, TEST_NON_UTF8, IF_SUCCESS, IF_FAIL)        \
-    if (utf8_target) {                                                         \
-        UTF8_CODE                                                              \
-    }                                                                          \
-    else {  /* Not utf8 */                                                     \
-       tmp = (s != reginfo->strbeg) ? UCHARAT(s - 1) : '\n';                  \
-       tmp = TEST_NON_UTF8(tmp);                                              \
-       REXEC_FBC_SCAN(0, /* 0=>not-utf8; advances s while s < strend */       \
-           if (tmp == ! TEST_NON_UTF8((U8) *s)) {                             \
-               IF_SUCCESS;                                                    \
-               tmp = !tmp;                                                    \
-           }                                                                  \
-           else {                                                             \
-               IF_FAIL;                                                       \
-           }                                                                  \
-       );                                                                     \
-    }                                                                          \
+/* Like the above two macros, for a UTF-8 target string.  UTF8_CODE is the
+ * complete code for handling UTF-8.  Common to the BOUND and NBOUND cases,
+ * set-up by the FBC_BOUND, etc macros below */
+#define FBC_BOUND_COMMON_UTF8(UTF8_CODE, TEST_NON_UTF8, IF_SUCCESS, IF_FAIL)   \
+    UTF8_CODE;                                                                 \
      /* Here, things have been set up by the previous code so that tmp is the   \
-     * return of TEST_NON_UTF(s-1) or TEST_UTF8(s-1) (depending on the         \
-     * utf8ness of the target).  We also have to check if this matches against \
-     * the EOS, which we treat as a \n (which is the same value in both UTF-8  \
-     * or non-UTF8, so can use the non-utf8 test condition even for a UTF-8    \
-     * string */                                                               \
+     * return of TEST_NON_UTF8(s-1).  We also have to check if this matches    \
+     * against the EOS, which we treat as a \n */                              \
      if (tmp == ! TEST_NON_UTF8('\n')) {                                        \
          IF_SUCCESS;                                                            \
      }                                                                          \
@@ -1954,10 +1998,36 @@ STMT_START {
          IF_FAIL;                                                               \
      }
  
+/* Same as the macro above, but the target isn't UTF-8 */
+#define FBC_BOUND_COMMON_NON_UTF8(TEST_NON_UTF8, IF_SUCCESS, IF_FAIL)       \
+    tmp = (s != reginfo->strbeg) ? UCHARAT(s - 1) : '\n';                   \
+    tmp = TEST_NON_UTF8(tmp);                                               \
+    REXEC_FBC_NON_UTF8_SCAN(/* advances s while s < strend */               \
+        if (tmp == ! TEST_NON_UTF8(UCHARAT(s))) {                           \
+            IF_SUCCESS;                                                     \
+            tmp = !tmp;                                                     \
+        }                                                                   \
+        else {                                                              \
+            IF_FAIL;                                                        \
+        }                                                                   \
+    );                                                                      \
+    /* Here, things have been set up by the previous code so that tmp is    \
+     * the return of TEST_NON_UTF8(s-1).   We also have to check if this    \
+     * matches against the EOS, which we treat as a \n */                   \
+    if (tmp == ! TEST_NON_UTF8('\n')) {                                     \
+        IF_SUCCESS;                                                         \
+    }                                                                       \
+    else {                                                                  \
+        IF_FAIL;                                                            \
+    }
+
  /* This is the macro to use when we want to see if something that looks like it
- * could match, actually does, and if so exits the loop */
-#define REXEC_FBC_TRYIT                            \
-    if ((reginfo->intuit || regtry(reginfo, &s)))  \
+ * could match, actually does, and if so exits the loop.  It needs to be used
+ * only for bounds checking macros, as it allows for matching beyond the end of
+ * string (which should be zero length without having to look at the string
+ * contents) */
+#define REXEC_FBC_TRYIT                                                     \
+    if (reginfo->intuit || (s <= reginfo->strend && regtry(reginfo, &s)))   \
          goto got_it
  
  /* The only difference between the BOUND and NBOUND cases is that
@@ -1968,26 +2038,39 @@ STMT_START {
   * The TEST_FOO parameters are for operating on different forms of input, but
   * all should be ones that return identically for the same underlying code
   * points */
-#define FBC_BOUND(TEST_NON_UTF8, TEST_UV, TEST_UTF8)                           \
-    FBC_BOUND_COMMON(                                                          \
-          FBC_UTF8(TEST_UV, TEST_UTF8, REXEC_FBC_TRYIT, PLACEHOLDER),          \
+
+#define FBC_BOUND_UTF8(TEST_NON_UTF8, TEST_UV, TEST_UTF8)                   \
+    FBC_BOUND_COMMON_UTF8(                                                  \
+          FBC_UTF8(TEST_UV, TEST_UTF8, REXEC_FBC_TRYIT, PLACEHOLDER),       \
            TEST_NON_UTF8, REXEC_FBC_TRYIT, PLACEHOLDER)
  
-#define FBC_BOUND_A(TEST_NON_UTF8)                                             \
-    FBC_BOUND_COMMON(                                                          \
-            FBC_UTF8_A(TEST_NON_UTF8, REXEC_FBC_TRYIT, PLACEHOLDER),           \
-            TEST_NON_UTF8, REXEC_FBC_TRYIT, PLACEHOLDER)
+#define FBC_BOUND_NON_UTF8(TEST_NON_UTF8)                                   \
+    FBC_BOUND_COMMON_NON_UTF8(TEST_NON_UTF8, REXEC_FBC_TRYIT, PLACEHOLDER)
+
+#define FBC_BOUND_A_UTF8(TEST_NON_UTF8)                                     \
+    FBC_BOUND_COMMON_UTF8(                                                  \
+                    FBC_UTF8_A(TEST_NON_UTF8, REXEC_FBC_TRYIT, PLACEHOLDER),\
+                    TEST_NON_UTF8, REXEC_FBC_TRYIT, PLACEHOLDER)
+
+#define FBC_BOUND_A_NON_UTF8(TEST_NON_UTF8)                                 \
+    FBC_BOUND_COMMON_NON_UTF8(TEST_NON_UTF8, REXEC_FBC_TRYIT, PLACEHOLDER)
+
+#define FBC_NBOUND_UTF8(TEST_NON_UTF8, TEST_UV, TEST_UTF8)                  \
+    FBC_BOUND_COMMON_UTF8(                                                  \
+              FBC_UTF8(TEST_UV, TEST_UTF8, PLACEHOLDER, REXEC_FBC_TRYIT),   \
+              TEST_NON_UTF8, PLACEHOLDER, REXEC_FBC_TRYIT)
  
-#define FBC_NBOUND(TEST_NON_UTF8, TEST_UV, TEST_UTF8)                          \
-    FBC_BOUND_COMMON(                                                          \
-          FBC_UTF8(TEST_UV, TEST_UTF8, PLACEHOLDER, REXEC_FBC_TRYIT),          \
-          TEST_NON_UTF8, PLACEHOLDER, REXEC_FBC_TRYIT)
+#define FBC_NBOUND_NON_UTF8(TEST_NON_UTF8)                                  \
+    FBC_BOUND_COMMON_NON_UTF8(TEST_NON_UTF8, PLACEHOLDER, REXEC_FBC_TRYIT)
  
-#define FBC_NBOUND_A(TEST_NON_UTF8)                                            \
-    FBC_BOUND_COMMON(                                                          \
-            FBC_UTF8_A(TEST_NON_UTF8, PLACEHOLDER, REXEC_FBC_TRYIT),           \
+#define FBC_NBOUND_A_UTF8(TEST_NON_UTF8)                                    \
+    FBC_BOUND_COMMON_UTF8(                                                  \
+            FBC_UTF8_A(TEST_NON_UTF8, PLACEHOLDER, REXEC_FBC_TRYIT),        \
              TEST_NON_UTF8, PLACEHOLDER, REXEC_FBC_TRYIT)
  
+#define FBC_NBOUND_A_NON_UTF8(TEST_NON_UTF8)                                \
+    FBC_BOUND_COMMON_NON_UTF8(TEST_NON_UTF8, PLACEHOLDER, REXEC_FBC_TRYIT)
+
  #ifdef DEBUGGING
  static IV
  S_get_break_val_cp_checked(SV* const invlist, const UV cp_in) {
@@ -2075,7 +2158,6 @@ STATIC char *
  S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s, 
      const char *strend, regmatch_info *reginfo)
  {
-    dVAR;
  
      /* TRUE if x+ need not match at just the 1st pos of run of x's */
      const I32 doevery = (prog->intflags & PREGf_SKIP) == 0;
@@ -2110,72 +2192,174 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
  
      PERL_ARGS_ASSERT_FIND_BYCLASS;
  
-    /* We know what class it must start with. */
-    switch (OP(c)) {
-    case ANYOFPOSIXL:
-    case ANYOFL:
+    /* We know what class it must start with.  The case labels below encode
+     * the OP, the UTF8ness of the target ('t8' if it is UTF-8; 'tb' if it
+     * isn't; 'b' stands for byte), and the UTF8ness of the pattern ('p8' and
+     * 'pb'). */
+    switch (with_tp_UTF8ness(OP(c), utf8_target, is_utf8_pat)) {
+
+      case ANYOFPOSIXL_t8_pb:
+      case ANYOFPOSIXL_t8_p8:
+      case ANYOFL_t8_pb:
+      case ANYOFL_t8_p8:
          _CHECK_AND_WARN_PROBLEMATIC_LOCALE;
+        CHECK_AND_WARN_NON_UTF8_CTYPE_LOCALE_IN_SETS(c);
  
-        if (ANYOFL_UTF8_LOCALE_REQD(FLAGS(c)) && ! IN_UTF8_CTYPE_LOCALE) {
-            Perl_ck_warner(aTHX_ packWARN(WARN_LOCALE), utf8_locale_required);
-        }
+        /* FALLTHROUGH */
+
+      case ANYOFD_t8_pb:
+      case ANYOFD_t8_p8:
+      case ANYOF_t8_pb:
+      case ANYOF_t8_p8:
+        REXEC_FBC_UTF8_CLASS_SCAN(
+                reginclass(prog, c, (U8*)s, (U8*) strend, 1 /* is utf8 */));
+        break;
+
+      case ANYOFPOSIXL_tb_pb:
+      case ANYOFPOSIXL_tb_p8:
+      case ANYOFL_tb_pb:
+      case ANYOFL_tb_p8:
+        _CHECK_AND_WARN_PROBLEMATIC_LOCALE;
+        CHECK_AND_WARN_NON_UTF8_CTYPE_LOCALE_IN_SETS(c);
  
          /* FALLTHROUGH */
-    case ANYOFD:
-    case ANYOF:
-        if (utf8_target) {
-            REXEC_FBC_CLASS_SCAN(1, /* 1=>is-utf8 */
-                      reginclass(prog, c, (U8*)s, (U8*) strend, utf8_target));
-        }
-        else if (ANYOF_FLAGS(c) & ~ ANYOF_MATCHES_ALL_ABOVE_BITMAP) {
+
+      case ANYOFD_tb_pb:
+      case ANYOFD_tb_p8:
+      case ANYOF_tb_pb:
+      case ANYOF_tb_p8:
+        if (ANYOF_FLAGS(c) & ~ ANYOF_MATCHES_ALL_ABOVE_BITMAP) {
              /* We know that s is in the bitmap range since the target isn't
               * UTF-8, so what happens for out-of-range values is not relevant,
               * so exclude that from the flags */
-            REXEC_FBC_CLASS_SCAN(0, reginclass(prog,c, (U8*)s, (U8*)s+1, 0));
+            REXEC_FBC_NON_UTF8_CLASS_SCAN(reginclass(prog,c, (U8*)s, (U8*)s+1,
+                                                     0));
          }
          else {
-            REXEC_FBC_CLASS_SCAN(0, ANYOF_BITMAP_TEST(c, *((U8*)s)));
+            REXEC_FBC_NON_UTF8_CLASS_SCAN(ANYOF_BITMAP_TEST(c, *((U8*)s)));
          }
          break;
  
-    case ANYOFM:    /* ARG() is the base byte; FLAGS() the mask byte */
-        /* UTF-8ness doesn't matter, so use 0 */
-        REXEC_FBC_FIND_NEXT_SCAN(0,
-         (char *) find_next_masked((U8 *) s, (U8 *) strend,
-                                   (U8) ARG(c), FLAGS(c)));
+      case ANYOFM_tb_pb: /* ARG() is the base byte; FLAGS() the mask byte */
+      case ANYOFM_tb_p8:
+        REXEC_FBC_NON_UTF8_FIND_NEXT_SCAN(
+                            (char *) find_next_masked((U8 *) s, (U8 *) strend,
+                                                    (U8) ARG(c), FLAGS(c)));
          break;
  
-    case NANYOFM:
-        REXEC_FBC_FIND_NEXT_SCAN(0,
-         (char *) find_span_end_mask((U8 *) s, (U8 *) strend,
-                                   (U8) ARG(c), FLAGS(c)));
+      case ANYOFM_t8_pb:
+      case ANYOFM_t8_p8:
+        /* UTF-8ness doesn't matter for correctness, because this only matches
+         * UTF-8 invariants.  But we use the UTF-8 scan anyway for performance,
+         * as otherwise we would have to examine all the continuation bytes */
+        REXEC_FBC_UTF8_FIND_NEXT_SCAN(
+                            (char *) find_next_masked((U8 *) s, (U8 *) strend,
+                                                    (U8) ARG(c), FLAGS(c)));
          break;
  
-    case ANYOFH:
-        if (utf8_target) REXEC_FBC_CLASS_SCAN(TRUE,
-                      reginclass(prog, c, (U8*)s, (U8*) strend, utf8_target));
+      case NANYOFM_tb_pb:
+      case NANYOFM_tb_p8:
+        REXEC_FBC_NON_UTF8_FIND_NEXT_SCAN(
+                        (char *) find_span_end_mask((U8 *) s, (U8 *) strend,
+                                                (U8) ARG(c), FLAGS(c)));
          break;
  
-    case EXACTFAA_NO_TRIE: /* This node only generated for non-utf8 patterns */
-        assert(! is_utf8_pat);
-       /* FALLTHROUGH */
-    case EXACTFAA:
-        if (is_utf8_pat) {
-            utf8_fold_flags = FOLDEQ_UTF8_NOMIX_ASCII
-                             |FOLDEQ_S2_ALREADY_FOLDED|FOLDEQ_S2_FOLDS_SANE;
-            goto do_exactf_utf8;
+      case NANYOFM_t8_pb:
+      case NANYOFM_t8_p8: /* UTF-8ness does matter because can match UTF-8
+                                  variants. */
+        REXEC_FBC_UTF8_FIND_NEXT_SCAN(
+                        (char *) find_span_end_mask((U8 *) s, (U8 *) strend,
+                                                    (U8) ARG(c), FLAGS(c)));
+        break;
+
+      /* These nodes all require at least one code point to be in UTF-8 to
+       * match */
+      case ANYOFH_tb_pb:
+      case ANYOFH_tb_p8:
+      case ANYOFHb_tb_pb:
+      case ANYOFHb_tb_p8:
+      case ANYOFHr_tb_pb:
+      case ANYOFHr_tb_p8:
+      case ANYOFHs_tb_pb:
+      case ANYOFHs_tb_p8:
+      case EXACTFLU8_tb_pb:
+      case EXACTFLU8_tb_p8:
+      case EXACTFU_REQ8_tb_pb:
+      case EXACTFU_REQ8_tb_p8:
+        break;
+
+      case ANYOFH_t8_pb:
+      case ANYOFH_t8_p8:
+        REXEC_FBC_UTF8_CLASS_SCAN(
+              (   (U8) NATIVE_UTF8_TO_I8(*s) >= ANYOF_FLAGS(c)
+               && reginclass(prog, c, (U8*)s, (U8*) strend, 1 /* is utf8 */)));
+        break;
+
+      case ANYOFHb_t8_pb:
+      case ANYOFHb_t8_p8:
+        {
+            /* We know what the first byte of any matched string should be. */
+            U8 first_byte = FLAGS(c);
+
+            REXEC_FBC_FIND_NEXT_UTF8_BYTE_SCAN(first_byte,
+                    reginclass(prog, c, (U8*)s, (U8*) strend, 1 /* is utf8 */));
          }
-        else if (utf8_target) {
+        break;
  
-            /* Here, and elsewhere in this file, the reason we can't consider a
-             * non-UTF-8 pattern already folded in the presence of a UTF-8
-             * target is because any MICRO SIGN in the pattern won't be folded.
-             * Since the fold of the MICRO SIGN requires UTF-8 to represent, we
-             * can consider a non-UTF-8 pattern folded when matching a
-             * non-UTF-8 target */
-            utf8_fold_flags = FOLDEQ_UTF8_NOMIX_ASCII;
-            goto do_exactf_utf8;
+      case ANYOFHr_t8_pb:
+      case ANYOFHr_t8_p8:
+        REXEC_FBC_UTF8_CLASS_SCAN(
+                    (   inRANGE(NATIVE_UTF8_TO_I8(*s),
+                                LOWEST_ANYOF_HRx_BYTE(ANYOF_FLAGS(c)),
+                                HIGHEST_ANYOF_HRx_BYTE(ANYOF_FLAGS(c)))
+                    && reginclass(prog, c, (U8*)s, (U8*) strend,
+                                                           1 /* is utf8 */)));
+        break;
+
+      case ANYOFHs_t8_pb:
+      case ANYOFHs_t8_p8:
+        REXEC_FBC_UTF8_CLASS_SCAN(
+                (   strend -s >= FLAGS(c)
+                && memEQ(s, ((struct regnode_anyofhs *) c)->string, FLAGS(c))
+                && reginclass(prog, c, (U8*)s, (U8*) strend, 1 /* is utf8 */)));
+        break;
+
+      case ANYOFR_tb_pb:
+      case ANYOFR_tb_p8:
+        REXEC_FBC_NON_UTF8_CLASS_SCAN(withinCOUNT((U8) *s,
+                                            ANYOFRbase(c), ANYOFRdelta(c)));
+        break;
+
+      case ANYOFR_t8_pb:
+      case ANYOFR_t8_p8:
+        REXEC_FBC_UTF8_CLASS_SCAN(
+                            (   NATIVE_UTF8_TO_I8(*s) >= ANYOF_FLAGS(c)
+                             && withinCOUNT(utf8_to_uvchr_buf((U8 *) s,
+                                                              (U8 *) strend,
+                                                              NULL),
+                                            ANYOFRbase(c), ANYOFRdelta(c))));
+        break;
+
+      case ANYOFRb_tb_pb:
+      case ANYOFRb_tb_p8:
+        REXEC_FBC_NON_UTF8_CLASS_SCAN(withinCOUNT((U8) *s,
+                                            ANYOFRbase(c), ANYOFRdelta(c)));
+        break;
+
+      case ANYOFRb_t8_pb:
+      case ANYOFRb_t8_p8:
+        {   /* We know what the first byte of any matched string should be */
+            U8 first_byte = FLAGS(c);
+
+            REXEC_FBC_FIND_NEXT_UTF8_BYTE_SCAN(first_byte,
+                                withinCOUNT(utf8_to_uvchr_buf((U8 *) s,
+                                                              (U8 *) strend,
+                                                              NULL),
+                                            ANYOFRbase(c), ANYOFRdelta(c)));
          }
+        break;
+
+      case EXACTFAA_tb_pb:
  
          /* Latin1 folds are not affected by /a, except it excludes the sharp s,
           * which these functions don't handle anyway */
@@ -2183,80 +2367,50 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
          folder = foldEQ_latin1_s2_folded;
          goto do_exactf_non_utf8;
  
-    case EXACTF:   /* This node only generated for non-utf8 patterns */
-        assert(! is_utf8_pat);
-        if (utf8_target) {
-            goto do_exactf_utf8;
-        }
+      case EXACTF_tb_pb:
          fold_array = PL_fold;
          folder = foldEQ;
          goto do_exactf_non_utf8;
  
-    case EXACTFL:
+      case EXACTFL_tb_pb:
          _CHECK_AND_WARN_PROBLEMATIC_LOCALE;
-        if (is_utf8_pat || utf8_target || IN_UTF8_CTYPE_LOCALE) {
+
+        if (IN_UTF8_CTYPE_LOCALE) {
              utf8_fold_flags = FOLDEQ_LOCALE;
              goto do_exactf_utf8;
          }
+
          fold_array = PL_fold_locale;
          folder = foldEQ_locale;
          goto do_exactf_non_utf8;
  
-    case EXACTFUP:      /* Problematic even though pattern isn't UTF-8.  Use
-                           full functionality normally not done except for
-                           UTF-8 */
-        assert(! is_utf8_pat);
-        goto do_exactf_utf8;
-
-    case EXACTFLU8:
-            if (! utf8_target) {    /* All code points in this node require
-                                       UTF-8 to express.  */
-                break;
-            }
-            utf8_fold_flags =  FOLDEQ_LOCALE | FOLDEQ_S2_ALREADY_FOLDED
-                                             | FOLDEQ_S2_FOLDS_SANE;
-            goto do_exactf_utf8;
-
-    case EXACTFU_ONLY8:
-        if (! utf8_target) {
-            break;
-        }
-        assert(is_utf8_pat);
-        utf8_fold_flags = FOLDEQ_S2_ALREADY_FOLDED;
-        goto do_exactf_utf8;
-
-    case EXACTFU:
-        if (is_utf8_pat || utf8_target) {
-            utf8_fold_flags = FOLDEQ_S2_ALREADY_FOLDED;
-            goto do_exactf_utf8;
-        }
-
-        /* Any 'ss' in the pattern should have been replaced by regcomp,
-         * so we don't have to worry here about this single special case
-         * in the Latin1 range */
+      case EXACTFU_tb_pb:
+        /* Any 'ss' in the pattern should have been replaced by regcomp, so we
+         * don't have to worry here about this single special case in the
+         * Latin1 range */
          fold_array = PL_fold_latin1;
          folder = foldEQ_latin1_s2_folded;
  
          /* FALLTHROUGH */
  
-      do_exactf_non_utf8: /* Neither pattern nor string are UTF8, and there
-                           are no glitches with fold-length differences
-                           between the target string and pattern */
+       do_exactf_non_utf8: /* Neither pattern nor string are UTF8, and there
+                              are no glitches with fold-length differences
+                              between the target string and pattern */
  
-        /* The idea in the non-utf8 EXACTF* cases is to first find the
-         * first character of the EXACTF* node and then, if necessary,
+        /* The idea in the non-utf8 EXACTF* cases is to first find the first
+         * character of the EXACTF* node and then, if necessary,
           * case-insensitively compare the full text of the node.  c1 is the
           * first character.  c2 is its fold.  This logic will not work for
-         * Unicode semantics and the german sharp ss, which hence should
-         * not be compiled into a node that gets here. */
-        pat_string = STRING(c);
-        ln  = STR_LEN(c);      /* length to match in octets/bytes */
-
-        /* We know that we have to match at least 'ln' bytes (which is the
-         * same as characters, since not utf8).  If we have to match 3
-         * characters, and there are only 2 availabe, we know without
-         * trying that it will fail; so don't start a match past the
-         * required minimum number from the far end */
+         * Unicode semantics and the german sharp ss, which hence should not be
+         * compiled into a node that gets here. */
+        pat_string = STRINGs(c);
+        ln  = STR_LENs(c);     /* length to match in octets/bytes */
+
+        /* We know that we have to match at least 'ln' bytes (which is the same
+         * as characters, since not utf8).  If we have to match 3 characters,
+         * and there are only 2 available, we know without trying that it will
+         * fail; so don't start a match past the required minimum number from
+         * the far end */
          e = HOP3c(strend, -((SSize_t)ln), s);
          if (e < s)
              break;
@@ -2318,486 +2472,647 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
          }
          break;
  
-      do_exactf_utf8:
-      {
-        unsigned expansion;
-
-        /* If one of the operands is in utf8, we can't use the simpler folding
-         * above, due to the fact that many different characters can have the
-         * same fold, or portion of a fold, or different- length fold */
-        pat_string = STRING(c);
-        ln  = STR_LEN(c);      /* length to match in octets/bytes */
-        pat_end = pat_string + ln;
-        lnc = is_utf8_pat       /* length to match in characters */
-                ? utf8_length((U8 *) pat_string, (U8 *) pat_end)
-                : ln;
-
-        /* We have 'lnc' characters to match in the pattern, but because of
-         * multi-character folding, each character in the target can match
-         * up to 3 characters (Unicode guarantees it will never exceed
-         * this) if it is utf8-encoded; and up to 2 if not (based on the
-         * fact that the Latin 1 folds are already determined, and the
-         * only multi-char fold in that range is the sharp-s folding to
-         * 'ss'.  Thus, a pattern character can match as little as 1/3 of a
-         * string character.  Adjust lnc accordingly, rounding up, so that
-         * if we need to match at least 4+1/3 chars, that really is 5. */
-        expansion = (utf8_target) ? UTF8_MAX_FOLD_CHAR_EXPAND : 2;
-        lnc = (lnc + expansion - 1) / expansion;
-
-        /* As in the non-UTF8 case, if we have to match 3 characters, and
-         * only 2 are left, it's guaranteed to fail, so don't start a
-         * match that would require us to go beyond the end of the string
-         */
-        e = HOP3c(strend, -((SSize_t)lnc), s);
-
-        /* XXX Note that we could recalculate e to stop the loop earlier,
-         * as the worst case expansion above will rarely be met, and as we
-         * go along we would usually find that e moves further to the left.
-         * This would happen only after we reached the point in the loop
-         * where if there were no expansion we should fail.  Unclear if
-         * worth the expense */
-
-        while (s <= e) {
-            char *my_strend= (char *)strend;
-            if (foldEQ_utf8_flags(s, &my_strend, 0,  utf8_target,
-                  pat_string, NULL, ln, is_utf8_pat, utf8_fold_flags)
-                && (reginfo->intuit || regtry(reginfo, &s)) )
-            {
-                goto got_it;
-            }
-            s += (utf8_target) ? UTF8SKIP(s) : 1;
-        }
-        break;
-    }
+      case EXACTFAA_tb_p8:
+      case EXACTFAA_t8_p8:
+        utf8_fold_flags = FOLDEQ_UTF8_NOMIX_ASCII
+                         |FOLDEQ_S2_ALREADY_FOLDED
+                         |FOLDEQ_S2_FOLDS_SANE;
+        goto do_exactf_utf8;
  
-    case BOUNDL:
-        _CHECK_AND_WARN_PROBLEMATIC_LOCALE;
-        if (FLAGS(c) != TRADITIONAL_BOUND) {
-            if (! IN_UTF8_CTYPE_LOCALE) {
-                Perl_ck_warner(aTHX_ packWARN(WARN_LOCALE),
-                                                B_ON_NON_UTF8_LOCALE_IS_WRONG);
-            }
-            goto do_boundu;
-        }
+      case EXACTFAA_NO_TRIE_tb_pb:
+      case EXACTFAA_NO_TRIE_t8_pb:
+      case EXACTFAA_t8_pb:
  
-        FBC_BOUND(isWORDCHAR_LC, isWORDCHAR_LC_uvchr, isWORDCHAR_LC_utf8_safe);
-        break;
+        /* Here, and elsewhere in this file, the reason we can't consider a
+         * non-UTF-8 pattern already folded in the presence of a UTF-8 target
+         * is because any MICRO SIGN in the pattern won't be folded.  Since the
+         * fold of the MICRO SIGN requires UTF-8 to represent, we can consider
+         * a non-UTF-8 pattern folded when matching a non-UTF-8 target */
+        utf8_fold_flags = FOLDEQ_UTF8_NOMIX_ASCII;
+        goto do_exactf_utf8;
  
-    case NBOUNDL:
+      case EXACTFL_tb_p8:
+      case EXACTFL_t8_pb:
+      case EXACTFL_t8_p8:
          _CHECK_AND_WARN_PROBLEMATIC_LOCALE;
-        if (FLAGS(c) != TRADITIONAL_BOUND) {
-            if (! IN_UTF8_CTYPE_LOCALE) {
-                Perl_ck_warner(aTHX_ packWARN(WARN_LOCALE),
-                                                B_ON_NON_UTF8_LOCALE_IS_WRONG);
+        utf8_fold_flags = FOLDEQ_LOCALE;
+        goto do_exactf_utf8;
+
+      case EXACTFLU8_t8_pb:
+      case EXACTFLU8_t8_p8:
+        utf8_fold_flags =  FOLDEQ_LOCALE | FOLDEQ_S2_ALREADY_FOLDED
+                                         | FOLDEQ_S2_FOLDS_SANE;
+        goto do_exactf_utf8;
+
+      case EXACTFU_REQ8_t8_p8:
+        utf8_fold_flags = FOLDEQ_S2_ALREADY_FOLDED;
+        goto do_exactf_utf8;
+
+      case EXACTFU_tb_p8:
+      case EXACTFU_t8_pb:
+      case EXACTFU_t8_p8:
+        utf8_fold_flags = FOLDEQ_S2_ALREADY_FOLDED;
+        goto do_exactf_utf8;
+
+      /* The following are problematic even though pattern isn't UTF-8.  Use
+       * full functionality normally not done except for UTF-8. */
+      case EXACTF_t8_pb:
+      case EXACTFUP_tb_pb:
+      case EXACTFUP_t8_pb:
+
+       do_exactf_utf8:
+        {
+            unsigned expansion;
+
+            /* If one of the operands is in utf8, we can't use the simpler
+             * folding above, due to the fact that many different characters
+             * can have the same fold, or portion of a fold, or different-
+             * length fold */
+            pat_string = STRINGs(c);
+            ln  = STR_LENs(c); /* length to match in octets/bytes */
+            pat_end = pat_string + ln;
+            lnc = is_utf8_pat       /* length to match in characters */
+                  ? utf8_length((U8 *) pat_string, (U8 *) pat_end)
+                  : ln;
+
+            /* We have 'lnc' characters to match in the pattern, but because of
+             * multi-character folding, each character in the target can match
+             * up to 3 characters (Unicode guarantees it will never exceed
+             * this) if it is utf8-encoded; and up to 2 if not (based on the
+             * fact that the Latin 1 folds are already determined, and the only
+             * multi-char fold in that range is the sharp-s folding to 'ss').
+             * Thus, a pattern character can match as little as 1/3 of a string
+             * character.  Adjust lnc accordingly, rounding up, so that if we
+             * need to match at least 4+1/3 chars, that really is 5. */
+            expansion = (utf8_target) ? UTF8_MAX_FOLD_CHAR_EXPAND : 2;
+            lnc = (lnc + expansion - 1) / expansion;
+
+            /* As in the non-UTF8 case, if we have to match 3 characters, and
+             * only 2 are left, it's guaranteed to fail, so don't start a match
+             * that would require us to go beyond the end of the string */
+            e = HOP3c(strend, -((SSize_t)lnc), s);
+
+            /* XXX Note that we could recalculate e to stop the loop earlier,
+             * as the worst case expansion above will rarely be met, and as we
+             * go along we would usually find that e moves further to the left.
+             * This would happen only after we reached the point in the loop
+             * where if there were no expansion we should fail.  Unclear if
+             * worth the expense */
+
+            while (s <= e) {
+                char *my_strend= (char *)strend;
+                if (   foldEQ_utf8_flags(s, &my_strend, 0,  utf8_target,
+                                         pat_string, NULL, ln, is_utf8_pat,
+                                         utf8_fold_flags)
+                    && (reginfo->intuit || regtry(reginfo, &s)) )
+                {
+                    goto got_it;
+                }
+                s += (utf8_target) ? UTF8_SAFE_SKIP(s, reginfo->strend) : 1;
              }
-            goto do_nboundu;
          }
-
-        FBC_NBOUND(isWORDCHAR_LC, isWORDCHAR_LC_uvchr, isWORDCHAR_LC_utf8_safe);
          break;
  
-    case BOUND: /* regcomp.c makes sure that this only has the traditional \b
-                   meaning */
+      case BOUNDA_tb_pb:
+      case BOUNDA_tb_p8:
+      case BOUND_tb_pb:  /* /d without utf8 target is /a */
+      case BOUND_tb_p8:
+        /* regcomp.c makes sure that these only have the traditional \b
+         * meaning. */
          assert(FLAGS(c) == TRADITIONAL_BOUND);
  
-        FBC_BOUND(isWORDCHAR, isWORDCHAR_uni, isWORDCHAR_utf8_safe);
+        FBC_BOUND_A_NON_UTF8(isWORDCHAR_A);
          break;
  
-    case BOUNDA: /* regcomp.c makes sure that this only has the traditional \b
-                   meaning */
+      case BOUNDA_t8_pb: /* What /a matches is same under UTF-8 */
+      case BOUNDA_t8_p8:
+        /* regcomp.c makes sure that these only have the traditional \b
+         * meaning. */
          assert(FLAGS(c) == TRADITIONAL_BOUND);
  
-        FBC_BOUND_A(isWORDCHAR_A);
+        FBC_BOUND_A_UTF8(isWORDCHAR_A);
          break;
  
-    case NBOUND: /* regcomp.c makes sure that this only has the traditional \b
-                   meaning */
+      case NBOUNDA_tb_pb:
+      case NBOUNDA_tb_p8:
+      case NBOUND_tb_pb: /* /d without utf8 target is /a */
+      case NBOUND_tb_p8:
+        /* regcomp.c makes sure that these only have the traditional \b
+         * meaning. */
          assert(FLAGS(c) == TRADITIONAL_BOUND);
  
-        FBC_NBOUND(isWORDCHAR, isWORDCHAR_uni, isWORDCHAR_utf8_safe);
+        FBC_NBOUND_A_NON_UTF8(isWORDCHAR_A);
          break;
  
-    case NBOUNDA: /* regcomp.c makes sure that this only has the traditional \b
-                   meaning */
+      case NBOUNDA_t8_pb: /* What /a matches is same under UTF-8 */
+      case NBOUNDA_t8_p8:
+        /* regcomp.c makes sure that these only have the traditional \b
+         * meaning. */
          assert(FLAGS(c) == TRADITIONAL_BOUND);
  
-        FBC_NBOUND_A(isWORDCHAR_A);
+        FBC_NBOUND_A_UTF8(isWORDCHAR_A);
          break;
  
-    case NBOUNDU:
+      case NBOUNDU_tb_pb:
+      case NBOUNDU_tb_p8:
          if ((bound_type) FLAGS(c) == TRADITIONAL_BOUND) {
-            FBC_NBOUND(isWORDCHAR_L1, isWORDCHAR_uni, isWORDCHAR_utf8_safe);
+            FBC_NBOUND_NON_UTF8(isWORDCHAR_L1);
+            break;
+        }
+
+        to_complement = 1;
+        goto do_boundu_non_utf8;
+
+      case NBOUNDL_tb_pb:
+      case NBOUNDL_tb_p8:
+        _CHECK_AND_WARN_PROBLEMATIC_LOCALE;
+        if (FLAGS(c) == TRADITIONAL_BOUND) {
+            FBC_NBOUND_NON_UTF8(isWORDCHAR_LC);
              break;
          }
  
-      do_nboundu:
+        CHECK_AND_WARN_NON_UTF8_CTYPE_LOCALE_IN_BOUND;
  
          to_complement = 1;
-        /* FALLTHROUGH */
+        goto do_boundu_non_utf8;
  
-    case BOUNDU:
-      do_boundu:
-        switch((bound_type) FLAGS(c)) {
-            case TRADITIONAL_BOUND:
-                FBC_BOUND(isWORDCHAR_L1, isWORDCHAR_uni, isWORDCHAR_utf8_safe);
-                break;
-            case GCB_BOUND:
-                if (s == reginfo->strbeg) {
-                    if (reginfo->intuit || regtry(reginfo, &s))
-                    {
-                        goto got_it;
-                    }
+      case BOUNDL_tb_pb:
+      case BOUNDL_tb_p8:
+        _CHECK_AND_WARN_PROBLEMATIC_LOCALE;
+        if (FLAGS(c) == TRADITIONAL_BOUND) {
+            FBC_BOUND_NON_UTF8(isWORDCHAR_LC);
+            break;
+        }
  
-                    /* Didn't match.  Try at the next position (if there is one) */
-                    s += (utf8_target) ? UTF8SKIP(s) : 1;
-                    if (UNLIKELY(s >= reginfo->strend)) {
-                        break;
-                    }
-                }
+        CHECK_AND_WARN_NON_UTF8_CTYPE_LOCALE_IN_BOUND;
  
-                if (utf8_target) {
-                    GCB_enum before = getGCB_VAL_UTF8(
-                                               reghop3((U8*)s, -1,
-                                                       (U8*)(reginfo->strbeg)),
-                                               (U8*) reginfo->strend);
-                    while (s < strend) {
-                        GCB_enum after = getGCB_VAL_UTF8((U8*) s,
-                                                        (U8*) reginfo->strend);
-                        if (   (to_complement ^ isGCB(before,
-                                                      after,
-                                                      (U8*) reginfo->strbeg,
-                                                      (U8*) s,
-                                                      utf8_target))
-                            && (reginfo->intuit || regtry(reginfo, &s)))
-                        {
-                            goto got_it;
-                        }
-                        before = after;
-                        s += UTF8SKIP(s);
-                    }
-                }
-                else {  /* Not utf8.  Everything is a GCB except between CR and
-                           LF */
-                    while (s < strend) {
-                        if ((to_complement ^ (   UCHARAT(s - 1) != '\r'
-                                              || UCHARAT(s) != '\n'))
-                            && (reginfo->intuit || regtry(reginfo, &s)))
-                        {
-                            goto got_it;
-                        }
-                        s++;
-                    }
-                }
+        goto do_boundu_non_utf8;
+
+      case BOUNDU_tb_pb:
+      case BOUNDU_tb_p8:
+        if ((bound_type) FLAGS(c) == TRADITIONAL_BOUND) {
+            FBC_BOUND_NON_UTF8(isWORDCHAR_L1);
+            break;
+        }
+
+      do_boundu_non_utf8:
+        if (s == reginfo->strbeg) {
+            if (reginfo->intuit || regtry(reginfo, &s))
+            {
+                goto got_it;
+            }
+
+            /* Didn't match.  Try at the next position (if there is one) */
+            s++;
+            if (UNLIKELY(s >= reginfo->strend)) {
+                break;
+            }
+        }
  
-                /* And, since this is a bound, it can match after the final
-                 * character in the string */
-                if ((reginfo->intuit || regtry(reginfo, &s))) {
+        switch((bound_type) FLAGS(c)) {
+          case TRADITIONAL_BOUND: /* Should have already been handled */
+            assert(0);
+            break;
+
+          case GCB_BOUND:
+            /* Not utf8.  Everything is a GCB except between CR and LF */
+            while (s < strend) {
+                if ((to_complement ^ (   UCHARAT(s - 1) != '\r'
+                                      || UCHARAT(s) != '\n'))
+                    && (reginfo->intuit || regtry(reginfo, &s)))
+                {
                      goto got_it;
                  }
-                break;
+                s++;
+            }
  
-            case LB_BOUND:
-                if (s == reginfo->strbeg) {
-                    if (reginfo->intuit || regtry(reginfo, &s)) {
+            break;
+
+          case LB_BOUND:
+            {
+                LB_enum before = getLB_VAL_CP((U8) *(s -1));
+                while (s < strend) {
+                    LB_enum after = getLB_VAL_CP((U8) *s);
+                    if (to_complement ^ isLB(before,
+                                             after,
+                                             (U8*) reginfo->strbeg,
+                                             (U8*) s,
+                                             (U8*) reginfo->strend,
+                                             0 /* target not utf8 */ )
+                        && (reginfo->intuit || regtry(reginfo, &s)))
+                    {
                          goto got_it;
                      }
-                    s += (utf8_target) ? UTF8SKIP(s) : 1;
-                    if (UNLIKELY(s >= reginfo->strend)) {
-                        break;
-                    }
+                    before = after;
+                    s++;
                  }
+            }
  
-                if (utf8_target) {
-                    LB_enum before = getLB_VAL_UTF8(reghop3((U8*)s,
-                                                               -1,
-                                                               (U8*)(reginfo->strbeg)),
-                                                       (U8*) reginfo->strend);
-                    while (s < strend) {
-                        LB_enum after = getLB_VAL_UTF8((U8*) s, (U8*) reginfo->strend);
-                        if (to_complement ^ isLB(before,
-                                                 after,
-                                                 (U8*) reginfo->strbeg,
-                                                 (U8*) s,
-                                                 (U8*) reginfo->strend,
-                                                 utf8_target)
-                            && (reginfo->intuit || regtry(reginfo, &s)))
-                        {
-                            goto got_it;
-                        }
-                        before = after;
-                        s += UTF8SKIP(s);
-                    }
-                }
-                else {  /* Not utf8. */
-                    LB_enum before = getLB_VAL_CP((U8) *(s -1));
-                    while (s < strend) {
-                        LB_enum after = getLB_VAL_CP((U8) *s);
-                        if (to_complement ^ isLB(before,
-                                                 after,
-                                                 (U8*) reginfo->strbeg,
-                                                 (U8*) s,
-                                                 (U8*) reginfo->strend,
-                                                 utf8_target)
-                            && (reginfo->intuit || regtry(reginfo, &s)))
-                        {
-                            goto got_it;
-                        }
-                        before = after;
-                        s++;
-                    }
-                }
+            break;
  
-                if (reginfo->intuit || regtry(reginfo, &s)) {
-                    goto got_it;
+          case SB_BOUND:
+            {
+                SB_enum before = getSB_VAL_CP((U8) *(s -1));
+                while (s < strend) {
+                    SB_enum after = getSB_VAL_CP((U8) *s);
+                    if ((to_complement ^ isSB(before,
+                                              after,
+                                              (U8*) reginfo->strbeg,
+                                              (U8*) s,
+                                              (U8*) reginfo->strend,
+                                             0 /* target not utf8 */ ))
+                        && (reginfo->intuit || regtry(reginfo, &s)))
+                    {
+                        goto got_it;
+                    }
+                    before = after;
+                    s++;
                  }
+            }
  
-                break;
+            break;
  
-            case SB_BOUND:
-                if (s == reginfo->strbeg) {
-                    if (reginfo->intuit || regtry(reginfo, &s)) {
+          case WB_BOUND:
+            {
+                WB_enum previous = WB_UNKNOWN;
+                WB_enum before = getWB_VAL_CP((U8) *(s -1));
+                while (s < strend) {
+                    WB_enum after = getWB_VAL_CP((U8) *s);
+                    if ((to_complement ^ isWB(previous,
+                                              before,
+                                              after,
+                                              (U8*) reginfo->strbeg,
+                                              (U8*) s,
+                                              (U8*) reginfo->strend,
+                                               0 /* target not utf8 */ ))
+                        && (reginfo->intuit || regtry(reginfo, &s)))
+                    {
                          goto got_it;
                      }
-                    s += (utf8_target) ? UTF8SKIP(s) : 1;
-                    if (UNLIKELY(s >= reginfo->strend)) {
-                        break;
-                    }
+                    previous = before;
+                    before = after;
+                    s++;
                  }
+            }
+        }
  
-                if (utf8_target) {
-                    SB_enum before = getSB_VAL_UTF8(reghop3((U8*)s,
-                                                        -1,
-                                                        (U8*)(reginfo->strbeg)),
-                                                      (U8*) reginfo->strend);
-                    while (s < strend) {
-                        SB_enum after = getSB_VAL_UTF8((U8*) s,
-                                                         (U8*) reginfo->strend);
-                        if ((to_complement ^ isSB(before,
-                                                  after,
-                                                  (U8*) reginfo->strbeg,
-                                                  (U8*) s,
-                                                  (U8*) reginfo->strend,
-                                                  utf8_target))
-                            && (reginfo->intuit || regtry(reginfo, &s)))
-                        {
-                            goto got_it;
-                        }
-                        before = after;
-                        s += UTF8SKIP(s);
-                    }
-                }
-                else {  /* Not utf8. */
-                    SB_enum before = getSB_VAL_CP((U8) *(s -1));
-                    while (s < strend) {
-                        SB_enum after = getSB_VAL_CP((U8) *s);
-                        if ((to_complement ^ isSB(before,
+        /* Here we are at the final position in the target string, which is a
+         * boundary by definition, so it matches, depending on other
+         * constraints. */
+        if (   reginfo->intuit
+            || (s <= reginfo->strend && regtry(reginfo, &s)))
+        {
+            goto got_it;
+        }
+
+        break;
+
+      case BOUNDL_t8_pb:
+      case BOUNDL_t8_p8:
+        _CHECK_AND_WARN_PROBLEMATIC_LOCALE;
+        if (FLAGS(c) == TRADITIONAL_BOUND) {
+            FBC_BOUND_UTF8(isWORDCHAR_LC, isWORDCHAR_LC_uvchr,
+                           isWORDCHAR_LC_utf8_safe);
+            break;
+        }
+
+        CHECK_AND_WARN_NON_UTF8_CTYPE_LOCALE_IN_BOUND;
+
+        /* \b{}, unlike \B{}, is not complemented: leave to_complement alone */
+        goto do_boundu_utf8;
+
+      case NBOUNDL_t8_pb:
+      case NBOUNDL_t8_p8:
+        _CHECK_AND_WARN_PROBLEMATIC_LOCALE;
+        if (FLAGS(c) == TRADITIONAL_BOUND) {
+            FBC_NBOUND_UTF8(isWORDCHAR_LC, isWORDCHAR_LC_uvchr,
+                            isWORDCHAR_LC_utf8_safe);
+            break;
+        }
+
+        CHECK_AND_WARN_NON_UTF8_CTYPE_LOCALE_IN_BOUND;
+
+        to_complement = 1;
+        goto do_boundu_utf8;
+
+      case NBOUND_t8_pb:
+      case NBOUND_t8_p8:
+        /* regcomp.c makes sure that these only have the traditional \b
+         * meaning. */
+        assert(FLAGS(c) == TRADITIONAL_BOUND);
+
+        /* FALLTHROUGH */
+
+      case NBOUNDU_t8_pb:
+      case NBOUNDU_t8_p8:
+        if ((bound_type) FLAGS(c) == TRADITIONAL_BOUND) {
+            FBC_NBOUND_UTF8(isWORDCHAR_L1, isWORDCHAR_uni,
+                            isWORDCHAR_utf8_safe);
+            break;
+        }
+
+        to_complement = 1;
+        goto do_boundu_utf8;
+
+      case BOUND_t8_pb:
+      case BOUND_t8_p8:
+        /* regcomp.c makes sure that these only have the traditional \b
+         * meaning. */
+        assert(FLAGS(c) == TRADITIONAL_BOUND);
+
+        /* FALLTHROUGH */
+
+      case BOUNDU_t8_pb:
+      case BOUNDU_t8_p8:
+        if ((bound_type) FLAGS(c) == TRADITIONAL_BOUND) {
+            FBC_BOUND_UTF8(isWORDCHAR_L1, isWORDCHAR_uni, isWORDCHAR_utf8_safe);
+            break;
+        }
+
+      do_boundu_utf8:
+        if (s == reginfo->strbeg) {
+            if (reginfo->intuit || regtry(reginfo, &s))
+            {
+                goto got_it;
+            }
+
+            /* Didn't match.  Try at the next position (if there is one) */
+            s += UTF8_SAFE_SKIP(s, reginfo->strend);
+            if (UNLIKELY(s >= reginfo->strend)) {
+                break;
+            }
+        }
+
+        switch((bound_type) FLAGS(c)) {
+          case TRADITIONAL_BOUND: /* Should have already been handled */
+            assert(0);
+            break;
+
+          case GCB_BOUND:
+            {
+                GCB_enum before = getGCB_VAL_UTF8(
+                                           reghop3((U8*)s, -1,
+                                                   (U8*)(reginfo->strbeg)),
+                                           (U8*) reginfo->strend);
+                while (s < strend) {
+                    GCB_enum after = getGCB_VAL_UTF8((U8*) s,
+                                                    (U8*) reginfo->strend);
+                    if (   (to_complement ^ isGCB(before,
                                                    after,
                                                    (U8*) reginfo->strbeg,
                                                    (U8*) s,
-                                                  (U8*) reginfo->strend,
-                                                  utf8_target))
-                            && (reginfo->intuit || regtry(reginfo, &s)))
-                        {
-                            goto got_it;
-                        }
-                        before = after;
-                        s++;
+                                                  1 /* target is utf8 */ ))
+                        && (reginfo->intuit || regtry(reginfo, &s)))
+                    {
+                        goto got_it;
                      }
+                    before = after;
+                    s += UTF8_SAFE_SKIP(s, reginfo->strend);
                  }
+            }
+            break;
  
-                /* Here are at the final position in the target string.  The SB
-                 * value is always true here, so matches, depending on other
-                 * constraints */
-                if (reginfo->intuit || regtry(reginfo, &s)) {
-                    goto got_it;
+          case LB_BOUND:
+            {
+                LB_enum before = getLB_VAL_UTF8(reghop3((U8*)s,
+                                                        -1,
+                                                        (U8*)(reginfo->strbeg)),
+                                                   (U8*) reginfo->strend);
+                while (s < strend) {
+                    LB_enum after = getLB_VAL_UTF8((U8*) s,
+                                                   (U8*) reginfo->strend);
+                    if (to_complement ^ isLB(before,
+                                             after,
+                                             (U8*) reginfo->strbeg,
+                                             (U8*) s,
+                                             (U8*) reginfo->strend,
+                                             1 /* target is utf8 */ )
+                        && (reginfo->intuit || regtry(reginfo, &s)))
+                    {
+                        goto got_it;
+                    }
+                    before = after;
+                    s += UTF8_SAFE_SKIP(s, reginfo->strend);
                  }
+            }
  
-                break;
+            break;
  
-            case WB_BOUND:
-                if (s == reginfo->strbeg) {
-                    if (reginfo->intuit || regtry(reginfo, &s)) {
+          case SB_BOUND:
+            {
+                SB_enum before = getSB_VAL_UTF8(reghop3((U8*)s,
+                                                    -1,
+                                                    (U8*)(reginfo->strbeg)),
+                                                  (U8*) reginfo->strend);
+                while (s < strend) {
+                    SB_enum after = getSB_VAL_UTF8((U8*) s,
+                                                     (U8*) reginfo->strend);
+                    if ((to_complement ^ isSB(before,
+                                              after,
+                                              (U8*) reginfo->strbeg,
+                                              (U8*) s,
+                                              (U8*) reginfo->strend,
+                                              1 /* target is utf8 */ ))
+                        && (reginfo->intuit || regtry(reginfo, &s)))
+                    {
                          goto got_it;
                      }
-                    s += (utf8_target) ? UTF8SKIP(s) : 1;
-                    if (UNLIKELY(s >= reginfo->strend)) {
-                        break;
-                    }
+                    before = after;
+                    s += UTF8_SAFE_SKIP(s, reginfo->strend);
                  }
+            }
  
-                if (utf8_target) {
-                    /* We are at a boundary between char_sub_0 and char_sub_1.
-                     * We also keep track of the value for char_sub_-1 as we
-                     * loop through the line.   Context may be needed to make a
-                     * determination, and if so, this can save having to
-                     * recalculate it */
-                    WB_enum previous = WB_UNKNOWN;
-                    WB_enum before = getWB_VAL_UTF8(
-                                              reghop3((U8*)s,
-                                                      -1,
-                                                      (U8*)(reginfo->strbeg)),
-                                              (U8*) reginfo->strend);
-                    while (s < strend) {
-                        WB_enum after = getWB_VAL_UTF8((U8*) s,
-                                                        (U8*) reginfo->strend);
-                        if ((to_complement ^ isWB(previous,
-                                                  before,
-                                                  after,
-                                                  (U8*) reginfo->strbeg,
-                                                  (U8*) s,
-                                                  (U8*) reginfo->strend,
-                                                  utf8_target))
-                            && (reginfo->intuit || regtry(reginfo, &s)))
-                        {
-                            goto got_it;
-                        }
-                        previous = before;
-                        before = after;
-                        s += UTF8SKIP(s);
-                    }
-                }
-                else {  /* Not utf8. */
-                    WB_enum previous = WB_UNKNOWN;
-                    WB_enum before = getWB_VAL_CP((U8) *(s -1));
-                    while (s < strend) {
-                        WB_enum after = getWB_VAL_CP((U8) *s);
-                        if ((to_complement ^ isWB(previous,
-                                                  before,
-                                                  after,
-                                                  (U8*) reginfo->strbeg,
-                                                  (U8*) s,
-                                                  (U8*) reginfo->strend,
-                                                  utf8_target))
-                            && (reginfo->intuit || regtry(reginfo, &s)))
-                        {
-                            goto got_it;
-                        }
-                        previous = before;
-                        before = after;
-                        s++;
+            break;
+
+          case WB_BOUND:
+            {
+                /* We are at a boundary between char_sub_0 and char_sub_1.
+                 * We also keep track of the value for char_sub_-1 as we
+                 * loop through the line.   Context may be needed to make a
+                 * determination, and if so, this can save having to
+                 * recalculate it */
+                WB_enum previous = WB_UNKNOWN;
+                WB_enum before = getWB_VAL_UTF8(
+                                          reghop3((U8*)s,
+                                                  -1,
+                                                  (U8*)(reginfo->strbeg)),
+                                          (U8*) reginfo->strend);
+                while (s < strend) {
+                    WB_enum after = getWB_VAL_UTF8((U8*) s,
+                                                    (U8*) reginfo->strend);
+                    if ((to_complement ^ isWB(previous,
+                                              before,
+                                              after,
+                                              (U8*) reginfo->strbeg,
+                                              (U8*) s,
+                                              (U8*) reginfo->strend,
+                                              1 /* target is utf8 */ ))
+                        && (reginfo->intuit || regtry(reginfo, &s)))
+                    {
+                        goto got_it;
                      }
+                    previous = before;
+                    before = after;
+                    s += UTF8_SAFE_SKIP(s, reginfo->strend);
                  }
+            }
+        }
  
-                if (reginfo->intuit || regtry(reginfo, &s)) {
-                    goto got_it;
-                }
+        /* Here we are at the final position in the target string, which is a
+         * boundary by definition, so it matches, depending on other
+         * constraints. */
+
+        if (   reginfo->intuit
+            || (s <= reginfo->strend && regtry(reginfo, &s)))
+        {
+            goto got_it;
          }
          break;
  
-    case LNBREAK:
-        REXEC_FBC_CSCAN(is_LNBREAK_utf8_safe(s, strend),
-                        is_LNBREAK_latin1_safe(s, strend)
-        );
+      case LNBREAK_t8_pb:
+      case LNBREAK_t8_p8:
+        REXEC_FBC_UTF8_CLASS_SCAN(is_LNBREAK_utf8_safe(s, strend));
+        break;
+
+      case LNBREAK_tb_pb:
+      case LNBREAK_tb_p8:
+       REXEC_FBC_NON_UTF8_CLASS_SCAN(is_LNBREAK_latin1_safe(s, strend));
          break;
  
-    /* The argument to all the POSIX node types is the class number to pass to
-     * _generic_isCC() to build a mask for searching in PL_charclass[] */
+      /* The argument to all the POSIX node types is the class number to pass
+       * to _generic_isCC() to build a mask for searching in PL_charclass[] */
  
-    case NPOSIXL:
+      case NPOSIXL_t8_pb:
+      case NPOSIXL_t8_p8:
          to_complement = 1;
          /* FALLTHROUGH */
  
-    case POSIXL:
+      case POSIXL_t8_pb:
+      case POSIXL_t8_p8:
          _CHECK_AND_WARN_PROBLEMATIC_LOCALE;
-        REXEC_FBC_CSCAN(to_complement ^ cBOOL(isFOO_utf8_lc(FLAGS(c), (U8 *) s, (U8 *) strend)),
-                        to_complement ^ cBOOL(isFOO_lc(FLAGS(c), *s)));
+        REXEC_FBC_UTF8_CLASS_SCAN(
+            to_complement ^ cBOOL(isFOO_utf8_lc(FLAGS(c), (U8 *) s,
+                                                          (U8 *) strend)));
          break;
  
-    case NPOSIXD:
+      case NPOSIXL_tb_pb:
+      case NPOSIXL_tb_p8:
          to_complement = 1;
          /* FALLTHROUGH */
  
-    case POSIXD:
-        if (utf8_target) {
-            goto posix_utf8;
-        }
-        goto posixa;
-
-    case NPOSIXA:
-        if (utf8_target) {
-            /* The complement of something that matches only ASCII matches all
-             * non-ASCII, plus everything in ASCII that isn't in the class. */
-            REXEC_FBC_CLASS_SCAN(1,   ! isASCII_utf8_safe(s, strend)
-                                   || ! _generic_isCC_A(*s, FLAGS(c)));
-            break;
-        }
+      case POSIXL_tb_pb:
+      case POSIXL_tb_p8:
+        _CHECK_AND_WARN_PROBLEMATIC_LOCALE;
+        REXEC_FBC_NON_UTF8_CLASS_SCAN(
+                                to_complement ^ cBOOL(isFOO_lc(FLAGS(c), *s)));
+        break;
  
-        to_complement = 1;
-        goto posixa;
+      case NPOSIXA_t8_pb:
+      case NPOSIXA_t8_p8:
+        /* The complement of something that matches only ASCII matches all
+         * non-ASCII, plus everything in ASCII that isn't in the class. */
+        REXEC_FBC_UTF8_CLASS_SCAN(   ! isASCII_utf8_safe(s, strend)
+                                  || ! _generic_isCC_A(*s, FLAGS(c)));
+        break;
  
-    case POSIXA:
+      case POSIXA_t8_pb:
+      case POSIXA_t8_p8:
          /* Don't need to worry about utf8, as it can match only a single
           * byte invariant character.  But we do anyway for performance reasons,
           * as otherwise we would have to examine all the continuation
           * characters */
-        if (utf8_target) {
-            REXEC_FBC_CLASS_SCAN(1, _generic_isCC_A(*s, FLAGS(c)));
-            break;
-        }
+        REXEC_FBC_UTF8_CLASS_SCAN(_generic_isCC_A(*s, FLAGS(c)));
+        break;
+
+      case NPOSIXD_tb_pb:
+      case NPOSIXD_tb_p8:
+      case NPOSIXA_tb_pb:
+      case NPOSIXA_tb_p8:
+        to_complement = 1;
+        /* FALLTHROUGH */
  
-      posixa:
-        REXEC_FBC_CLASS_SCAN(0, /* 0=>not-utf8 */
+      case POSIXD_tb_pb:
+      case POSIXD_tb_p8:
+      case POSIXA_tb_pb:
+      case POSIXA_tb_p8:
+        REXEC_FBC_NON_UTF8_CLASS_SCAN(
                          to_complement ^ cBOOL(_generic_isCC_A(*s, FLAGS(c))));
          break;
  
-    case NPOSIXU:
+      case NPOSIXU_tb_pb:
+      case NPOSIXU_tb_p8:
          to_complement = 1;
          /* FALLTHROUGH */
  
-    case POSIXU:
-        if (! utf8_target) {
-            REXEC_FBC_CLASS_SCAN(0, /* 0=>not-utf8 */
+      case POSIXU_tb_pb:
+      case POSIXU_tb_p8:
+            REXEC_FBC_NON_UTF8_CLASS_SCAN(
                                   to_complement ^ cBOOL(_generic_isCC(*s,
                                                                      FLAGS(c))));
-        }
-        else {
+        break;
  
-          posix_utf8:
-            classnum = (_char_class_number) FLAGS(c);
-            switch (classnum) {
-                default:
-                    REXEC_FBC_CLASS_SCAN(1, /* 1=>is-utf8 */
+      case NPOSIXD_t8_pb:
+      case NPOSIXD_t8_p8:
+      case NPOSIXU_t8_pb:
+      case NPOSIXU_t8_p8:
+        to_complement = 1;
+        /* FALLTHROUGH */
+
+      case POSIXD_t8_pb:
+      case POSIXD_t8_p8:
+      case POSIXU_t8_pb:
+      case POSIXU_t8_p8:
+        classnum = (_char_class_number) FLAGS(c);
+        switch (classnum) {
+          default:
+            REXEC_FBC_UTF8_CLASS_SCAN(
                          to_complement ^ cBOOL(_invlist_contains_cp(
-                                              PL_XPosix_ptrs[classnum],
-                                              utf8_to_uvchr_buf((U8 *) s,
+                                                PL_XPosix_ptrs[classnum],
+                                                utf8_to_uvchr_buf((U8 *) s,
                                                                  (U8 *) strend,
                                                                  NULL))));
-                    break;
-                case _CC_ENUM_SPACE:
-                    REXEC_FBC_CLASS_SCAN(1, /* 1=>is-utf8 */
+            break;
+
+          case _CC_ENUM_SPACE:
+            REXEC_FBC_UTF8_CLASS_SCAN(
                          to_complement ^ cBOOL(isSPACE_utf8_safe(s, strend)));
-                    break;
+            break;
  
-                case _CC_ENUM_BLANK:
-                    REXEC_FBC_CLASS_SCAN(1,
+          case _CC_ENUM_BLANK:
+            REXEC_FBC_UTF8_CLASS_SCAN(
                          to_complement ^ cBOOL(isBLANK_utf8_safe(s, strend)));
-                    break;
+            break;
  
-                case _CC_ENUM_XDIGIT:
-                    REXEC_FBC_CLASS_SCAN(1,
-                       to_complement ^ cBOOL(isXDIGIT_utf8_safe(s, strend)));
-                    break;
+          case _CC_ENUM_XDIGIT:
+            REXEC_FBC_UTF8_CLASS_SCAN(
+                        to_complement ^ cBOOL(isXDIGIT_utf8_safe(s, strend)));
+            break;
  
-                case _CC_ENUM_VERTSPACE:
-                    REXEC_FBC_CLASS_SCAN(1,
-                       to_complement ^ cBOOL(isVERTWS_utf8_safe(s, strend)));
-                    break;
+          case _CC_ENUM_VERTSPACE:
+            REXEC_FBC_UTF8_CLASS_SCAN(
+                        to_complement ^ cBOOL(isVERTWS_utf8_safe(s, strend)));
+            break;
  
-                case _CC_ENUM_CNTRL:
-                    REXEC_FBC_CLASS_SCAN(1,
+          case _CC_ENUM_CNTRL:
+            REXEC_FBC_UTF8_CLASS_SCAN(
                          to_complement ^ cBOOL(isCNTRL_utf8_safe(s, strend)));
-                    break;
-            }
+            break;
          }
          break;
  
-    case AHOCORASICKC:
-    case AHOCORASICK:
+      case AHOCORASICKC_tb_pb:
+      case AHOCORASICKC_tb_p8:
+      case AHOCORASICKC_t8_pb:
+      case AHOCORASICKC_t8_p8:
+      case AHOCORASICK_tb_pb:
+      case AHOCORASICK_tb_p8:
+      case AHOCORASICK_t8_pb:
+      case AHOCORASICK_t8_p8:
          {
              DECL_TRIE_TYPE(c);
              /* what trie are we using right now */
              reg_ac_data *aho = (reg_ac_data*)progi->data->data[ ARG( c ) ];
-            reg_trie_data *trie = (reg_trie_data*)progi->data->data[ aho->trie ];
+            reg_trie_data *trie = (reg_trie_data*)progi->data->data[aho->trie];
              HV *widecharmap = MUTABLE_HV(progi->data->data[ aho->trie + 1 ]);
  
              const char *last_start = strend - trie->minlen;
@@ -2815,7 +3130,7 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
              U8 *bitmap=NULL;
  
  
-            GET_RE_DEBUG_FLAGS_DECL;
+            DECLARE_AND_GET_RE_DEBUG_FLAGS;
  
              /* We can't just allocate points here. We need to wrap it in
               * an SV so it gets freed properly if there is a croak while
@@ -2874,19 +3189,26 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
                      if( state==1 ) {
                          if ( bitmap ) {
                              DEBUG_TRIE_EXECUTE_r(
-                                if ( uc <= (U8*)last_start && !BITMAP_TEST(bitmap,*uc) ) {
-                                    dump_exec_pos( (char *)uc, c, strend, real_start,
+                                if (  uc <= (U8*)last_start
+                                    && !BITMAP_TEST(bitmap,*uc) )
+                                {
+                                    dump_exec_pos( (char *)uc, c, strend,
+                                        real_start,
                                          (char *)uc, utf8_target, 0 );
                                      Perl_re_printf( aTHX_
                                          " Scanning for legal start char...\n");
                                  }
                              );
                              if (utf8_target) {
-                                while ( uc <= (U8*)last_start && !BITMAP_TEST(bitmap,*uc) ) {
+                                while (  uc <= (U8*)last_start
+                                       && !BITMAP_TEST(bitmap,*uc) )
+                                {
                                      uc += UTF8SKIP(uc);
                                  }
                              } else {
-                                while ( uc <= (U8*)last_start  && !BITMAP_TEST(bitmap,*uc) ) {
+                                while (  uc <= (U8*)last_start
+                                       && ! BITMAP_TEST(bitmap,*uc) )
+                                {
                                      uc++;
                                  }
                              }
@@ -2896,7 +3218,8 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
                      }
  
                      if ( word ) {
-                        U8 *lpos= points[ (pointpos - trie->wordinfo[word].len) % maxlen ];
+                        U8 *lpos= points[ (pointpos - trie->wordinfo[word].len)
+                                                                    % maxlen ];
                          if (!leftmost || lpos < leftmost) {
                              DEBUG_r(accepted_word=word);
                              leftmost= lpos;
@@ -2932,7 +3255,7 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
  
                          DEBUG_TRIE_EXECUTE_r({
                              if (failed)
-                                dump_exec_pos( (char *)uc, c, strend, real_start,
+                                dump_exec_pos((char *)uc, c, strend, real_start,
                                      s,   utf8_target, 0 );
                              Perl_re_printf( aTHX_
                                  "%sState: %4" UVxf ", word=%" UVxf,
@@ -2977,7 +3300,9 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
                      }
                  }
                  if ( aho->states[ state ].wordnum ) {
-                    U8 *lpos = points[ (pointpos - trie->wordinfo[aho->states[ state ].wordnum].len) % maxlen ];
+                    U8 *lpos = points[ (pointpos
+                                      - trie->wordinfo[aho->states[ state ]
+                                                    .wordnum].len) % maxlen ];
                      if (!leftmost || lpos < leftmost) {
                          DEBUG_r(accepted_word=aho->states[ state ].wordnum);
                          leftmost = lpos;
@@ -2986,7 +3311,9 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
                  if (leftmost) {
                      s = (char*)leftmost;
                      DEBUG_TRIE_EXECUTE_r({
-                        Perl_re_printf( aTHX_  "Matches word #%" UVxf " at position %" IVdf ". Trying full pattern...\n",
+                        Perl_re_printf( aTHX_  "Matches word #%" UVxf
+                                        " at position %" IVdf ". Trying full"
+                                        " pattern...\n",
                              (UV)accepted_word, (IV)(s - real_start)
                          );
                      });
@@ -2995,9 +3322,13 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
                          LEAVE;
                          goto got_it;
                      }
-                    s = HOPc(s,1);
+                    if (s < reginfo->strend) {
+                        s = HOPc(s,1);
+                    }
                      DEBUG_TRIE_EXECUTE_r({
-                        Perl_re_printf( aTHX_ "Pattern failed. Looking for new start point...\n");
+                        Perl_re_printf( aTHX_
+                                       "Pattern failed. Looking for new start"
+                                       " point...\n");
                      });
                  } else {
                      DEBUG_TRIE_EXECUTE_r(
@@ -3009,10 +3340,23 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
              LEAVE;
          }
          break;
-    default:
+
+      case EXACTFU_REQ8_t8_pb:
+      case EXACTFUP_tb_p8:
+      case EXACTFUP_t8_p8:
+      case EXACTF_tb_p8:
+      case EXACTF_t8_p8:   /* This node only generated for non-utf8 patterns */
+      case EXACTFAA_NO_TRIE_tb_p8:
+      case EXACTFAA_NO_TRIE_t8_p8: /* This node only generated for non-utf8
+                                      patterns */
+        assert(0);
+
+      default:
          Perl_croak(aTHX_ "panic: unknown regstclass %d", (int)OP(c));
-    }
+    } /* End of switch on node type */
+
      return 0;
+
    got_it:
      return s;
  }
@@ -3196,7 +3540,7 @@ Perl_regexec_flags(pTHX_ REGEXP * const rx, char *stringarg, char *strend,
      regmatch_info *const reginfo = &reginfo_buf;
      regexp_paren_pair *swap = NULL;
      I32 oldsave;
-    GET_RE_DEBUG_FLAGS_DECL;
+    DECLARE_AND_GET_RE_DEBUG_FLAGS;
  
      PERL_ARGS_ASSERT_REGEXEC_FLAGS;
      PERL_UNUSED_ARG(data);
@@ -3250,7 +3594,7 @@ Perl_regexec_flags(pTHX_ REGEXP * const rx, char *stringarg, char *strend,
                  if (!startpos ||
                      ((flags & REXEC_FAIL_ON_UNDERFLOW) && startpos < stringarg))
                  {
-                    DEBUG_r(Perl_re_printf( aTHX_
+                    DEBUG_GPOS_r(Perl_re_printf( aTHX_
                              "fail: ganch-gofs before earliest possible start\n"));
                      return 0;
                  }
@@ -3269,8 +3613,8 @@ Perl_regexec_flags(pTHX_ REGEXP * const rx, char *stringarg, char *strend,
  
      minlen = prog->minlen;
      if ((startpos + minlen) > strend || startpos < strbeg) {
-        DEBUG_r(Perl_re_printf( aTHX_
-                    "Regex match can't succeed, so not even tried\n"));
+       DEBUG_EXECUTE_r(Perl_re_printf( aTHX_
+                        "Regex match can't succeed, so not even tried\n"));
          return 0;
      }
  
@@ -3315,7 +3659,7 @@ Perl_regexec_flags(pTHX_ REGEXP * const rx, char *stringarg, char *strend,
              RXp_MATCH_UTF8_set(prog, utf8_target);
              prog->offs[0].start = s - strbeg;
              prog->offs[0].end = utf8_target
-                ? (char*)utf8_hop((U8*)s, prog->minlenret) - strbeg
+                ? (char*)utf8_hop_forward((U8*)s, prog->minlenret, (U8 *) strend) - strbeg
                  : s - strbeg + prog->minlenret;
              if ( !(flags & REXEC_NOT_FIRST) )
                  S_reg_set_capture_string(aTHX_ rx,
@@ -3424,7 +3768,8 @@ Perl_regexec_flags(pTHX_ REGEXP * const rx, char *stringarg, char *strend,
             we switch it back; otherwise we leave it swapped.
          */
          swap = prog->offs;
-        /* do we need a save destructor here for eval dies? */
+        /* avoid leak if we die, or clean up anyway if match completes */
+        SAVEFREEPV(swap);
          Newxz(prog->offs, (prog->nparens + 1), regexp_paren_pair);
          DEBUG_BUFFERS_r(Perl_re_exec_indentf( aTHX_
             "rex=0x%" UVxf " saving  offs: orig=0x%" UVxf " new=0x%" UVxf "\n",
@@ -3509,11 +3854,11 @@ Perl_regexec_flags(pTHX_ REGEXP * const rx, char *stringarg, char *strend,
                  to_utf8_substr(prog);
              }
              ch = SvPVX_const(prog->anchored_utf8)[0];
-           REXEC_FBC_SCAN(0,   /* 0=>not-utf8 */
+           REXEC_FBC_UTF8_SCAN(
                 if (*s == ch) {
                     DEBUG_EXECUTE_r( did_match = 1 );
                     if (regtry(reginfo, &s)) goto got_it;
-                   s += UTF8SKIP(s);
+                   s += UTF8_SAFE_SKIP(s, strend);
                     while (s < strend && *s == ch)
                         s += UTF8SKIP(s);
                 }
@@ -3527,7 +3872,7 @@ Perl_regexec_flags(pTHX_ REGEXP * const rx, char *stringarg, char *strend,
                  }
              }
              ch = SvPVX_const(prog->anchored_substr)[0];
-           REXEC_FBC_SCAN(0,   /* 0=>not-utf8 */
+           REXEC_FBC_NON_UTF8_SCAN(
                 if (*s == ch) {
                     DEBUG_EXECUTE_r( did_match = 1 );
                     if (regtry(reginfo, &s)) goto got_it;
@@ -3809,17 +4154,6 @@ Perl_regexec_flags(pTHX_ REGEXP * const rx, char *stringarg, char *strend,
          goto phooey;
      }
  
-    DEBUG_BUFFERS_r(
-       if (swap)
-            Perl_re_exec_indentf( aTHX_
-               "rex=0x%" UVxf " freeing offs: 0x%" UVxf "\n",
-               0,
-                PTR2UV(prog),
-               PTR2UV(swap)
-           );
-    );
-    Safefree(swap);
-
      /* clean up; this will trigger destructors that will free all slabs
       * above the current one, and cleanup the regmatch_info_aux
       * and regmatch_info_aux_eval sructs */
@@ -3841,24 +4175,29 @@ Perl_regexec_flags(pTHX_ REGEXP * const rx, char *stringarg, char *strend,
      DEBUG_EXECUTE_r(Perl_re_printf( aTHX_  "%sMatch failed%s\n",
                           PL_colors[4], PL_colors[5]));
  
-    /* clean up; this will trigger destructors that will free all slabs
-     * above the current one, and cleanup the regmatch_info_aux
-     * and regmatch_info_aux_eval sructs */
-
-    LEAVE_SCOPE(oldsave);
-
      if (swap) {
-        /* we failed :-( roll it back */
+        /* we failed :-( roll it back.
+         * Since the swap buffer will be freed on scope exit which follows
+         * shortly, restore the old captures by copying 'swap's original
+         * data to the new offs buffer
+         */
          DEBUG_BUFFERS_r(Perl_re_exec_indentf( aTHX_
-           "rex=0x%" UVxf " rolling back offs: freeing=0x%" UVxf " restoring=0x%" UVxf "\n",
+           "rex=0x%" UVxf " rolling back offs: 0x%" UVxf " will be freed; restoring data to =0x%" UVxf "\n",
             0,
              PTR2UV(prog),
             PTR2UV(prog->offs),
             PTR2UV(swap)
         ));
-        Safefree(prog->offs);
-        prog->offs = swap;
+
+        Copy(swap, prog->offs, prog->nparens + 1, regexp_paren_pair);
      }
+
+    /* clean up; this will trigger destructors that will free all slabs
+     * above the current one, and cleanup the regmatch_info_aux
+     * and regmatch_info_aux_eval structs */
+
+    LEAVE_SCOPE(oldsave);
+
      return 0;
  }
  
@@ -3887,7 +4226,7 @@ S_regtry(pTHX_ regmatch_info *reginfo, char **startposp)
      U32 depth = 0; /* used by REGCP_SET */
  #endif
      RXi_GET_DECL(prog,progi);
-    GET_RE_DEBUG_FLAGS_DECL;
+    DECLARE_AND_GET_RE_DEBUG_FLAGS;
  
      PERL_ARGS_ASSERT_REGTRY;
  
@@ -3933,240 +4272,51 @@ S_regtry(pTHX_ regmatch_info *reginfo, char **startposp)
      result = regmatch(reginfo, *startposp, progi->program + 1);
      if (result != -1) {
         prog->offs[0].end = result;
-       return 1;
-    }
-    if (reginfo->cutpoint)
-        *startposp= reginfo->cutpoint;
-    REGCP_UNWIND(lastcp);
-    return 0;
-}
-
-
-#define sayYES goto yes
-#define sayNO goto no
-#define sayNO_SILENT goto no_silent
-
-/* we dont use STMT_START/END here because it leads to 
-   "unreachable code" warnings, which are bogus, but distracting. */
-#define CACHEsayNO \
-    if (ST.cache_mask) \
-       reginfo->info_aux->poscache[ST.cache_offset] |= ST.cache_mask; \
-    sayNO
-
-/* this is used to determine how far from the left messages like
-   'failed...' are printed in regexec.c. It should be set such that
-   messages are inline with the regop output that created them.
-*/
-#define REPORT_CODE_OFF 29
-#define INDENT_CHARS(depth) ((int)(depth) % 20)
-#ifdef DEBUGGING
-int
-Perl_re_exec_indentf(pTHX_ const char *fmt, U32 depth, ...)
-{
-    va_list ap;
-    int result;
-    PerlIO *f= Perl_debug_log;
-    PERL_ARGS_ASSERT_RE_EXEC_INDENTF;
-    va_start(ap, depth);
-    PerlIO_printf(f, "%*s|%4" UVuf "| %*s", REPORT_CODE_OFF, "", (UV)depth, INDENT_CHARS(depth), "" );
-    result = PerlIO_vprintf(f, fmt, ap);
-    va_end(ap);
-    return result;
-}
-#endif /* DEBUGGING */
-
-
-#define CHRTEST_UNINIT -1001 /* c1/c2 haven't been calculated yet */
-#define CHRTEST_VOID   -1000 /* the c1/c2 "next char" test should be skipped */
-#define CHRTEST_NOT_A_CP_1 -999
-#define CHRTEST_NOT_A_CP_2 -998
-
-/* grab a new slab and return the first slot in it */
-
-STATIC regmatch_state *
-S_push_slab(pTHX)
-{
-    regmatch_slab *s = PL_regmatch_slab->next;
-    if (!s) {
-       Newx(s, 1, regmatch_slab);
-       s->prev = PL_regmatch_slab;
-       s->next = NULL;
-       PL_regmatch_slab->next = s;
-    }
-    PL_regmatch_slab = s;
-    return SLAB_FIRST(s);
-}
-
-
-/* push a new state then goto it */
-
-#define PUSH_STATE_GOTO(state, node, input) \
-    pushinput = input; \
-    scan = node; \
-    st->resume_state = state; \
-    goto push_state;
-
-/* push a new state with success backtracking, then goto it */
-
-#define PUSH_YES_STATE_GOTO(state, node, input) \
-    pushinput = input; \
-    scan = node; \
-    st->resume_state = state; \
-    goto push_yes_state;
-
-
-
-
-/*
-
-regmatch() - main matching routine
-
-This is basically one big switch statement in a loop. We execute an op,
-set 'next' to point the next op, and continue. If we come to a point which
-we may need to backtrack to on failure such as (A|B|C), we push a
-backtrack state onto the backtrack stack. On failure, we pop the top
-state, and re-enter the loop at the state indicated. If there are no more
-states to pop, we return failure.
-
-Sometimes we also need to backtrack on success; for example /A+/, where
-after successfully matching one A, we need to go back and try to
-match another one; similarly for lookahead assertions: if the assertion
-completes successfully, we backtrack to the state just before the assertion
-and then carry on.  In these cases, the pushed state is marked as
-'backtrack on success too'. This marking is in fact done by a chain of
-pointers, each pointing to the previous 'yes' state. On success, we pop to
-the nearest yes state, discarding any intermediate failure-only states.
-Sometimes a yes state is pushed just to force some cleanup code to be
-called at the end of a successful match or submatch; e.g. (??{$re}) uses
-it to free the inner regex.
-
-Note that failure backtracking rewinds the cursor position, while
-success backtracking leaves it alone.
-
-A pattern is complete when the END op is executed, while a subpattern
-such as (?=foo) is complete when the SUCCESS op is executed. Both of these
-ops trigger the "pop to last yes state if any, otherwise return true"
-behaviour.
-
-A common convention in this function is to use A and B to refer to the two
-subpatterns (or to the first nodes thereof) in patterns like /A*B/: so A is
-the subpattern to be matched possibly multiple times, while B is the entire
-rest of the pattern. Variable and state names reflect this convention.
-
-The states in the main switch are the union of ops and failure/success of
-substates associated with with that op.  For example, IFMATCH is the op
-that does lookahead assertions /(?=A)B/ and so the IFMATCH state means
-'execute IFMATCH'; while IFMATCH_A is a state saying that we have just
-successfully matched A and IFMATCH_A_fail is a state saying that we have
-just failed to match A. Resume states always come in pairs. The backtrack
-state we push is marked as 'IFMATCH_A', but when that is popped, we resume
-at IFMATCH_A or IFMATCH_A_fail, depending on whether we are backtracking
-on success or failure.
-
-The struct that holds a backtracking state is actually a big union, with
-one variant for each major type of op. The variable st points to the
-top-most backtrack struct. To make the code clearer, within each
-block of code we #define ST to alias the relevant union.
-
-Here's a concrete example of a (vastly oversimplified) IFMATCH
-implementation:
-
-    switch (state) {
-    ....
-
-#define ST st->u.ifmatch
-
-    case IFMATCH: // we are executing the IFMATCH op, (?=A)B
-       ST.foo = ...; // some state we wish to save
-       ...
-       // push a yes backtrack state with a resume value of
-       // IFMATCH_A/IFMATCH_A_fail, then continue execution at the
-       // first node of A:
-       PUSH_YES_STATE_GOTO(IFMATCH_A, A, newinput);
-       // NOTREACHED
-
-    case IFMATCH_A: // we have successfully executed A; now continue with B
-       next = B;
-       bar = ST.foo; // do something with the preserved value
-       break;
-
-    case IFMATCH_A_fail: // A failed, so the assertion failed
-       ...;   // do some housekeeping, then ...
-       sayNO; // propagate the failure
-
-#undef ST
-
-    ...
-    }
-
-For any old-timers reading this who are familiar with the old recursive
-approach, the code above is equivalent to:
-
-    case IFMATCH: // we are executing the IFMATCH op, (?=A)B
-    {
-       int foo = ...
-       ...
-       if (regmatch(A)) {
-           next = B;
-           bar = foo;
-           break;
-       }
-       ...;   // do some housekeeping, then ...
-       sayNO; // propagate the failure
+       return 1;
      }
+    if (reginfo->cutpoint)
+        *startposp= reginfo->cutpoint;
+    REGCP_UNWIND(lastcp);
+    return 0;
+}
  
-The topmost backtrack state, pointed to by st, is usually free. If you
-want to claim it, populate any ST.foo fields in it with values you wish to
-save, then do one of
-
-       PUSH_STATE_GOTO(resume_state, node, newinput);
-       PUSH_YES_STATE_GOTO(resume_state, node, newinput);
-
-which sets that backtrack state's resume value to 'resume_state', pushes a
-new free entry to the top of the backtrack stack, then goes to 'node'.
-On backtracking, the free slot is popped, and the saved state becomes the
-new free state. An ST.foo field in this new top state can be temporarily
-accessed to retrieve values, but once the main loop is re-entered, it
-becomes available for reuse.
-
-Note that the depth of the backtrack stack constantly increases during the
-left-to-right execution of the pattern, rather than going up and down with
-the pattern nesting. For example the stack is at its maximum at Z at the
-end of the pattern, rather than at X in the following:
-
-    /(((X)+)+)+....(Y)+....Z/
-
-The only exceptions to this are lookahead/behind assertions and the cut,
-(?>A), which pop all the backtrack states associated with A before
-continuing.
- 
-Backtrack state structs are allocated in slabs of about 4K in size.
-PL_regmatch_state and st always point to the currently active state,
-and PL_regmatch_slab points to the slab currently containing
-PL_regmatch_state.  The first time regmatch() is called, the first slab is
-allocated, and is never freed until interpreter destruction. When the slab
-is full, a new one is allocated and chained to the end. At exit from
-regmatch(), slabs allocated since entry are freed.
-
+/* this is used to determine how far from the left messages like
+   'failed...' are printed in regexec.c. It should be set such that
+   messages are inline with the regop output that created them.
  */
- 
-
-#define DEBUG_STATE_pp(pp)                                  \
-    DEBUG_STATE_r({                                         \
-        DUMP_EXEC_POS(locinput, scan, utf8_target,depth);   \
-        Perl_re_printf( aTHX_                                           \
-            "%*s" pp " %s%s%s%s%s\n",                       \
-            INDENT_CHARS(depth), "",                        \
-            PL_reg_name[st->resume_state],                  \
-            ((st==yes_state||st==mark_state) ? "[" : ""),   \
-            ((st==yes_state) ? "Y" : ""),                   \
-            ((st==mark_state) ? "M" : ""),                  \
-            ((st==yes_state||st==mark_state) ? "]" : "")    \
-        );                                                  \
-    });
+#define REPORT_CODE_OFF 29
+#define INDENT_CHARS(depth) ((int)(depth) % 20)
+#ifdef DEBUGGING
+int
+Perl_re_exec_indentf(pTHX_ const char *fmt, U32 depth, ...)
+{
+    va_list ap;
+    int result;
+    PerlIO *f= Perl_debug_log;
+    PERL_ARGS_ASSERT_RE_EXEC_INDENTF;
+    va_start(ap, depth);
+    PerlIO_printf(f, "%*s|%4" UVuf "| %*s", REPORT_CODE_OFF, "", (UV)depth, INDENT_CHARS(depth), "" );
+    result = PerlIO_vprintf(f, fmt, ap);
+    va_end(ap);
+    return result;
+}
+#endif /* DEBUGGING */
  
+/* grab a new slab and return the first slot in it */
  
-#define REG_NODE_NUM(x) ((x) ? (int)((x)-prog) : -1)
+STATIC regmatch_state *
+S_push_slab(pTHX)
+{
+    regmatch_slab *s = PL_regmatch_slab->next;
+    if (!s) {
+       Newx(s, 1, regmatch_slab);
+       s->prev = PL_regmatch_slab;
+       s->next = NULL;
+       PL_regmatch_slab->next = s;
+    }
+    PL_regmatch_slab = s;
+    return SLAB_FIRST(s);
+}
  
  #ifdef DEBUGGING
  
@@ -4295,14 +4445,19 @@ S_reg_check_named_buff_matched(const regexp *rex, const regnode *scan)
      return 0;
  }
  
+#define CHRTEST_UNINIT -1001 /* c1/c2 haven't been calculated yet */
+#define CHRTEST_VOID   -1000 /* the c1/c2 "next char" test should be skipped */
+#define CHRTEST_NOT_A_CP_1 -999
+#define CHRTEST_NOT_A_CP_2 -998
  
  static bool
  S_setup_EXACTISH_ST_c1_c2(pTHX_ const regnode * const text_node, int *c1p,
          U8* c1_utf8, int *c2p, U8* c2_utf8, regmatch_info *reginfo)
  {
-    /* This function determines if there are one or two characters that match
-     * the first character of the passed-in EXACTish node <text_node>, and if
-     * so, returns them in the passed-in pointers.
+    /* This function determines if there are zero, one, two, or more characters
+     * that match the first character of the passed-in EXACTish node
+     * <text_node>, and if there are one or two, it returns them in the
+     * passed-in pointers.
       *
       * If it determines that no possible character in the target string can
       * match, it returns FALSE; otherwise TRUE.  (The FALSE situation occurs if
@@ -4355,27 +4510,25 @@ S_setup_EXACTISH_ST_c1_c2(pTHX_ const regnode * const text_node, int *c1p,
      UV c1 = (UV)CHRTEST_NOT_A_CP_1;
      UV c2 = (UV)CHRTEST_NOT_A_CP_2;
      bool use_chrtest_void = FALSE;
-    const bool is_utf8_pat = reginfo->is_utf8_pat;
+    const bool utf8_pat = reginfo->is_utf8_pat;
  
      /* Used when we have both utf8 input and utf8 output, to avoid converting
       * to/from code points */
      bool utf8_has_been_setup = FALSE;
  
-    dVAR;
  
      U8 *pat = (U8*)STRING(text_node);
      U8 folded[UTF8_MAX_FOLD_CHAR_EXPAND * UTF8_MAXBYTES_CASE + 1] = { '\0' };
+    const U8 op = OP(text_node);
  
-    if (   OP(text_node) == EXACT
-        || OP(text_node) == EXACT_ONLY8
-        || OP(text_node) == EXACTL)
-    {
+    if (! isEXACTFish(OP(text_node))) {
  
          /* In an exact node, only one thing can be matched, that first
           * character.  If both the pat and the target are UTF-8, we can just
           * copy the input to the output, avoiding finding the code point of
           * that character */
-        if (!is_utf8_pat) {
+        if (! utf8_pat) {
+            assert(! isEXACT_REQ8(OP(text_node)));
              c2 = c1 = *pat;
          }
          else if (utf8_target) {
@@ -4383,12 +4536,15 @@ S_setup_EXACTISH_ST_c1_c2(pTHX_ const regnode * const text_node, int *c1p,
              Copy(pat, c2_utf8, UTF8SKIP(pat), U8);
              utf8_has_been_setup = TRUE;
          }
+        else if (isEXACT_REQ8(OP(text_node))) {
+            return FALSE;   /* Can only match UTF-8 target */
+        }
          else {
              c2 = c1 = valid_utf8_to_uvchr(pat, NULL);
          }
      }
      else { /* an EXACTFish node */
-        U8 *pat_end = pat + STR_LEN(text_node);
+        U8 *pat_end = pat + STR_LENs(text_node);
  
          /* An EXACTFL node has at least some characters unfolded, because what
           * they match is not known until now.  So, now is the time to fold
@@ -4401,9 +4557,9 @@ S_setup_EXACTISH_ST_c1_c2(pTHX_ const regnode * const text_node, int *c1p,
           * fold.  But, in such a pattern only locale-problematic characters
           * aren't folded, so we can skip this completely if the first character
           * in the node isn't one of the tricky ones */
-        if (OP(text_node) == EXACTFL) {
+        if (op == EXACTFL) {
  
-            if (! is_utf8_pat) {
+            if (! utf8_pat) {
                  if (IN_UTF8_CTYPE_LOCALE && *pat == LATIN_SMALL_LETTER_SHARP_S)
                  {
                      folded[0] = folded[1] = 's';
@@ -4438,8 +4594,8 @@ S_setup_EXACTISH_ST_c1_c2(pTHX_ const regnode * const text_node, int *c1p,
              }
          }
  
-        if (    ( is_utf8_pat && is_MULTI_CHAR_FOLD_utf8_safe(pat, pat_end))
-             || (!is_utf8_pat && is_MULTI_CHAR_FOLD_latin1_safe(pat, pat_end)))
+        if (    ( utf8_pat && is_MULTI_CHAR_FOLD_utf8_safe(pat, pat_end))
+             || (!utf8_pat && is_MULTI_CHAR_FOLD_latin1_safe(pat, pat_end)))
          {
              /* Multi-character folds require more context to sort out.  Also
               * PL_utf8_foldclosures used below doesn't handle them, so have to
@@ -4447,10 +4603,10 @@ S_setup_EXACTISH_ST_c1_c2(pTHX_ const regnode * const text_node, int *c1p,
              use_chrtest_void = TRUE;
          }
          else { /* an EXACTFish node which doesn't begin with a multi-char fold */
-            c1 = is_utf8_pat ? valid_utf8_to_uvchr(pat, NULL) : *pat;
+            c1 = utf8_pat ? valid_utf8_to_uvchr(pat, NULL) : *pat;
  
              if (   UNLIKELY(PL_in_utf8_turkic_locale)
-                && OP(text_node) == EXACTFL
+                && op == EXACTFL
                  && UNLIKELY(   c1 == 'i' || c1 == 'I'
                              || c1 == LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE
                              || c1 == LATIN_SMALL_LETTER_DOTLESS_I))
@@ -4470,8 +4626,8 @@ S_setup_EXACTISH_ST_c1_c2(pTHX_ const regnode * const text_node, int *c1p,
                  }
              }
              else if (c1 > 255) {
-                const unsigned int * remaining_folds;
-                unsigned int first_fold;
+                const U32 * remaining_folds;
+                U32 first_fold;
  
                  /* Look up what code points (besides c1) fold to c1;  e.g.,
                   * [ 'K', KELVIN_SIGN ] both fold to 'k'. */
@@ -4496,10 +4652,10 @@ S_setup_EXACTISH_ST_c1_c2(pTHX_ const regnode * const text_node, int *c1p,
                       * circumstances.  If it isn't, it means the only legal
                       * match of c1 is itself. */
                      if (    c2 < 256
-                        && (   (   OP(text_node) == EXACTFL
+                        && (   (   op == EXACTFL
                                  && ! IN_UTF8_CTYPE_LOCALE)
-                            || ((     OP(text_node) == EXACTFAA
-                                   || OP(text_node) == EXACTFAA_NO_TRIE)
+                            || ((     op == EXACTFAA
+                                   || op == EXACTFAA_NO_TRIE)
                                  && (isASCII(c1) || isASCII(c2)))))
                      {
                          c2 = c1;
@@ -4509,9 +4665,9 @@ S_setup_EXACTISH_ST_c1_c2(pTHX_ const regnode * const text_node, int *c1p,
              else /* Here, c1 is <= 255 */
                  if (   utf8_target
                      && HAS_NONLATIN1_FOLD_CLOSURE(c1)
-                    && ( ! (OP(text_node) == EXACTFL && ! IN_UTF8_CTYPE_LOCALE))
-                    && (   (   OP(text_node) != EXACTFAA
-                            && OP(text_node) != EXACTFAA_NO_TRIE)
+                    && ( ! (op == EXACTFL && ! IN_UTF8_CTYPE_LOCALE))
+                    && (   (   op != EXACTFAA
+                            && op != EXACTFAA_NO_TRIE)
                          ||   ! isASCII(c1)))
              {
                  /* Here, there could be something above Latin1 in the target
@@ -4528,7 +4684,7 @@ S_setup_EXACTISH_ST_c1_c2(pTHX_ const regnode * const text_node, int *c1p,
              }
              else { /* Here nothing above Latin1 can fold to the pattern
                        character */
-                switch (OP(text_node)) {
+                switch (op) {
  
                      case EXACTFL:   /* /l rules */
                          c2 = PL_fold_locale[c1];
@@ -4536,7 +4692,7 @@ S_setup_EXACTISH_ST_c1_c2(pTHX_ const regnode * const text_node, int *c1p,
  
                      case EXACTF:   /* This node only generated for non-utf8
                                      patterns */
-                        assert(! is_utf8_pat);
+                        assert(! utf8_pat);
                          if (! utf8_target) {    /* /d rules */
                              c2 = PL_fold[c1];
                              break;
@@ -4546,16 +4702,19 @@ S_setup_EXACTISH_ST_c1_c2(pTHX_ const regnode * const text_node, int *c1p,
                          * EXACTFAA as nothing in Latin1 folds to ASCII */
                      case EXACTFAA_NO_TRIE:   /* This node only generated for
                                                  non-utf8 patterns */
-                        assert(! is_utf8_pat);
+                        assert(! utf8_pat);
                          /* FALLTHROUGH */
                      case EXACTFAA:
                      case EXACTFUP:
                      case EXACTFU:
                          c2 = PL_fold_latin1[c1];
                          break;
+                    case EXACTFU_REQ8:
+                        return FALSE;
+                        NOT_REACHED; /* NOTREACHED */
  
                      default:
-                        Perl_croak(aTHX_ "panic: Unexpected op %u", OP(text_node));
+                        Perl_croak(aTHX_ "panic: Unexpected op %u", op);
                          NOT_REACHED; /* NOTREACHED */
                  }
              }
@@ -4602,7 +4761,7 @@ STATIC bool
  S_isGCB(pTHX_ const GCB_enum before, const GCB_enum after, const U8 * const strbeg, const U8 * const curpos, const bool utf8_target)
  {
      /* returns a boolean indicating if there is a Grapheme Cluster Boundary
-     * between the inputs.  See http://www.unicode.org/reports/tr29/. */
+     * between the inputs.  See https://www.unicode.org/reports/tr29/. */
  
      PERL_ARGS_ASSERT_ISGCB;
  
@@ -4664,7 +4823,7 @@ S_isGCB(pTHX_ const GCB_enum before, const GCB_enum after, const U8 * const strb
                  }
                  while (prev == GCB_Extend);
  
-                return prev != GCB_XPG_XX;
+                return prev != GCB_ExtPict_XX;
              }
  
          default:
@@ -4682,7 +4841,6 @@ S_isGCB(pTHX_ const GCB_enum before, const GCB_enum after, const U8 * const strb
  STATIC GCB_enum
  S_backup_one_GCB(pTHX_ const U8 * const strbeg, U8 ** curpos, const bool utf8_target)
  {
-    dVAR;
      GCB_enum gcb;
  
      PERL_ARGS_ASSERT_BACKUP_ONE_GCB;
@@ -4960,7 +5118,6 @@ S_isLB(pTHX_ LB_enum before,
  STATIC LB_enum
  S_advance_one_LB(pTHX_ U8 ** curpos, const U8 * const strend, const bool utf8_target)
  {
-    dVAR;
  
      LB_enum lb;
  
@@ -4991,7 +5148,6 @@ S_advance_one_LB(pTHX_ U8 ** curpos, const U8 * const strend, const bool utf8_ta
  STATIC LB_enum
  S_backup_one_LB(pTHX_ const U8 * const strbeg, U8 ** curpos, const bool utf8_target)
  {
-    dVAR;
      LB_enum lb;
  
      PERL_ARGS_ASSERT_BACKUP_ONE_LB;
@@ -5039,7 +5195,7 @@ S_isSB(pTHX_ SB_enum before,
               const bool utf8_target)
  {
      /* returns a boolean indicating if there is a Sentence Boundary Break
-     * between the inputs.  See http://www.unicode.org/reports/tr29/ */
+     * between the inputs.  See https://www.unicode.org/reports/tr29/ */
  
      U8 * lpos = (U8 *) curpos;
      bool has_para_sep = FALSE;
@@ -5228,7 +5384,6 @@ S_isSB(pTHX_ SB_enum before,
  STATIC SB_enum
  S_advance_one_SB(pTHX_ U8 ** curpos, const U8 * const strend, const bool utf8_target)
  {
-    dVAR;
      SB_enum sb;
  
      PERL_ARGS_ASSERT_ADVANCE_ONE_SB;
@@ -5262,7 +5417,6 @@ S_advance_one_SB(pTHX_ U8 ** curpos, const U8 * const strend, const bool utf8_ta
  STATIC SB_enum
  S_backup_one_SB(pTHX_ const U8 * const strbeg, U8 ** curpos, const bool utf8_target)
  {
-    dVAR;
      SB_enum sb;
  
      PERL_ARGS_ASSERT_BACKUP_ONE_SB;
@@ -5499,7 +5653,6 @@ S_advance_one_WB(pTHX_ U8 ** curpos,
                         const bool utf8_target,
                         const bool skip_Extend_Format)
  {
-    dVAR;
      WB_enum wb;
  
      PERL_ARGS_ASSERT_ADVANCE_ONE_WB;
@@ -5537,7 +5690,6 @@ S_advance_one_WB(pTHX_ U8 ** curpos,
  STATIC WB_enum
  S_backup_one_WB(pTHX_ WB_enum * previous, const U8 * const strbeg, U8 ** curpos, const bool utf8_target)
  {
-    dVAR;
      WB_enum wb;
  
      PERL_ARGS_ASSERT_BACKUP_ONE_WB;
@@ -5608,36 +5760,234 @@ S_backup_one_WB(pTHX_ WB_enum * previous, const U8 * const strbeg, U8 ** curpos,
          } while (wb == WB_Extend || wb == WB_Format);
      }
  
-    return wb;
-}
+    return wb;
+}
+
+/* Macros for regmatch(), using its internal variables */
+#define NEXTCHR_EOS -10 /* nextchr has fallen off the end */
+#define NEXTCHR_IS_EOS (nextbyte < 0)
+
+#define SET_nextchr \
+    nextbyte = ((locinput < reginfo->strend) ? UCHARAT(locinput) : NEXTCHR_EOS)
+
+#define SET_locinput(p) \
+    locinput = (p);  \
+    SET_nextchr
+
+#define sayYES goto yes
+#define sayNO goto no
+#define sayNO_SILENT goto no_silent
+
+/* we don't use STMT_START/END here because it leads to
+   "unreachable code" warnings, which are bogus, but distracting. */
+#define CACHEsayNO \
+    if (ST.cache_mask) \
+       reginfo->info_aux->poscache[ST.cache_offset] |= ST.cache_mask; \
+    sayNO
+
+#define EVAL_CLOSE_PAREN_IS(st,expr)                        \
+(                                                           \
+    (   ( st )                                         ) && \
+    (   ( st )->u.eval.close_paren                     ) && \
+    ( ( ( st )->u.eval.close_paren ) == ( (expr) + 1 ) )    \
+)
+
+#define EVAL_CLOSE_PAREN_IS_TRUE(st,expr)                   \
+(                                                           \
+    (   ( st )                                         ) && \
+    (   ( st )->u.eval.close_paren                     ) && \
+    (   ( expr )                                       ) && \
+    ( ( ( st )->u.eval.close_paren ) == ( (expr) + 1 ) )    \
+)
+
+
+#define EVAL_CLOSE_PAREN_SET(st,expr) \
+    (st)->u.eval.close_paren = ( (expr) + 1 )
+
+#define EVAL_CLOSE_PAREN_CLEAR(st) \
+    (st)->u.eval.close_paren = 0
+
+/* push a new state then goto it */
+
+#define PUSH_STATE_GOTO(state, node, input, eol, sr0)       \
+    pushinput = input; \
+    pusheol = eol; \
+    pushsr0 = sr0; \
+    scan = node; \
+    st->resume_state = state; \
+    goto push_state;
+
+/* push a new state with success backtracking, then goto it */
+
+#define PUSH_YES_STATE_GOTO(state, node, input, eol, sr0)   \
+    pushinput = input; \
+    pusheol = eol;     \
+    pushsr0 = sr0; \
+    scan = node; \
+    st->resume_state = state; \
+    goto push_yes_state;
+
+#define DEBUG_STATE_pp(pp)                                  \
+    DEBUG_STATE_r({                                         \
+        DUMP_EXEC_POS(locinput, scan, utf8_target,depth);   \
+        Perl_re_printf( aTHX_                               \
+            "%*s" pp " %s%s%s%s%s\n",                       \
+            INDENT_CHARS(depth), "",                        \
+            PL_reg_name[st->resume_state],                  \
+            ((st==yes_state||st==mark_state) ? "[" : ""),   \
+            ((st==yes_state) ? "Y" : ""),                   \
+            ((st==mark_state) ? "M" : ""),                  \
+            ((st==yes_state||st==mark_state) ? "]" : "")    \
+        );                                                  \
+    });
+
+/*
+
+regmatch() - main matching routine
+
+This is basically one big switch statement in a loop. We execute an op,
+set 'next' to point to the next op, and continue. If we come to a point which
+we may need to backtrack to on failure such as (A|B|C), we push a
+backtrack state onto the backtrack stack. On failure, we pop the top
+state, and re-enter the loop at the state indicated. If there are no more
+states to pop, we return failure.
+
+Sometimes we also need to backtrack on success; for example /A+/, where
+after successfully matching one A, we need to go back and try to
+match another one; similarly for lookahead assertions: if the assertion
+completes successfully, we backtrack to the state just before the assertion
+and then carry on.  In these cases, the pushed state is marked as
+'backtrack on success too'. This marking is in fact done by a chain of
+pointers, each pointing to the previous 'yes' state. On success, we pop to
+the nearest yes state, discarding any intermediate failure-only states.
+Sometimes a yes state is pushed just to force some cleanup code to be
+called at the end of a successful match or submatch; e.g. (??{$re}) uses
+it to free the inner regex.
+
+Note that failure backtracking rewinds the cursor position, while
+success backtracking leaves it alone.
+
+A pattern is complete when the END op is executed, while a subpattern
+such as (?=foo) is complete when the SUCCESS op is executed. Both of these
+ops trigger the "pop to last yes state if any, otherwise return true"
+behaviour.
+
+A common convention in this function is to use A and B to refer to the two
+subpatterns (or to the first nodes thereof) in patterns like /A*B/: so A is
+the subpattern to be matched possibly multiple times, while B is the entire
+rest of the pattern. Variable and state names reflect this convention.
+
+The states in the main switch are the union of ops and failure/success of
+substates associated with that op.  For example, IFMATCH is the op
+that does lookahead assertions /(?=A)B/ and so the IFMATCH state means
+'execute IFMATCH'; while IFMATCH_A is a state saying that we have just
+successfully matched A and IFMATCH_A_fail is a state saying that we have
+just failed to match A. Resume states always come in pairs. The backtrack
+state we push is marked as 'IFMATCH_A', but when that is popped, we resume
+at IFMATCH_A or IFMATCH_A_fail, depending on whether we are backtracking
+on success or failure.
+
+The struct that holds a backtracking state is actually a big union, with
+one variant for each major type of op. The variable st points to the
+top-most backtrack struct. To make the code clearer, within each
+block of code we #define ST to alias the relevant union.
+
+Here's a concrete example of a (vastly oversimplified) IFMATCH
+implementation:
+
+    switch (state) {
+    ....
+
+#define ST st->u.ifmatch
+
+    case IFMATCH: // we are executing the IFMATCH op, (?=A)B
+       ST.foo = ...; // some state we wish to save
+       ...
+       // push a yes backtrack state with a resume value of
+       // IFMATCH_A/IFMATCH_A_fail, then continue execution at the
+       // first node of A:
+       PUSH_YES_STATE_GOTO(IFMATCH_A, A, newinput, new_eol, sr0);
+       // NOTREACHED
+
+    case IFMATCH_A: // we have successfully executed A; now continue with B
+       next = B;
+       bar = ST.foo; // do something with the preserved value
+       break;
+
+    case IFMATCH_A_fail: // A failed, so the assertion failed
+       ...;   // do some housekeeping, then ...
+       sayNO; // propagate the failure
+
+#undef ST
+
+    ...
+    }
+
+For any old-timers reading this who are familiar with the old recursive
+approach, the code above is equivalent to:
+
+    case IFMATCH: // we are executing the IFMATCH op, (?=A)B
+    {
+       int foo = ...
+       ...
+       if (regmatch(A)) {
+           next = B;
+           bar = foo;
+           break;
+       }
+       ...;   // do some housekeeping, then ...
+       sayNO; // propagate the failure
+    }
+
+The topmost backtrack state, pointed to by st, is usually free. If you
+want to claim it, populate any ST.foo fields in it with values you wish to
+save, then do one of
  
-#define EVAL_CLOSE_PAREN_IS(st,expr)                        \
-(                                                           \
-    (   ( st )                                         ) && \
-    (   ( st )->u.eval.close_paren                     ) && \
-    ( ( ( st )->u.eval.close_paren ) == ( (expr) + 1 ) )    \
-)
+       PUSH_STATE_GOTO(resume_state, node, newinput, new_eol, new_sr0);
+       PUSH_YES_STATE_GOTO(resume_state, node, newinput, new_eol, new_sr0);
  
-#define EVAL_CLOSE_PAREN_IS_TRUE(st,expr)                   \
-(                                                           \
-    (   ( st )                                         ) && \
-    (   ( st )->u.eval.close_paren                     ) && \
-    (   ( expr )                                       ) && \
-    ( ( ( st )->u.eval.close_paren ) == ( (expr) + 1 ) )    \
-)
+which sets that backtrack state's resume value to 'resume_state', pushes a
+new free entry to the top of the backtrack stack, then goes to 'node'.
+On backtracking, the free slot is popped, and the saved state becomes the
+new free state. An ST.foo field in this new top state can be temporarily
+accessed to retrieve values, but once the main loop is re-entered, it
+becomes available for reuse.
+
+Note that the depth of the backtrack stack constantly increases during the
+left-to-right execution of the pattern, rather than going up and down with
+the pattern nesting. For example the stack is at its maximum at Z at the
+end of the pattern, rather than at X in the following:
  
+    /(((X)+)+)+....(Y)+....Z/
  
-#define EVAL_CLOSE_PAREN_SET(st,expr) \
-    (st)->u.eval.close_paren = ( (expr) + 1 )
+The only exceptions to this are lookahead/behind assertions and the cut,
+(?>A), which pop all the backtrack states associated with A before
+continuing.
  
-#define EVAL_CLOSE_PAREN_CLEAR(st) \
-    (st)->u.eval.close_paren = 0
+Backtrack state structs are allocated in slabs of about 4K in size.
+PL_regmatch_state and st always point to the currently active state,
+and PL_regmatch_slab points to the slab currently containing
+PL_regmatch_state.  The first time regmatch() is called, the first slab is
+allocated, and is never freed until interpreter destruction. When the slab
+is full, a new one is allocated and chained to the end. At exit from
+regmatch(), slabs allocated since entry are freed.
+
+In order to work with variable length lookbehinds, an upper limit is placed on
+lookbehinds which is set to where the match position is at the end of where the
+lookbehind would get to.  Nothing in the lookbehind should match above that,
+except we should be able to look beyond it for things like \b, which need the
+next character in the string to be able to determine if this is a boundary or
+not.  We also can't match the end of string/line unless we are also at the end
+of the entire string, so NEXTCHR_IS_EOS remains the same, and for those OPs
+that match a width, we have to add a condition that they are within the legal
+bounds of our window into the string.
+
+*/
  
  /* returns -1 on failure, $+[0] on success */
  STATIC SSize_t
  S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
  {
-    dVAR;
      const bool utf8_target = reginfo->is_utf8_target;
      const U32 uniflags = UTF8_ALLOW_DEFAULT;
      REGEXP *rex_sv = reginfo->prog;
@@ -5652,8 +6002,12 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
      SSize_t ln = 0; /* len or last;  init to avoid compiler warning */
      SSize_t endref = 0; /* offset of end of backref when ln is start */
      char *locinput = startpos;
+    char *loceol = reginfo->strend;
      char *pushinput; /* where to continue after a PUSH */
-    I32 nextchr;   /* is always set to UCHARAT(locinput), or -1 at EOS */
+    char *pusheol;   /* where to stop matching (loceol) after a PUSH */
+    U8   *pushsr0;   /* save starting pos of script run */
+    PERL_INT_FAST16_T nextbyte;   /* is always set to UCHARAT(locinput), or -1
+                                     at EOS */
  
      bool result = 0;       /* return value of S_regmatch */
      U32 depth = 0;            /* depth of backtrack stack */
@@ -5714,7 +6068,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
  #endif
  
  #ifdef DEBUGGING
-    GET_RE_DEBUG_FLAGS_DECL;
+    DECLARE_AND_GET_RE_DEBUG_FLAGS;
  #endif
  
      /* protect against undef(*^R) */
@@ -5728,7 +6082,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
  
      st = PL_regmatch_state;
  
-    /* Note that nextchr is a byte even in UTF */
+    /* Note that nextbyte is a byte even in UTF */
      SET_nextchr;
      scan = prog;
  
@@ -5764,7 +6118,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
          to_complement = 0;
  
          SET_nextchr;
-        assert(nextchr < 256 && (nextchr >= 0 || nextchr == NEXTCHR_EOS));
+        assert(nextbyte < 256 && (nextbyte >= 0 || nextbyte == NEXTCHR_EOS));
  
         switch (state_num) {
         case SBOL: /*  /^../ and /\A../  */
@@ -5789,7 +6143,8 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
             /* update the startpoint */
             st->u.keeper.val = rex->offs[0].start;
             rex->offs[0].start = locinput - reginfo->strbeg;
-           PUSH_STATE_GOTO(KEEPS_next, next, locinput);
+           PUSH_STATE_GOTO(KEEPS_next, next, locinput, loceol,
+                            script_run_begin);
             NOT_REACHED; /* NOTREACHED */
  
         case KEEPS_next_fail:
@@ -5799,12 +6154,12 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
             NOT_REACHED; /* NOTREACHED */
  
         case MEOL: /* /..$/m  */
-           if (!NEXTCHR_IS_EOS && nextchr != '\n')
+           if (!NEXTCHR_IS_EOS && nextbyte != '\n')
                 sayNO;
             break;
  
         case SEOL: /* /..$/  */
-           if (!NEXTCHR_IS_EOS && nextchr != '\n')
+           if (!NEXTCHR_IS_EOS && nextbyte != '\n')
                 sayNO;
             if (reginfo->strend - locinput > 1)
                 sayNO;
@@ -5816,13 +6171,17 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
             break;
  
         case SANY: /*  /./s  */
-           if (NEXTCHR_IS_EOS)
+           if (NEXTCHR_IS_EOS || locinput >= loceol)
                 sayNO;
              goto increment_locinput;
  
         case REG_ANY: /*  /./  */
-           if ((NEXTCHR_IS_EOS) || nextchr == '\n')
+           if (   NEXTCHR_IS_EOS
+                || locinput >= loceol
+                || nextbyte == '\n')
+            {
                 sayNO;
+            }
              goto increment_locinput;
  
  
@@ -5832,7 +6191,10 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
              /* In this case the charclass data is available inline so
                 we can fail fast without a lot of extra overhead. 
               */
-            if(!NEXTCHR_IS_EOS && !ANYOF_BITMAP_TEST(scan, nextchr)) {
+            if ( !   NEXTCHR_IS_EOS
+                &&   locinput < loceol
+                && ! ANYOF_BITMAP_TEST(scan, nextbyte))
+            {
                  DEBUG_EXECUTE_r(
                      Perl_re_exec_indentf( aTHX_  "%sTRIE: failed to match trie start class...%s\n",
                                depth, PL_colors[4], PL_colors[5])
@@ -5900,7 +6262,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
                      _CHECK_AND_WARN_PROBLEMATIC_LOCALE;
                      if (utf8_target
                          && ! NEXTCHR_IS_EOS
-                        && UTF8_IS_ABOVE_LATIN1(nextchr)
+                        && UTF8_IS_ABOVE_LATIN1(nextbyte)
                          && scan->flags == EXACTL)
                      {
                          /* We only output for EXACTL, as we let the folder
@@ -5911,7 +6273,9 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
                      }
                  }
                  if (   trie->bitmap
-                    && (NEXTCHR_IS_EOS || !TRIE_BITMAP_TEST(trie, nextchr)))
+                    && (     NEXTCHR_IS_EOS
+                        ||   locinput >= loceol
+                        || ! TRIE_BITMAP_TEST(trie, nextbyte)))
                  {
                     if (trie->states[ state ].wordnum) {
                          DEBUG_EXECUTE_r(
@@ -5949,7 +6313,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
                    shortest accept state and the wordnum of the longest
                    accept state */
  
-               while ( state && uc <= (U8*)(reginfo->strend) ) {
+               while ( state && uc <= (U8*)(loceol) ) {
                      U32 base = trie->states[ state ].trans.base;
                      UV uvc = 0;
                      U16 charid = 0;
@@ -5984,10 +6348,10 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
                     });
  
                     /* read a char and goto next state */
-                   if ( base && (foldlen || uc < (U8*)(reginfo->strend))) {
+                   if ( base && (foldlen || uc < (U8*)(loceol))) {
                         I32 offset;
                         REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc,
-                                             (U8 *) reginfo->strend, uscan,
+                                             (U8 *) loceol, uscan,
                                               len, uvc, charid, foldlen,
                                               foldbuf, uniflags);
                         charcount++;
@@ -6111,6 +6475,8 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
  
                     while (chars) {
                         if (utf8_target) {
+                            /* XXX This assumes the length is well-formed, as
+                             * does the UTF8SKIP below */
                             uvc = utf8n_to_uvchr((U8*)uc, UTF8_MAXLEN, &len,
                                                     uniflags);
                             uc += len;
@@ -6154,7 +6520,8 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
             });
  
             if ( ST.accepted > 1 || has_cutgroup || ST.jump ) {
-               PUSH_STATE_GOTO(TRIE_next, scan, (char*)uc);
+               PUSH_STATE_GOTO(TRIE_next, scan, (char*)uc, loceol,
+                                script_run_begin);
                 NOT_REACHED; /* NOTREACHED */
             }
             /* only one choice left - just continue */
@@ -6182,6 +6549,20 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
          }
  #undef  ST
  
+       case LEXACT_REQ8:
+            if (! utf8_target) {
+                sayNO;
+            }
+            /* FALLTHROUGH */
+
+       case LEXACT:
+        {
+           char *s;
+
+           s = STRINGl(scan);
+           ln = STR_LENl(scan);
+            goto join_short_long_exact;
+
         case EXACTL:             /*  /abc/l       */
              _CHECK_AND_WARN_PROBLEMATIC_LOCALE;
  
@@ -6195,16 +6576,18 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
                  _CHECK_AND_OUTPUT_WIDE_LOCALE_UTF8_MSG(locinput, reginfo->strend);
              }
              goto do_exact;
-       case EXACT_ONLY8:
+       case EXACT_REQ8:
              if (! utf8_target) {
                  sayNO;
              }
              /* FALLTHROUGH */
-       case EXACT: {            /*  /abc/        */
-           char *s;
+
+       case EXACT:             /*  /abc/        */
            do_exact:
-           s = STRING(scan);
-           ln = STR_LEN(scan);
+           s = STRINGs(scan);
+           ln = STR_LENs(scan);
+
+          join_short_long_exact:
             if (utf8_target != is_utf8_pat) {
                 /* The target and the pattern have differing utf8ness. */
                 char *l = locinput;
@@ -6221,7 +6604,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
                       * is an invariant, but there are tests in the test suite
                       * dealing with (??{...}) which violate this) */
                     while (s < e) {
-                       if (l >= reginfo->strend
+                       if (   l >= loceol
                              || UTF8_IS_ABOVE_LATIN1(* (U8*) l))
                          {
                              sayNO;
@@ -6245,7 +6628,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
                 else {
                     /* The target is not utf8, the pattern is utf8. */
                     while (s < e) {
-                        if (l >= reginfo->strend
+                        if (   l >= loceol
                              || UTF8_IS_ABOVE_LATIN1(* (U8*) s))
                          {
                              sayNO;
@@ -6271,8 +6654,8 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
              else {
                  /* The target and the pattern have the same utf8ness. */
                  /* Inline the first character, for speed. */
-                if (reginfo->strend - locinput < ln
-                    || UCHARAT(s) != nextchr
+                if (   loceol - locinput < ln
+                    || UCHARAT(s) != nextbyte
                      || (ln > 1 && memNE(s, locinput, ln)))
                  {
                      sayNO;
@@ -6307,7 +6690,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
             fold_array = PL_fold_latin1;
             goto do_exactf;
  
-        case EXACTFU_ONLY8:      /* /abc/iu with something in /abc/ > 255 */
+        case EXACTFU_REQ8:      /* /abc/iu with something in /abc/ > 255 */
              if (! utf8_target) {
                  sayNO;
              }
@@ -6356,8 +6739,8 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
             fold_utf8_flags = 0;
  
           do_exactf:
-           s = STRING(scan);
-           ln = STR_LEN(scan);
+           s = STRINGs(scan);
+           ln = STR_LENs(scan);
  
             if (   utf8_target
                  || is_utf8_pat
@@ -6367,7 +6750,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
               /* Either the target or the pattern is utf8, or there is the
                * issue where the fold lengths may differ. */
                 const char * const l = locinput;
-               char *e = reginfo->strend;
+               char *e = loceol;
  
                 if (! foldEQ_utf8_flags(l, &e, 0,  utf8_target,
                                          s, 0,  ln, is_utf8_pat,fold_utf8_flags))
@@ -6379,13 +6762,13 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
             }
  
             /* Neither the target nor the pattern are utf8 */
-           if (UCHARAT(s) != nextchr
+           if (UCHARAT(s) != nextbyte
                  && !NEXTCHR_IS_EOS
-               && UCHARAT(s) != fold_array[nextchr])
+               && UCHARAT(s) != fold_array[nextbyte])
             {
                 sayNO;
             }
-           if (reginfo->strend - locinput < ln)
+           if (loceol - locinput < ln)
                 sayNO;
             if (ln > 1 && ! folder(locinput, s, ln))
                 sayNO;
@@ -6403,10 +6786,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
              _CHECK_AND_WARN_PROBLEMATIC_LOCALE;
  
              if (FLAGS(scan) != TRADITIONAL_BOUND) {
-                if (! IN_UTF8_CTYPE_LOCALE) {
-                    Perl_ck_warner(aTHX_ packWARN(WARN_LOCALE),
-                                                B_ON_NON_UTF8_LOCALE_IS_WRONG);
-                }
+                CHECK_AND_WARN_NON_UTF8_CTYPE_LOCALE_IN_BOUND;
                  goto boundu;
              }
  
@@ -6414,9 +6794,9 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
                 if (locinput == reginfo->strbeg)
                     b1 = isWORDCHAR_LC('\n');
                 else {
-                    b1 = isWORDCHAR_LC_utf8_safe(reghop3((U8*)locinput, -1,
-                                                        (U8*)(reginfo->strbeg)),
-                                                 (U8*)(reginfo->strend));
+                    U8 *p = reghop3((U8*)locinput, -1,
+                                    (U8*)(reginfo->strbeg));
+                    b1 = isWORDCHAR_LC_utf8_safe(p, (U8*)(reginfo->strend));
                 }
                  b2 = (NEXTCHR_IS_EOS)
                      ? isWORDCHAR_LC('\n')
@@ -6429,7 +6809,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
                       : isWORDCHAR_LC(UCHARAT(locinput - 1));
                  b2 = (NEXTCHR_IS_EOS)
                      ? isWORDCHAR_LC('\n')
-                    : isWORDCHAR_LC(nextchr);
+                    : isWORDCHAR_LC(nextbyte);
             }
              if (to_complement ^ (b1 == b2)) {
                  sayNO;
@@ -6470,7 +6850,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
                   : isWORDCHAR_A(UCHARAT(locinput - 1));
              b2 = (NEXTCHR_IS_EOS)
                  ? isWORDCHAR_A('\n')
-                : isWORDCHAR_A(nextchr);
+                : isWORDCHAR_A(nextbyte);
              if (to_complement ^ (b1 == b2)) {
                  sayNO;
              }
@@ -6493,13 +6873,15 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
                      case TRADITIONAL_BOUND:
                      {
                          bool b1, b2;
-                        b1 = (locinput == reginfo->strbeg)
-                             ? 0 /* isWORDCHAR_L1('\n') */
-                             : isWORDCHAR_utf8_safe(
-                                               reghop3((U8*)locinput,
-                                                       -1,
-                                                       (U8*)(reginfo->strbeg)),
-                                                    (U8*) reginfo->strend);
+                        if (locinput == reginfo->strbeg) {
+                            b1 = 0 /* isWORDCHAR_L1('\n') */;
+                        }
+                        else {
+                            U8 *p = reghop3((U8*)locinput, -1,
+                                            (U8*)(reginfo->strbeg));
+
+                            b1 = isWORDCHAR_utf8_safe(p, (U8*) reginfo->strend);
+                        }
                          b2 = (NEXTCHR_IS_EOS)
                              ? 0 /* isWORDCHAR_L1('\n') */
                              : isWORDCHAR_utf8_safe((U8*)locinput,
@@ -6600,7 +6982,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
                              : isWORDCHAR_L1(UCHARAT(locinput - 1));
                          b2 = (NEXTCHR_IS_EOS)
                              ? 0 /* isWORDCHAR_L1('\n') */
-                            : isWORDCHAR_L1(nextchr);
+                            : isWORDCHAR_L1(nextbyte);
                          match = cBOOL(b1 != b2);
                          break;
                      }
@@ -6673,15 +7055,12 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
          case ANYOFPOSIXL:
         case ANYOFL:  /*  /[abc]/l      */
              _CHECK_AND_WARN_PROBLEMATIC_LOCALE;
+            CHECK_AND_WARN_NON_UTF8_CTYPE_LOCALE_IN_SETS(scan);
  
-            if (ANYOFL_UTF8_LOCALE_REQD(FLAGS(scan)) && ! IN_UTF8_CTYPE_LOCALE)
-            {
-              Perl_ck_warner(aTHX_ packWARN(WARN_LOCALE), utf8_locale_required);
-            }
              /* FALLTHROUGH */
         case ANYOFD:  /*   /[abc]/d       */
         case ANYOF:  /*   /[abc]/       */
-            if (NEXTCHR_IS_EOS)
+            if (NEXTCHR_IS_EOS || locinput >= loceol)
                  sayNO;
             if (  (! utf8_target || UTF8_IS_INVARIANT(*locinput))
                 && ! (ANYOF_FLAGS(scan) & ~ ANYOF_MATCHES_ALL_ABOVE_BITMAP))
@@ -6692,7 +7071,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
                 locinput++;
              }
              else {
-               if (!reginclass(rex, scan, (U8*)locinput, (U8*)reginfo->strend,
+               if (!reginclass(rex, scan, (U8*)locinput, (U8*) loceol,
                                                                     utf8_target))
                  {
                     sayNO;
@@ -6702,14 +7081,20 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
             break;
  
          case ANYOFM:
-            if (NEXTCHR_IS_EOS || (UCHARAT(locinput) & FLAGS(scan)) != ARG(scan)) {
+            if (   NEXTCHR_IS_EOS
+                || (UCHARAT(locinput) & FLAGS(scan)) != ARG(scan)
+                || locinput >= loceol)
+            {
                  sayNO;
              }
              locinput++; /* ANYOFM is always single byte */
              break;
  
          case NANYOFM:
-            if (NEXTCHR_IS_EOS || (UCHARAT(locinput) & FLAGS(scan)) == ARG(scan)) {
+            if (   NEXTCHR_IS_EOS
+                || (UCHARAT(locinput) & FLAGS(scan)) == ARG(scan)
+                || locinput >= loceol)
+            {
                  sayNO;
              }
              goto increment_locinput;
@@ -6718,7 +7103,34 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
          case ANYOFH:
              if (   ! utf8_target
                  ||   NEXTCHR_IS_EOS
-               || ! reginclass(rex, scan, (U8*)locinput, (U8*)reginfo->strend,
+                ||   ANYOF_FLAGS(scan) > NATIVE_UTF8_TO_I8(*locinput)
+               || ! reginclass(rex, scan, (U8*)locinput, (U8*) loceol,
+                                                                   utf8_target))
+            {
+                sayNO;
+            }
+            goto increment_locinput;
+            break;
+
+        case ANYOFHb:
+            if (   ! utf8_target
+                ||   NEXTCHR_IS_EOS
+                ||   ANYOF_FLAGS(scan) != (U8) *locinput
+               || ! reginclass(rex, scan, (U8*)locinput, (U8*) loceol,
+                                                                  utf8_target))
+            {
+                sayNO;
+            }
+            goto increment_locinput;
+            break;
+
+        case ANYOFHr:
+            if (   ! utf8_target
+                ||   NEXTCHR_IS_EOS
+                || ! inRANGE((U8) NATIVE_UTF8_TO_I8(*locinput),
+                             LOWEST_ANYOF_HRx_BYTE(ANYOF_FLAGS(scan)),
+                             HIGHEST_ANYOF_HRx_BYTE(ANYOF_FLAGS(scan)))
+               || ! reginclass(rex, scan, (U8*)locinput, (U8*) loceol,
                                                                     utf8_target))
              {
                  sayNO;
@@ -6726,6 +7138,69 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
              goto increment_locinput;
              break;
  
+        case ANYOFHs:
+            if (   ! utf8_target
+                ||   NEXTCHR_IS_EOS
+                ||   loceol - locinput < FLAGS(scan)
+                ||   memNE(locinput, ((struct regnode_anyofhs *) scan)->string, FLAGS(scan))
+               || ! reginclass(rex, scan, (U8*)locinput, (U8*) loceol,
+                                                                   utf8_target))
+            {
+                sayNO;
+            }
+            goto increment_locinput;
+            break;
+
+        case ANYOFR:
+            if (NEXTCHR_IS_EOS) {
+                sayNO;
+            }
+
+            if (utf8_target) {
+                if (    ANYOF_FLAGS(scan) > NATIVE_UTF8_TO_I8(*locinput)
+                   || ! withinCOUNT(utf8_to_uvchr_buf((U8 *) locinput,
+                                                (U8 *) reginfo->strend,
+                                                NULL),
+                                    ANYOFRbase(scan), ANYOFRdelta(scan)))
+                {
+                    sayNO;
+                }
+            }
+            else {
+                if (! withinCOUNT((U8) *locinput,
+                                  ANYOFRbase(scan), ANYOFRdelta(scan)))
+                {
+                    sayNO;
+                }
+            }
+            goto increment_locinput;
+            break;
+
+        case ANYOFRb:
+            if (NEXTCHR_IS_EOS) {
+                sayNO;
+            }
+
+            if (utf8_target) {
+                if (     ANYOF_FLAGS(scan) != (U8) *locinput
+                    || ! withinCOUNT(utf8_to_uvchr_buf((U8 *) locinput,
+                                                (U8 *) reginfo->strend,
+                                                NULL),
+                                     ANYOFRbase(scan), ANYOFRdelta(scan)))
+                {
+                    sayNO;
+                }
+            }
+            else {
+                if (! withinCOUNT((U8) *locinput,
+                                  ANYOFRbase(scan), ANYOFRdelta(scan)))
+                {
+                    sayNO;
+                }
+            }
+            goto increment_locinput;
+            break;
+
          /* The argument (FLAGS) to all the POSIX node types is the class number
           * */
  
@@ -6735,14 +7210,14 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
  
          case POSIXL:    /* \w or [:punct:] etc. under /l */
              _CHECK_AND_WARN_PROBLEMATIC_LOCALE;
-            if (NEXTCHR_IS_EOS)
+            if (NEXTCHR_IS_EOS || locinput >= loceol)
                  sayNO;
  
              /* Use isFOO_lc() for characters within Latin1.  (Note that
               * UTF8_IS_INVARIANT works even on non-UTF-8 strings, or else
               * wouldn't be invariant) */
-            if (UTF8_IS_INVARIANT(nextchr) || ! utf8_target) {
-                if (! (to_complement ^ cBOOL(isFOO_lc(FLAGS(scan), (U8) nextchr)))) {
+            if (UTF8_IS_INVARIANT(nextbyte) || ! utf8_target) {
+                if (! (to_complement ^ cBOOL(isFOO_lc(FLAGS(scan), (U8) nextbyte)))) {
                      sayNO;
                  }
  
@@ -6760,7 +7235,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
              /* Here is a UTF-8 variant code point below 256 and the target is
               * UTF-8 */
              if (! (to_complement ^ cBOOL(isFOO_lc(FLAGS(scan),
-                                            EIGHT_BIT_UTF8_TO_NATIVE(nextchr,
+                                            EIGHT_BIT_UTF8_TO_NATIVE(nextbyte,
                                              *(locinput + 1))))))
              {
                  sayNO;
@@ -6780,12 +7255,12 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
  
          case NPOSIXA:   /* \W or [:^punct:] etc. under /a */
  
-            if (NEXTCHR_IS_EOS) {
+            if (NEXTCHR_IS_EOS || locinput >= loceol) {
                  sayNO;
              }
  
              /* All UTF-8 variants match */
-            if (! UTF8_IS_INVARIANT(nextchr)) {
+            if (! UTF8_IS_INVARIANT(nextbyte)) {
                  goto increment_locinput;
              }
  
@@ -6799,13 +7274,13 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
               * UTF-8, and also from NPOSIXA even in UTF-8 when the current
               * character is a single byte */
  
-            if (NEXTCHR_IS_EOS) {
+            if (NEXTCHR_IS_EOS || locinput >= loceol) {
                  sayNO;
              }
  
            join_nposixa:
  
-            if (! (to_complement ^ cBOOL(_generic_isCC_A(nextchr,
+            if (! (to_complement ^ cBOOL(_generic_isCC_A(nextbyte,
                                                                  FLAGS(scan)))))
              {
                  sayNO;
@@ -6822,15 +7297,15 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
  
          case POSIXU:    /* \w or [:punct:] etc. under /u */
            utf8_posix:
-            if (NEXTCHR_IS_EOS) {
+            if (NEXTCHR_IS_EOS || locinput >= loceol) {
                  sayNO;
              }
  
              /* Use _generic_isCC() for characters within Latin1.  (Note that
               * UTF8_IS_INVARIANT works even on non-UTF-8 strings, or else
               * wouldn't be invariant) */
-            if (UTF8_IS_INVARIANT(nextchr) || ! utf8_target) {
-                if (! (to_complement ^ cBOOL(_generic_isCC(nextchr,
+            if (UTF8_IS_INVARIANT(nextbyte) || ! utf8_target) {
+                if (! (to_complement ^ cBOOL(_generic_isCC(nextbyte,
                                                             FLAGS(scan)))))
                  {
                      sayNO;
@@ -6839,7 +7314,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
              }
              else if (UTF8_IS_NEXT_CHAR_DOWNGRADEABLE(locinput, reginfo->strend)) {
                  if (! (to_complement
-                       ^ cBOOL(_generic_isCC(EIGHT_BIT_UTF8_TO_NATIVE(nextchr,
+                       ^ cBOOL(_generic_isCC(EIGHT_BIT_UTF8_TO_NATIVE(nextbyte,
                                                                 *(locinput + 1)),
                                               FLAGS(scan)))))
                  {
@@ -6897,22 +7372,22 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
                          }
                          break;
                  }
-                locinput += UTF8SKIP(locinput);
+                locinput += UTF8_SAFE_SKIP(locinput, reginfo->strend);
              }
              break;
  
         case CLUMP: /* Match \X: logical Unicode character.  This is defined as
                        a Unicode extended Grapheme Cluster */
-           if (NEXTCHR_IS_EOS)
+           if (NEXTCHR_IS_EOS || locinput >= loceol)
                 sayNO;
             if  (! utf8_target) {
  
                 /* Match either CR LF  or '.', as all the other possibilities
                  * require utf8 */
                 locinput++;         /* Match the . or CR */
-               if (nextchr == '\r' /* And if it was CR, and the next is LF,
+               if (nextbyte == '\r' /* And if it was CR, and the next is LF,
                                        match the LF */
-                   && locinput < reginfo->strend
+                   && locinput <  loceol
                     && UCHARAT(locinput) == '\n')
                  {
                      locinput++;
@@ -6929,7 +7404,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
                   * current character.  (There is always a break at the
                   * end-of-input) */
                  locinput += UTF8SKIP(locinput);
-                while (locinput < reginfo->strend) {
+                while (locinput < loceol) {
                      GCB_enum cur_gcb = getGCB_VAL_UTF8((U8*) locinput,
                                                           (U8*) reginfo->strend);
                      if (isGCB(prev_gcb, cur_gcb,
@@ -6947,7 +7422,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
             }
             break;
              
-       case NREFFL:  /*  /\g{name}/il  */
+       case REFFLN:  /*  /\g{name}/il  */
         {   /* The capture buffer cases.  The ones beginning with N for the
                named buffers just convert to the equivalent numbered and
                pretend they were called as the corresponding numbered buffer
@@ -6967,28 +7442,28 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
             utf8_fold_flags = FOLDEQ_LOCALE;
             goto do_nref;
  
-       case NREFFA:  /*  /\g{name}/iaa  */
+       case REFFAN:  /*  /\g{name}/iaa  */
             folder = foldEQ_latin1;
             fold_array = PL_fold_latin1;
             type = REFFA;
             utf8_fold_flags = FOLDEQ_UTF8_NOMIX_ASCII;
             goto do_nref;
  
-       case NREFFU:  /*  /\g{name}/iu  */
+       case REFFUN:  /*  /\g{name}/iu  */
             folder = foldEQ_latin1;
             fold_array = PL_fold_latin1;
             type = REFFU;
             utf8_fold_flags = 0;
             goto do_nref;
  
-       case NREFF:  /*  /\g{name}/i  */
+       case REFFN:  /*  /\g{name}/i  */
             folder = foldEQ;
             fold_array = PL_fold;
             type = REFF;
             utf8_fold_flags = 0;
             goto do_nref;
  
-       case NREF:  /*  /\g{name}/   */
+       case REFN:  /*  /\g{name}/   */
             type = REF;
             folder = NULL;
             fold_array = NULL;
@@ -7051,11 +7526,11 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
             if (type != REF     /* REF can do byte comparison */
                 && (utf8_target || type == REFFU || type == REFFL))
             {
-               char * limit = reginfo->strend;
+               char * limit = loceol;
  
                 /* This call case insensitively compares the entire buffer
                     * at s, with the current input starting at locinput, but
-                    * not going off the end given by reginfo->strend, and
+                    * not going off the end given by loceol, and
                      * returns in <limit> upon success, how much of the
                      * current input was matched */
                 if (! foldEQ_utf8_flags(s, NULL, endref - ln, utf8_target,
@@ -7068,13 +7543,16 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
             }
  
             /* Not utf8:  Inline the first character, for speed. */
-           if (!NEXTCHR_IS_EOS &&
-                UCHARAT(s) != nextchr &&
-               (type == REF ||
-                UCHARAT(s) != fold_array[nextchr]))
+           if ( ! NEXTCHR_IS_EOS
+                && locinput < loceol
+                && UCHARAT(s) != nextbyte
+                && (   type == REF
+                    || UCHARAT(s) != fold_array[nextbyte]))
+            {
                 sayNO;
+            }
             ln = endref - ln;
-           if (locinput + ln > reginfo->strend)
+           if (locinput + ln > loceol)
                 sayNO;
             if (ln > 1 && (type == REF
                            ? memNE(s, locinput, ln)
@@ -7134,7 +7612,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
                  rex->recurse_locinput[arg]= locinput;
  
                  DEBUG_r({
-                    GET_RE_DEBUG_FLAGS_DECL;
+                    DECLARE_AND_GET_RE_DEBUG_FLAGS;
                      DEBUG_STACK_r({
                          Perl_re_exec_indentf( aTHX_
                              "entering GOSUB, prev_recurse_locinput=%p recurse_locinput[%d]=%p\n",
@@ -7153,7 +7631,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
              /* NOTREACHED */
  
          case EVAL:  /*   /(?{...})B/   /(??{A})B/  and  /(?(?{...})X|Y)B/   */
-            if (cur_eval && cur_eval->locinput==locinput) {
+            if (logical == 2 && cur_eval && cur_eval->locinput==locinput) {
                 if ( ++nochange_depth > max_nochange_depth )
                      Perl_croak(aTHX_ "EVAL without pos change exceeded limit in regex");
              } else {
@@ -7381,7 +7859,8 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
                  PL_curpm = PL_reg_curpm;
  
                 if (logical != 2) {
-                    PUSH_STATE_GOTO(EVAL_B, next, locinput);
+                    PUSH_STATE_GOTO(EVAL_B, next, locinput, loceol,
+                                    script_run_begin);
                     /* NOTREACHED */
                  }
             }
@@ -7481,7 +7960,8 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
                 ST.prev_eval = cur_eval;
                 cur_eval = st;
                 /* now continue from first node in postoned RE */
-               PUSH_YES_STATE_GOTO(EVAL_postponed_AB, startpoint, locinput);
+               PUSH_YES_STATE_GOTO(EVAL_postponed_AB, startpoint, locinput,
+                                    loceol, script_run_begin);
                 NOT_REACHED; /* NOTREACHED */
         }
  
@@ -7637,7 +8117,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
             sw = cBOOL(rex->lastparen >= n && rex->offs[n].end != -1);
             break;
  
-       case NGROUPP:  /*  (?(<name>))  */
+       case GROUPPN:  /*  (?(<name>))  */
             /* reg_check_named_buff_matched returns 0 for no match */
             sw = cBOOL(0 < reg_check_named_buff_matched(rex,scan));
             break;
@@ -7781,7 +8261,8 @@ NULL
             ST.count = -1;      /* this will be updated by WHILEM */
             ST.lastloc = NULL;  /* this will be updated by WHILEM */
  
-           PUSH_YES_STATE_GOTO(CURLYX_end, PREVOPER(next), locinput);
+           PUSH_YES_STATE_GOTO(CURLYX_end, PREVOPER(next), locinput, loceol,
+                                script_run_begin);
             NOT_REACHED; /* NOTREACHED */
         }
  
@@ -7829,7 +8310,8 @@ NULL
                 cur_curlyx->u.curlyx.lastloc = locinput;
                 REGCP_SET(ST.lastcp);
  
-               PUSH_STATE_GOTO(WHILEM_A_pre, A, locinput);
+               PUSH_STATE_GOTO(WHILEM_A_pre, A, locinput, loceol,
+                                script_run_begin);
                 NOT_REACHED; /* NOTREACHED */
             }
  
@@ -7937,7 +8419,7 @@ NULL
                 ST.save_curlyx = cur_curlyx;
                 cur_curlyx = cur_curlyx->u.curlyx.prev_curlyx;
                 PUSH_YES_STATE_GOTO(WHILEM_B_min, ST.save_curlyx->u.curlyx.B,
-                                    locinput);
+                                    locinput, loceol, script_run_begin);
                 NOT_REACHED; /* NOTREACHED */
             }
  
@@ -7948,7 +8430,8 @@ NULL
                              maxopenparen);
                 cur_curlyx->u.curlyx.lastloc = locinput;
                 REGCP_SET(ST.lastcp);
-               PUSH_STATE_GOTO(WHILEM_A_max, A, locinput);
+               PUSH_STATE_GOTO(WHILEM_A_max, A, locinput, loceol,
+                                script_run_begin);
                 NOT_REACHED; /* NOTREACHED */
             }
             goto do_whilem_B_max;
@@ -8000,7 +8483,7 @@ NULL
             ST.save_curlyx = cur_curlyx;
             cur_curlyx = cur_curlyx->u.curlyx.prev_curlyx;
             PUSH_YES_STATE_GOTO(WHILEM_B_max, ST.save_curlyx->u.curlyx.B,
-                                locinput);
+                                locinput, loceol, script_run_begin);
             NOT_REACHED; /* NOTREACHED */
  
         case WHILEM_B_min_fail: /* just failed to match B in a minimal match */
@@ -8031,7 +8514,7 @@ NULL
             REGCP_SET(ST.lastcp);
             PUSH_STATE_GOTO(WHILEM_A_min,
                 /*A*/ NEXTOPER(ST.save_curlyx->u.curlyx.me) + EXTRA_STEP_2ARGS,
-                locinput);
+                locinput, loceol, script_run_begin);
             NOT_REACHED; /* NOTREACHED */
  
  #undef  ST
@@ -8053,9 +8536,11 @@ NULL
  
             /* Now go into the branch */
             if (has_cutgroup) {
-               PUSH_YES_STATE_GOTO(BRANCH_next, scan, locinput);
+               PUSH_YES_STATE_GOTO(BRANCH_next, scan, locinput, loceol,
+                                    script_run_begin);
             } else {
-               PUSH_STATE_GOTO(BRANCH_next, scan, locinput);
+               PUSH_STATE_GOTO(BRANCH_next, scan, locinput, loceol,
+                                script_run_begin);
             }
             NOT_REACHED; /* NOTREACHED */
  
@@ -8063,7 +8548,8 @@ NULL
              sv_yes_mark = st->u.mark.mark_name = scan->flags
                  ? MUTABLE_SV(rexi->data->data[ ARG( scan ) ])
                  : NULL;
-            PUSH_STATE_GOTO(CUTGROUP_next, next, locinput);
+            PUSH_STATE_GOTO(CUTGROUP_next, next, locinput, loceol,
+                            script_run_begin);
              NOT_REACHED; /* NOTREACHED */
  
          case CUTGROUP_next_fail:
@@ -8140,7 +8626,8 @@ NULL
                 goto curlym_do_B;
  
           curlym_do_A: /* execute the A in /A{m,n}B/  */
-           PUSH_YES_STATE_GOTO(CURLYM_A, ST.A, locinput); /* match A */
+           PUSH_YES_STATE_GOTO(CURLYM_A, ST.A, locinput, loceol, /* match A */
+                                script_run_begin);
             NOT_REACHED; /* NOTREACHED */
  
         case CURLYM_A: /* we've just matched an A */
@@ -8209,9 +8696,16 @@ NULL
                      depth, (IV)ST.count)
                 );
             if (! NEXTCHR_IS_EOS && ST.c1 != CHRTEST_VOID) {
-                if (! UTF8_IS_INVARIANT(nextchr) && utf8_target) {
-                    if (memNE(locinput, ST.c1_utf8, UTF8SKIP(locinput))
-                        && memNE(locinput, ST.c2_utf8, UTF8SKIP(locinput)))
+                if (! UTF8_IS_INVARIANT(nextbyte) && utf8_target) {
+
+                           /* (We can use memEQ and memNE in this file without
+                            * having to worry about one being shorter than the
+                            * other, since the first byte of each gives the
+                            * length of the character) */
+                    if (   memNE(locinput, ST.c1_utf8, UTF8_SAFE_SKIP(locinput,
+                                                              reginfo->strend))
+                        && memNE(locinput, ST.c2_utf8, UTF8_SAFE_SKIP(locinput,
+                                                             reginfo->strend)))
                      {
                          /* simulate B failing */
                          DEBUG_OPTIMISE_r(
@@ -8225,12 +8719,12 @@ NULL
                          goto reenter_switch;
                      }
                  }
-                else if (nextchr != ST.c1 && nextchr != ST.c2) {
+                else if (nextbyte != ST.c1 && nextbyte != ST.c2) {
                      /* simulate B failing */
                      DEBUG_OPTIMISE_r(
                          Perl_re_exec_indentf( aTHX_  "CURLYM Fast bail next target=0x%X c1=0x%X c2=0x%X\n",
                              depth,
-                            (int) nextchr, ST.c1, ST.c2)
+                            (int) nextbyte, ST.c1, ST.c2)
                      );
                      state_num = CURLYM_B_fail;
                      goto reenter_switch;
@@ -8257,7 +8751,8 @@ NULL
                 }
             }
             
-           PUSH_STATE_GOTO(CURLYM_B, ST.B, locinput); /* match B */
+           PUSH_STATE_GOTO(CURLYM_B, ST.B, locinput, loceol,   /* match B */
+                            script_run_begin);
             NOT_REACHED; /* NOTREACHED */
  
         case CURLYM_B_fail: /* just failed to match a B */
@@ -8320,7 +8815,7 @@ NULL
              if (EVAL_CLOSE_PAREN_IS_TRUE(cur_eval,(U32)ST.paren))
              {
                  char *li = locinput;
-                if (!regrepeat(rex, &li, scan, reginfo, 1))
+                if (!regrepeat(rex, &li, scan, loceol, reginfo, 1))
                     sayNO;
                  SET_locinput(li);
                  goto fake_end;
@@ -8376,7 +8871,7 @@ NULL
                  char *li = locinput;
                 minmod = 0;
                 if (ST.min &&
-                        regrepeat(rex, &li, ST.A, reginfo, ST.min)
+                        regrepeat(rex, &li, ST.A, loceol, reginfo, ST.min)
                              < ST.min)
                     sayNO;
                  SET_locinput(li);
@@ -8390,7 +8885,7 @@ NULL
                 /* set ST.maxpos to the furthest point along the
                  * string that could possibly match */
                 if  (ST.max == REG_INFTY) {
-                   ST.maxpos = reginfo->strend - 1;
+                   ST.maxpos = loceol - 1;
                     if (utf8_target)
                         while (UTF8_IS_CONTINUATION(*(U8*)ST.maxpos))
                             ST.maxpos--;
@@ -8398,13 +8893,13 @@ NULL
                 else if (utf8_target) {
                     int m = ST.max - ST.min;
                     for (ST.maxpos = locinput;
-                        m >0 && ST.maxpos < reginfo->strend; m--)
+                        m >0 && ST.maxpos <  loceol; m--)
                         ST.maxpos += UTF8SKIP(ST.maxpos);
                 }
                 else {
                     ST.maxpos = locinput + ST.max - ST.min;
-                   if (ST.maxpos >= reginfo->strend)
-                       ST.maxpos = reginfo->strend - 1;
+                   if (ST.maxpos >=  loceol)
+                       ST.maxpos =  loceol - 1;
                 }
                 goto curly_try_B_min_known;
  
@@ -8413,7 +8908,7 @@ NULL
                  /* avoid taking address of locinput, so it can remain
                   * a register var */
                  char *li = locinput;
-                ST.count = regrepeat(rex, &li, ST.A, reginfo, ST.max);
+                ST.count = regrepeat(rex, &li, ST.A, loceol, reginfo, ST.max);
                 if (ST.count < ST.min)
                     sayNO;
                  SET_locinput(li);
@@ -8446,7 +8941,7 @@ NULL
              if (ST.c1 == CHRTEST_VOID) {
                  /* failed -- move forward one */
                  char *li = locinput;
-                if (!regrepeat(rex, &li, ST.A, reginfo, 1)) {
+                if (!regrepeat(rex, &li, ST.A, loceol, reginfo, 1)) {
                      sayNO;
                  }
                  locinput = li;
@@ -8473,20 +8968,26 @@ NULL
                     n = (ST.oldloc == locinput) ? 0 : 1;
                     if (ST.c1 == ST.c2) {
                         /* set n to utf8_distance(oldloc, locinput) */
-                       while (locinput <= ST.maxpos
-                              && memNE(locinput, ST.c1_utf8, UTF8SKIP(locinput)))
+                       while (    locinput <= ST.maxpos
+                               &&  locinput < loceol
+                               &&  memNE(locinput, ST.c1_utf8,
+                                    UTF8_SAFE_SKIP(locinput, reginfo->strend)))
                          {
-                           locinput += UTF8SKIP(locinput);
+                           locinput += UTF8_SAFE_SKIP(locinput,
+                                                       reginfo->strend);
                             n++;
                         }
                     }
                     else {
                         /* set n to utf8_distance(oldloc, locinput) */
-                       while (locinput <= ST.maxpos
-                              && memNE(locinput, ST.c1_utf8, UTF8SKIP(locinput))
-                              && memNE(locinput, ST.c2_utf8, UTF8SKIP(locinput)))
+                       while (   locinput <= ST.maxpos
+                               && locinput < loceol
+                               && memNE(locinput, ST.c1_utf8,
+                                     UTF8_SAFE_SKIP(locinput, reginfo->strend))
+                               && memNE(locinput, ST.c2_utf8,
+                                    UTF8_SAFE_SKIP(locinput, reginfo->strend)))
                          {
-                           locinput += UTF8SKIP(locinput);
+                           locinput += UTF8_SAFE_SKIP(locinput, reginfo->strend);
                             n++;
                         }
                     }
@@ -8543,7 +9044,7 @@ NULL
                       * locinput matches */
                      char *li = ST.oldloc;
                     ST.count += n;
-                    if (regrepeat(rex, &li, ST.A, reginfo, n) < n)
+                    if (regrepeat(rex, &li, ST.A, loceol, reginfo, n) < n)
                         sayNO;
                      assert(n == REG_INFTY || locinput == li);
                 }
@@ -8551,34 +9052,36 @@ NULL
  
            curly_try_B_min:
              CURLY_SETPAREN(ST.paren, ST.count);
-            PUSH_STATE_GOTO(CURLY_B_min, ST.B, locinput);
+            PUSH_STATE_GOTO(CURLY_B_min, ST.B, locinput, loceol,
+                            script_run_begin);
             NOT_REACHED; /* NOTREACHED */
  
  
            curly_try_B_max:
             /* a successful greedy match: now try to match B */
             {
-               bool could_match = locinput < reginfo->strend;
+               bool could_match = locinput <  loceol;
  
                 /* If it could work, try it. */
                  if (ST.c1 != CHRTEST_VOID && could_match) {
                      if (! UTF8_IS_INVARIANT(UCHARAT(locinput)) && utf8_target)
                      {
-                        could_match = memEQ(locinput,
-                                            ST.c1_utf8,
-                                            UTF8SKIP(locinput))
-                                    || memEQ(locinput,
-                                             ST.c2_utf8,
-                                             UTF8SKIP(locinput));
+                        could_match =  memEQ(locinput, ST.c1_utf8,
+                                             UTF8_SAFE_SKIP(locinput,
+                                                            reginfo->strend))
+                                    || memEQ(locinput, ST.c2_utf8,
+                                             UTF8_SAFE_SKIP(locinput,
+                                                            reginfo->strend));
                      }
                      else {
-                        could_match = UCHARAT(locinput) == ST.c1
-                                      || UCHARAT(locinput) == ST.c2;
+                        could_match =   UCHARAT(locinput) == ST.c1
+                                     || UCHARAT(locinput) == ST.c2;
                      }
                  }
                  if (ST.c1 == CHRTEST_VOID || could_match) {
                     CURLY_SETPAREN(ST.paren, ST.count);
-                   PUSH_STATE_GOTO(CURLY_B_max, ST.B, locinput);
+                   PUSH_STATE_GOTO(CURLY_B_max, ST.B, locinput, loceol,
+                                    script_run_begin);
                     NOT_REACHED; /* NOTREACHED */
                 }
             }
@@ -8633,8 +9136,9 @@ NULL
  
                  SET_RECURSE_LOCINPUT("FAKE-END[after]", cur_eval->locinput);
  
-                PUSH_YES_STATE_GOTO(EVAL_postponed_AB, st->u.eval.prev_eval->u.eval.B,
-                                    locinput); /* match B */
+                PUSH_YES_STATE_GOTO(EVAL_postponed_AB,          /* match B */
+                                    st->u.eval.prev_eval->u.eval.B,
+                                    locinput, loceol, script_run_begin);
             }
  
             if (locinput < reginfo->till) {
@@ -8658,40 +9162,61 @@ NULL
  #undef  ST
  #define ST st->u.ifmatch
  
-        {
-            char *newstart;
-
         case SUSPEND:   /* (?>A) */
             ST.wanted = 1;
-           newstart = locinput;
+           ST.start = locinput;
+           ST.end = loceol;
+            ST.count = 1;
             goto do_ifmatch;    
  
-       case UNLESSM:   /* -ve lookaround: (?!A), or with flags, (?<!A) */
+       case UNLESSM:   /* -ve lookaround: (?!A), or with 'flags', (?<!A) */
             ST.wanted = 0;
             goto ifmatch_trivial_fail_test;
  
-       case IFMATCH:   /* +ve lookaround: (?=A), or with flags, (?<=A) */
+       case IFMATCH:   /* +ve lookaround: (?=A), or with 'flags', (?<=A) */
             ST.wanted = 1;
           ifmatch_trivial_fail_test:
-           if (scan->flags) {
-               char * const s = HOPBACKc(locinput, scan->flags);
-               if (!s) {
-                   /* trivial fail */
-                   if (logical) {
-                       logical = 0;
-                       sw = 1 - cBOOL(ST.wanted);
-                   }
-                   else if (ST.wanted)
-                       sayNO;
-                   next = scan + ARG(scan);
-                   if (next == scan)
-                       next = NULL;
-                   break;
-               }
-               newstart = s;
+            ST.count = scan->next_off + 1; /* next_off repurposed to be
+                                              lookbehind count, requires
+                                              non-zero flags */
+           if (! scan->flags) {    /* 'flags' zero means lookahead */
+
+                /* Lookahead starts here and ends at the normal place */
+               ST.start = locinput;
+               ST.end = loceol;
+            }
+           else {
+                PERL_UINT_FAST8_T back_count = scan->flags;
+               char * s;
+
+                /* Lookbehind can look beyond the current position */
+               ST.end = loceol;
+
+                /* ... and starts at the first place in the input that is in
+                 * the range of the possible start positions */
+                for (; ST.count > 0; ST.count--, back_count--) {
+                    s = HOPBACKc(locinput, back_count);
+                    if (s) {
+                        ST.start = s;
+                        goto do_ifmatch;
+                    }
+                }
+
+                /* If the lookbehind doesn't start in the actual string, it is
+                 * a trivial match failure */
+                if (logical) {
+                    logical = 0;
+                    sw = 1 - cBOOL(ST.wanted);
+                }
+                else if (ST.wanted)
+                    sayNO;
+
+                /* Here, we didn't want it to match, so is actually success */
+                next = scan + ARG(scan);
+                if (next == scan)
+                    next = NULL;
+                break;
             }
-           else
-               newstart = locinput;
  
           do_ifmatch:
             ST.me = scan;
@@ -8699,29 +9224,48 @@ NULL
             logical = 0; /* XXX: reset state of logical once it has been saved into ST */
             
             /* execute body of (?...A) */
-           PUSH_YES_STATE_GOTO(IFMATCH_A, NEXTOPER(NEXTOPER(scan)), newstart);
+           PUSH_YES_STATE_GOTO(IFMATCH_A, NEXTOPER(NEXTOPER(scan)), ST.start,
+                                ST.end, script_run_begin);
             NOT_REACHED; /* NOTREACHED */
-        }
+
+        {
+            bool matched;
  
         case IFMATCH_A_fail: /* body of (?...A) failed */
-           ST.wanted = !ST.wanted;
-           /* FALLTHROUGH */
+           if (! ST.logical && ST.count > 1) {
+
+                /* It isn't a real failure until we've tried all starting
+                 * positions.  Move to the next starting position and retry */
+                ST.count--;
+                ST.start = HOPc(ST.start, 1);
+                scan = ST.me;
+                logical = ST.logical;
+                goto do_ifmatch;
+            }
+
+            /* Here, all starting positions have been tried. */
+           matched = FALSE;
+           goto ifmatch_done;
  
         case IFMATCH_A: /* body of (?...A) succeeded */
-           if (ST.logical) {
-               sw = cBOOL(ST.wanted);
-           }
-           else if (!ST.wanted)
-               sayNO;
+           matched = TRUE;
+          ifmatch_done:
+            sw = matched == ST.wanted;
+           if (! ST.logical && !sw) {
+                sayNO;
+            }
  
             if (OP(ST.me) != SUSPEND) {
                  /* restore old position except for (?>...) */
                 locinput = st->locinput;
+                loceol = st->loceol;
+                script_run_begin = st->sr0;
             }
             scan = ST.me + ARG(ST.me);
             if (scan == ST.me)
                 scan = NULL;
             continue; /* execute B */
+        }
  
  #undef ST
  
@@ -8733,13 +9277,14 @@ NULL
             break;
  
         case COMMIT:  /*  (*COMMIT)  */
-           reginfo->cutpoint = reginfo->strend;
+           reginfo->cutpoint = loceol;
             /* FALLTHROUGH */
  
         case PRUNE:   /*  (*PRUNE)   */
              if (scan->flags)
                 sv_yes_mark = sv_commit = MUTABLE_SV(rexi->data->data[ ARG( scan ) ]);
-           PUSH_STATE_GOTO(COMMIT_next, next, locinput);
+           PUSH_STATE_GOTO(COMMIT_next, next, locinput, loceol,
+                            script_run_begin);
             NOT_REACHED; /* NOTREACHED */
  
         case COMMIT_next_fail:
@@ -8769,7 +9314,8 @@ NULL
                  = MUTABLE_SV(rexi->data->data[ ARG( scan ) ]);
              mark_state = st;
              ST.mark_loc = locinput;
-            PUSH_YES_STATE_GOTO(MARKPOINT_next, next, locinput);
+            PUSH_YES_STATE_GOTO(MARKPOINT_next, next, locinput, loceol,
+                                script_run_begin);
              NOT_REACHED; /* NOTREACHED */
  
          case MARKPOINT_next:
@@ -8802,7 +9348,8 @@ NULL
                  /* (*SKIP) : if we fail we cut here*/
                  ST.mark_name = NULL;
                  ST.mark_loc = locinput;
-                PUSH_STATE_GOTO(SKIP_next,next, locinput);
+                PUSH_STATE_GOTO(SKIP_next,next, locinput, loceol,
+                                script_run_begin);
              } else {
                  /* (*SKIP:NAME) : if there is a (*MARK:NAME) fail where it was, 
                     otherwise do nothing.  Meaning we need to scan 
@@ -8815,7 +9362,8 @@ NULL
                                  find ) ) 
                      {
                          ST.mark_name = find;
-                        PUSH_STATE_GOTO( SKIP_next, next, locinput);
+                        PUSH_STATE_GOTO( SKIP_next, next, locinput, loceol,
+                                         script_run_begin);
                      }
                      cur = cur->u.mark.prev_mark;
                  }
@@ -8844,7 +9392,7 @@ NULL
  #undef ST
  
          case LNBREAK: /* \R */
-            if ((n=is_LNBREAK_safe(locinput, reginfo->strend, utf8_target))) {
+            if ((n=is_LNBREAK_safe(locinput, loceol, utf8_target))) {
                  locinput += n;
              } else
                  sayNO;
@@ -8860,10 +9408,10 @@ NULL
            increment_locinput:
              assert(!NEXTCHR_IS_EOS);
              if (utf8_target) {
-                locinput += PL_utf8skip[nextchr];
+                locinput += PL_utf8skip[nextbyte];
                  /* locinput is allowed to go 1 char off the end (signifying
                   * EOS), but not 2+ */
-                if (locinput > reginfo->strend)
+                if (locinput >  loceol)
                      sayNO;
              }
              else
@@ -8886,8 +9434,10 @@ NULL
         /* push a new regex state, then continue at scan  */
         {
             regmatch_state *newst;
+            DECLARE_AND_GET_RE_DEBUG_FLAGS;
  
-           DEBUG_STACK_r({
+            DEBUG_r( /* DEBUG_STACK_r */
+              if (DEBUG_v_TEST || RE_DEBUG_FLAG(RE_DEBUG_EXTRA_STACK)) {
                 regmatch_state *cur = st;
                 regmatch_state *curyes = yes_state;
                 U32 i;
@@ -8906,17 +9456,21 @@ NULL
                      if (curyes == cur)
                         curyes = cur->u.yes.prev_yes_state;
                  }
-            } else 
+            } else {
                  DEBUG_STATE_pp("push")
-            );
+            });
             depth++;
             st->locinput = locinput;
+           st->loceol = loceol;
+            st->sr0 = script_run_begin;
             newst = st+1; 
             if (newst >  SLAB_LAST(PL_regmatch_slab))
                 newst = S_push_slab(aTHX);
             PL_regmatch_state = newst;
  
             locinput = pushinput;
+            loceol = pusheol;
+            script_run_begin = pushsr0;
             st = newst;
             continue;
              /* NOTREACHED */
@@ -8969,8 +9523,11 @@ NULL
         yes_state = st->u.yes.prev_yes_state;
         PL_regmatch_state = st;
          
-        if (no_final)
+        if (no_final) {
              locinput= st->locinput;
+            loceol= st->loceol;
+            script_run_begin = st->sr0;
+        }
         state_num = st->resume_state + no_final;
         goto reenter_switch;
      }
@@ -9020,6 +9577,8 @@ NULL
         }
         PL_regmatch_state = st;
         locinput= st->locinput;
+       loceol= st->loceol;
+        script_run_begin = st->sr0;
  
         DEBUG_STATE_pp("pop");
         depth--;
@@ -9071,22 +9630,23 @@ NULL
   * What 'simple' means is a node which can be the operand of a quantifier like
   * '+', or {1,3}
   *
- * startposp - pointer a pointer to the start position.  This is updated
+ * startposp - pointer to a pointer to the start position.  This is updated
   *             to point to the byte following the highest successful
   *             match.
   * p         - the regnode to be repeatedly matched against.
- * reginfo   - struct holding match state, such as strend
+ * loceol    - pointer to the end position beyond which we aren't supposed to
+ *             look.
+ * reginfo   - struct holding match state, such as utf8_target
   * max       - maximum number of things to match.
   * depth     - (for debugging) backtracking depth.
   */
  STATIC I32
  S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
-            regmatch_info *const reginfo, I32 max _pDEPTH)
+            char * loceol, regmatch_info *const reginfo, I32 max _pDEPTH)
  {
-    dVAR;
      char *scan;     /* Pointer to current position in target string */
      I32 c;
-    char *loceol = reginfo->strend;   /* local version */
+    char *this_eol = loceol;   /* potentially adjusted version. */
      I32 hardcount = 0;  /* How many matches so far */
      bool utf8_target = reginfo->is_utf8_target;
      unsigned int to_complement = 0;  /* Invert the result? */
@@ -9095,18 +9655,24 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
  
      PERL_ARGS_ASSERT_REGREPEAT;
  
+    /* This routine is structured so that we switch on the input OP.  Each OP
+     * case: statement contains a loop to repeatedly apply the OP, advancing
+     * the input until it fails, or reaches the end of the input, or until it
+     * reaches the upper limit of matches. */
+
      scan = *startposp;
-    if (max == REG_INFTY)
+    if (max == REG_INFTY)   /* This is a special marker to go to the platform's
+                               max */
         max = I32_MAX;
-    else if (! utf8_target && loceol - scan > max)
-       loceol = scan + max;
+    else if (! utf8_target && this_eol - scan > max)
+       this_eol = scan + max;
  
-    /* Here, for the case of a non-UTF-8 target we have adjusted <loceol> down
+    /* Here, for the case of a non-UTF-8 target we have adjusted <this_eol> down
       * to the maximum of how far we should go in it (leaving it set to the real
       * end, if the maximum permissible would take us beyond that).  This allows
-     * us to make the loop exit condition that we haven't gone past <loceol> to
+     * us to make the loop exit condition that we haven't gone past <this_eol> to
       * also mean that we haven't exceeded the max permissible count, saving a
-     * test each time through the loop.  But it assumes that the OP matches a
+     * test each time through the loops.  But it assumes that the OP matches a
       * single byte, which is true for most of the OPs below when applied to a
       * non-UTF-8 target.  Those relatively few OPs that don't have this
       * characteristic will have to compensate.
@@ -9114,40 +9680,56 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
       * There is no adjustment for UTF-8 targets, as the number of bytes per
       * character varies.  OPs will have to test both that the count is less
       * than the max permissible (using <hardcount> to keep track), and that we
-     * are still within the bounds of the string (using <loceol>.  A few OPs
+     * are still within the bounds of the string (using <this_eol>).  A few OPs
       * match a single byte no matter what the encoding.  They can omit the max
       * test if, for the UTF-8 case, they do the adjustment that was skipped
       * above.
       *
       * Thus, the code above sets things up for the common case; and exceptional
       * cases need extra work; the common case is to make sure <scan> doesn't
-     * go past <loceol>, and for UTF-8 to also use <hardcount> to make sure the
+     * go past <this_eol>, and for UTF-8 to also use <hardcount> to make sure the
       * count doesn't exceed the maximum permissible */
  
      switch (OP(p)) {
      case REG_ANY:
         if (utf8_target) {
-           while (scan < loceol && hardcount < max && *scan != '\n') {
+           while (scan < this_eol && hardcount < max && *scan != '\n') {
                 scan += UTF8SKIP(scan);
                 hardcount++;
             }
         } else {
-            scan = (char *) memchr(scan, '\n', loceol - scan);
+            scan = (char *) memchr(scan, '\n', this_eol - scan);
              if (! scan) {
-                scan = loceol;
+                scan = this_eol;
              }
         }
         break;
      case SANY:
          if (utf8_target) {
-           while (scan < loceol && hardcount < max) {
+           while (scan < this_eol && hardcount < max) {
                 scan += UTF8SKIP(scan);
                 hardcount++;
             }
         }
         else
-           scan = loceol;
+           scan = this_eol;
         break;
+
+    case LEXACT_REQ8:
+        if (! utf8_target) {
+            break;
+        }
+        /* FALLTHROUGH */
+
+    case LEXACT:
+      {
+        U8 * string;
+        Size_t str_len;
+
+       string = (U8 *) STRINGl(p);
+        str_len = STR_LENl(p);
+        goto join_short_long_exact;
+
      case EXACTL:
          _CHECK_AND_WARN_PROBLEMATIC_LOCALE;
          if (utf8_target && UTF8_IS_ABOVE_LATIN1(*scan)) {
@@ -9155,28 +9737,32 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
          }
          goto do_exact;
  
-    case EXACT_ONLY8:
+    case EXACT_REQ8:
          if (! utf8_target) {
              break;
          }
          /* FALLTHROUGH */
      case EXACT:
        do_exact:
-        assert(STR_LEN(p) == reginfo->is_utf8_pat ? UTF8SKIP(STRING(p)) : 1);
+       string = (U8 *) STRINGs(p);
+        str_len = STR_LENs(p);
  
-       c = (U8)*STRING(p);
+      join_short_long_exact:
+        assert(str_len == reginfo->is_utf8_pat ? UTF8SKIP(string) : 1);
+
+       c = *string;
  
          /* Can use a simple find if the pattern char to match on is invariant
           * under UTF-8, or both target and pattern aren't UTF-8.  Note that we
           * can use UTF8_IS_INVARIANT() even if the pattern isn't UTF-8, as it's
           * true iff it doesn't matter if the argument is in UTF-8 or not */
          if (UTF8_IS_INVARIANT(c) || (! utf8_target && ! reginfo->is_utf8_pat)) {
-            if (utf8_target && loceol - scan > max) {
-                /* We didn't adjust <loceol> because is UTF-8, but ok to do so,
+            if (utf8_target && this_eol - scan > max) {
+                /* We didn't adjust <this_eol> because the target is UTF-8, but ok to do so,
                   * since here, to match at all, 1 char == 1 byte */
-                loceol = scan + max;
+                this_eol = scan + max;
              }
-            scan = (char *) find_span_end((U8 *) scan, (U8 *) loceol, (U8) c);
+            scan = (char *) find_span_end((U8 *) scan, (U8 *) this_eol, (U8) c);
         }
         else if (reginfo->is_utf8_pat) {
              if (utf8_target) {
@@ -9185,9 +9771,9 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
                  /* When both target and pattern are UTF-8, we have to do
                   * string EQ */
                  while (hardcount < max
-                       && scan < loceol
-                       && (scan_char_len = UTF8SKIP(scan)) <= STR_LEN(p)
-                       && memEQ(scan, STRING(p), scan_char_len))
+                       && scan < this_eol
+                       && (scan_char_len = UTF8SKIP(scan)) <= str_len
+                       && memEQ(scan, string, scan_char_len))
                  {
                      scan += scan_char_len;
                      hardcount++;
@@ -9197,8 +9783,8 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
  
                  /* Target isn't utf8; convert the character in the UTF-8
                   * pattern to non-UTF8, and do a simple find */
-                c = EIGHT_BIT_UTF8_TO_NATIVE(c, *(STRING(p) + 1));
-                scan = (char *) find_span_end((U8 *) scan, (U8 *) loceol, (U8) c);
+                c = EIGHT_BIT_UTF8_TO_NATIVE(c, *(string + 1));
+                scan = (char *) find_span_end((U8 *) scan, (U8 *) this_eol, (U8) c);
              } /* else pattern char is above Latin1, can't possibly match the
                   non-UTF-8 target */
          }
@@ -9212,7 +9798,7 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
             U8 low = UTF8_TWO_BYTE_LO(c);
  
             while (hardcount < max
-                   && scan + 1 < loceol
+                   && scan + 1 < this_eol
                     && UCHARAT(scan) == high
                     && UCHARAT(scan + 1) == low)
             {
@@ -9221,6 +9807,7 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
             }
         }
         break;
+      }
  
      case EXACTFAA_NO_TRIE: /* This node only generated for non-utf8 patterns */
          assert(! reginfo->is_utf8_pat);
@@ -9253,7 +9840,7 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
                                      | FOLDEQ_S2_FOLDS_SANE;
          goto do_exactf;
  
-    case EXACTFU_ONLY8:
+    case EXACTFU_REQ8:
          if (! utf8_target) {
              break;
          }
@@ -9271,48 +9858,51 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
          int c1, c2;
          U8 c1_utf8[UTF8_MAXBYTES+1], c2_utf8[UTF8_MAXBYTES+1];
  
-        assert(STR_LEN(p) == reginfo->is_utf8_pat ? UTF8SKIP(STRING(p)) : 1);
+        assert(STR_LENs(p) == reginfo->is_utf8_pat ? UTF8SKIP(STRINGs(p)) : 1);
  
          if (S_setup_EXACTISH_ST_c1_c2(aTHX_ p, &c1, c1_utf8, &c2, c2_utf8,
                                          reginfo))
          {
              if (c1 == CHRTEST_VOID) {
                  /* Use full Unicode fold matching */
-                char *tmpeol = reginfo->strend;
-                STRLEN pat_len = reginfo->is_utf8_pat ? UTF8SKIP(STRING(p)) : 1;
+                char *tmpeol = loceol;
+                STRLEN pat_len = reginfo->is_utf8_pat ? UTF8SKIP(STRINGs(p)) : 1;
                  while (hardcount < max
                          && foldEQ_utf8_flags(scan, &tmpeol, 0, utf8_target,
-                                             STRING(p), NULL, pat_len,
+                                             STRINGs(p), NULL, pat_len,
                                               reginfo->is_utf8_pat, utf8_flags))
                  {
                      scan = tmpeol;
-                    tmpeol = reginfo->strend;
+                    tmpeol = loceol;
                      hardcount++;
                  }
              }
              else if (utf8_target) {
                  if (c1 == c2) {
-                    while (scan < loceol
+                    while (scan < this_eol
                             && hardcount < max
-                           && memEQ(scan, c1_utf8, UTF8SKIP(scan)))
+                           && memEQ(scan, c1_utf8, UTF8_SAFE_SKIP(scan,
+                                                                  loceol)))
                      {
-                        scan += UTF8SKIP(scan);
+                        scan += UTF8SKIP(c1_utf8);
                          hardcount++;
                      }
                  }
                  else {
-                    while (scan < loceol
+                    while (scan < this_eol
                             && hardcount < max
-                           && (memEQ(scan, c1_utf8, UTF8SKIP(scan))
-                               || memEQ(scan, c2_utf8, UTF8SKIP(scan))))
+                           && (   memEQ(scan, c1_utf8, UTF8_SAFE_SKIP(scan,
+                                                                     loceol))
+                               || memEQ(scan, c2_utf8, UTF8_SAFE_SKIP(scan,
+                                                                     loceol))))
                      {
-                        scan += UTF8SKIP(scan);
+                        scan += UTF8_SAFE_SKIP(scan, loceol);
                          hardcount++;
                      }
                  }
              }
              else if (c1 == c2) {
-                scan = (char *) find_span_end((U8 *) scan, (U8 *) loceol, (U8) c1);
+                scan = (char *) find_span_end((U8 *) scan, (U8 *) this_eol, (U8) c1);
              }
              else {
                  /* See comments in regmatch() CURLY_B_min_known_fail.  We avoid
@@ -9324,12 +9914,12 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
                      U8 c1_c2_mask = ~ c1_c2_bits_differing;
  
                      scan = (char *) find_span_end_mask((U8 *) scan,
-                                                       (U8 *) loceol,
+                                                       (U8 *) this_eol,
                                                         c1 & c1_c2_mask,
                                                         c1_c2_mask);
                  }
                  else {
-                    while (    scan < loceol
+                    while (    scan < this_eol
                             && (UCHARAT(scan) == c1 || UCHARAT(scan) == c2))
                      {
                          scan++;
@@ -9342,49 +9932,47 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
      case ANYOFPOSIXL:
      case ANYOFL:
          _CHECK_AND_WARN_PROBLEMATIC_LOCALE;
+         CHECK_AND_WARN_NON_UTF8_CTYPE_LOCALE_IN_SETS(p);
  
-        if (ANYOFL_UTF8_LOCALE_REQD(FLAGS(p)) && ! IN_UTF8_CTYPE_LOCALE) {
-            Perl_ck_warner(aTHX_ packWARN(WARN_LOCALE), utf8_locale_required);
-        }
          /* FALLTHROUGH */
      case ANYOFD:
      case ANYOF:
         if (utf8_target) {
             while (hardcount < max
-                   && scan < loceol
-                  && reginclass(prog, p, (U8*)scan, (U8*) loceol, utf8_target))
+                   && scan < this_eol
+                  && reginclass(prog, p, (U8*)scan, (U8*) this_eol, utf8_target))
             {
                 scan += UTF8SKIP(scan);
                 hardcount++;
             }
         }
          else if (ANYOF_FLAGS(p) & ~ ANYOF_MATCHES_ALL_ABOVE_BITMAP) {
-           while (scan < loceol
+           while (scan < this_eol
                      && reginclass(prog, p, (U8*)scan, (U8*)scan+1, 0))
                 scan++;
          }
          else {
-           while (scan < loceol && ANYOF_BITMAP_TEST(p, *((U8*)scan)))
+           while (scan < this_eol && ANYOF_BITMAP_TEST(p, *((U8*)scan)))
                 scan++;
         }
         break;
  
      case ANYOFM:
-        if (utf8_target && loceol - scan > max) {
+        if (utf8_target && this_eol - scan > max) {
  
-            /* We didn't adjust <loceol> at the beginning of this routine
+            /* We didn't adjust <this_eol> at the beginning of this routine
               * because is UTF-8, but it is actually ok to do so, since here, to
               * match, 1 char == 1 byte. */
-            loceol = scan + max;
+            this_eol = scan + max;
          }
  
-        scan = (char *) find_span_end_mask((U8 *) scan, (U8 *) loceol, (U8) ARG(p), FLAGS(p));
+        scan = (char *) find_span_end_mask((U8 *) scan, (U8 *) this_eol, (U8) ARG(p), FLAGS(p));
          break;
  
      case NANYOFM:
         if (utf8_target) {
             while (     hardcount < max
-                   &&   scan < loceol
+                   &&   scan < this_eol
                    &&  (*scan & FLAGS(p)) != ARG(p))
             {
                 scan += UTF8SKIP(scan);
@@ -9392,18 +9980,115 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
             }
         }
          else {
-            scan = (char *) find_next_masked((U8 *) scan, (U8 *) loceol, (U8) ARG(p), FLAGS(p));
+            scan = (char *) find_next_masked((U8 *) scan, (U8 *) this_eol, (U8) ARG(p), FLAGS(p));
         }
          break;
  
      case ANYOFH:
-        if (utf8_target) while (   hardcount < max
-                                && scan < loceol
-                                && reginclass(prog, p, (U8*)scan, (U8*) loceol,
-                                                                  TRUE))
-        {
-            scan += UTF8SKIP(scan);
-            hardcount++;
+        if (utf8_target) {  /* ANYOFH only can match UTF-8 targets */
+            while (  hardcount < max
+                   && scan < this_eol
+                   && NATIVE_UTF8_TO_I8(*scan) >= ANYOF_FLAGS(p)
+                   && reginclass(prog, p, (U8*)scan, (U8*) this_eol, TRUE))
+            {
+                scan += UTF8SKIP(scan);
+                hardcount++;
+            }
+        }
+        break;
+
+    case ANYOFHb:
+        if (utf8_target) {  /* ANYOFHb only can match UTF-8 targets */
+
+            /* we know the first byte must be the FLAGS field */
+            while (   hardcount < max
+                   && scan < this_eol
+                   && (U8) *scan == ANYOF_FLAGS(p)
+                   && reginclass(prog, p, (U8*)scan, (U8*) this_eol,
+                                                              TRUE))
+            {
+                scan += UTF8SKIP(scan);
+                hardcount++;
+            }
+        }
+        break;
+
+    case ANYOFHr:
+        if (utf8_target) {  /* ANYOFHr only can match UTF-8 targets */
+            while (  hardcount < max
+                   && scan < this_eol
+                   && inRANGE(NATIVE_UTF8_TO_I8(*scan),
+                              LOWEST_ANYOF_HRx_BYTE(ANYOF_FLAGS(p)),
+                              HIGHEST_ANYOF_HRx_BYTE(ANYOF_FLAGS(p)))
+                   && NATIVE_UTF8_TO_I8(*scan) >= ANYOF_FLAGS(p)
+                   && reginclass(prog, p, (U8*)scan, (U8*) this_eol, TRUE))
+            {
+                scan += UTF8SKIP(scan);
+                hardcount++;
+            }
+        }
+        break;
+
+    case ANYOFHs:
+        if (utf8_target) {  /* ANYOFHs only can match UTF-8 targets */
+            while (   hardcount < max
+                   && scan + FLAGS(p) < this_eol
+                   && memEQ(scan, ((struct regnode_anyofhs *) p)->string, FLAGS(p))
+                   && reginclass(prog, p, (U8*)scan, (U8*) this_eol, TRUE))
+            {
+                scan += UTF8SKIP(scan);
+                hardcount++;
+            }
+        }
+        break;
+
+    case ANYOFR:
+        if (utf8_target) {
+            while (   hardcount < max
+                   && scan < this_eol
+                   && NATIVE_UTF8_TO_I8(*scan) >= ANYOF_FLAGS(p)
+                   && withinCOUNT(utf8_to_uvchr_buf((U8 *) scan,
+                                                (U8 *) this_eol,
+                                                NULL),
+                                  ANYOFRbase(p), ANYOFRdelta(p)))
+            {
+                scan += UTF8SKIP(scan);
+                hardcount++;
+            }
+        }
+        else {
+            while (   hardcount < max
+                   && scan < this_eol
+                   && withinCOUNT((U8) *scan, ANYOFRbase(p), ANYOFRdelta(p)))
+            {
+                scan++;
+                hardcount++;
+            }
+        }
+        break;
+
+    case ANYOFRb:
+        if (utf8_target) {
+            while (   hardcount < max
+                   && scan < this_eol
+                   && (U8) *scan == ANYOF_FLAGS(p)
+                   && withinCOUNT(utf8_to_uvchr_buf((U8 *) scan,
+                                                (U8 *) this_eol,
+                                                NULL),
+                                  ANYOFRbase(p), ANYOFRdelta(p)))
+            {
+                scan += UTF8SKIP(scan);
+                hardcount++;
+            }
+        }
+        else {
+            while (   hardcount < max
+                   && scan < this_eol
+                   && withinCOUNT((U8) *scan, ANYOFRbase(p), ANYOFRdelta(p)))
+            {
+                scan++;
+                hardcount++;
+            }
          }
          break;
  
@@ -9416,16 +10101,16 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
      case POSIXL:
          _CHECK_AND_WARN_PROBLEMATIC_LOCALE;
         if (! utf8_target) {
-           while (scan < loceol && to_complement ^ cBOOL(isFOO_lc(FLAGS(p),
+           while (scan < this_eol && to_complement ^ cBOOL(isFOO_lc(FLAGS(p),
                                                                     *scan)))
              {
                 scan++;
              }
         } else {
-           while (hardcount < max && scan < loceol
+           while (hardcount < max && scan < this_eol
                     && to_complement ^ cBOOL(isFOO_utf8_lc(FLAGS(p),
                                                                    (U8 *) scan,
-                                                                  (U8 *) loceol)))
+                                                                  (U8 *) this_eol)))
              {
                  scan += UTF8SKIP(scan);
                 hardcount++;
@@ -9440,14 +10125,14 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
          /* FALLTHROUGH */
  
      case POSIXA:
-        if (utf8_target && loceol - scan > max) {
+        if (utf8_target && this_eol - scan > max) {
  
-            /* We didn't adjust <loceol> at the beginning of this routine
+            /* We didn't adjust <this_eol> at the beginning of this routine
               * because is UTF-8, but it is actually ok to do so, since here, to
               * match, 1 char == 1 byte. */
-            loceol = scan + max;
+            this_eol = scan + max;
          }
-        while (scan < loceol && _generic_isCC_A((U8) *scan, FLAGS(p))) {
+        while (scan < this_eol && _generic_isCC_A((U8) *scan, FLAGS(p))) {
             scan++;
         }
         break;
@@ -9461,7 +10146,7 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
  
      case NPOSIXA:
          if (! utf8_target) {
-            while (scan < loceol && ! _generic_isCC_A((U8) *scan, FLAGS(p))) {
+            while (scan < this_eol && ! _generic_isCC_A((U8) *scan, FLAGS(p))) {
                  scan++;
              }
          }
@@ -9469,8 +10154,8 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
  
              /* The complement of something that matches only ASCII matches all
               * non-ASCII, plus everything in ASCII that isn't in the class. */
-           while (hardcount < max && scan < loceol
-                   && (   ! isASCII_utf8_safe(scan, reginfo->strend)
+           while (hardcount < max && scan < this_eol
+                   && (   ! isASCII_utf8_safe(scan, loceol)
                         || ! _generic_isCC_A((U8) *scan, FLAGS(p))))
              {
                  scan += UTF8SKIP(scan);
@@ -9485,7 +10170,7 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
  
      case POSIXU:
         if (! utf8_target) {
-            while (scan < loceol && to_complement
+            while (scan < this_eol && to_complement
                                  ^ cBOOL(_generic_isCC((U8) *scan, FLAGS(p))))
              {
                  scan++;
@@ -9496,11 +10181,11 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
              classnum = (_char_class_number) FLAGS(p);
              switch (classnum) {
                  default:
-                    while (   hardcount < max && scan < loceol
+                    while (   hardcount < max && scan < this_eol
                             && to_complement ^ cBOOL(_invlist_contains_cp(
                                                PL_XPosix_ptrs[classnum],
                                                utf8_to_uvchr_buf((U8 *) scan,
-                                                                (U8 *) loceol,
+                                                                (U8 *) this_eol,
                                                                  NULL))))
                      {
                          scan += UTF8SKIP(scan);
@@ -9516,9 +10201,9 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
  
                  case _CC_ENUM_SPACE:
                      while (hardcount < max
-                           && scan < loceol
+                           && scan < this_eol
                             && (to_complement
-                               ^ cBOOL(isSPACE_utf8_safe(scan, loceol))))
+                               ^ cBOOL(isSPACE_utf8_safe(scan, this_eol))))
                      {
                          scan += UTF8SKIP(scan);
                          hardcount++;
@@ -9526,9 +10211,9 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
                      break;
                  case _CC_ENUM_BLANK:
                      while (hardcount < max
-                           && scan < loceol
+                           && scan < this_eol
                             && (to_complement
-                                ^ cBOOL(isBLANK_utf8_safe(scan, loceol))))
+                                ^ cBOOL(isBLANK_utf8_safe(scan, this_eol))))
                      {
                          scan += UTF8SKIP(scan);
                          hardcount++;
@@ -9536,9 +10221,9 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
                      break;
                  case _CC_ENUM_XDIGIT:
                      while (hardcount < max
-                           && scan < loceol
+                           && scan < this_eol
                             && (to_complement
-                               ^ cBOOL(isXDIGIT_utf8_safe(scan, loceol))))
+                               ^ cBOOL(isXDIGIT_utf8_safe(scan, this_eol))))
                      {
                          scan += UTF8SKIP(scan);
                          hardcount++;
@@ -9546,9 +10231,9 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
                      break;
                  case _CC_ENUM_VERTSPACE:
                      while (hardcount < max
-                           && scan < loceol
+                           && scan < this_eol
                             && (to_complement
-                               ^ cBOOL(isVERTWS_utf8_safe(scan, loceol))))
+                               ^ cBOOL(isVERTWS_utf8_safe(scan, this_eol))))
                      {
                          scan += UTF8SKIP(scan);
                          hardcount++;
@@ -9556,9 +10241,9 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
                      break;
                  case _CC_ENUM_CNTRL:
                      while (hardcount < max
-                           && scan < loceol
+                           && scan < this_eol
                             && (to_complement
-                               ^ cBOOL(isCNTRL_utf8_safe(scan, loceol))))
+                               ^ cBOOL(isCNTRL_utf8_safe(scan, this_eol))))
                      {
                          scan += UTF8SKIP(scan);
                          hardcount++;
@@ -9570,16 +10255,15 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
  
      case LNBREAK:
          if (utf8_target) {
-           while (hardcount < max && scan < loceol &&
-                    (c=is_LNBREAK_utf8_safe(scan, loceol))) {
+           while (hardcount < max && scan < this_eol &&
+                    (c=is_LNBREAK_utf8_safe(scan, this_eol))) {
                 scan += c;
                 hardcount++;
             }
         } else {
              /* LNBREAK can match one or two latin chars, which is ok, but we
               * have to use hardcount in this situation, and throw away the
-             * adjustment to <loceol> done before the switch statement */
-            loceol = reginfo->strend;
+             * adjustment to <this_eol> done before the switch statement */
             while (scan < loceol && (c=is_LNBREAK_latin1_safe(scan, loceol))) {
                 scan+=c;
                 hardcount++;
@@ -9587,25 +10271,6 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
         }
         break;
  
-    case BOUNDL:
-    case NBOUNDL:
-        _CHECK_AND_WARN_PROBLEMATIC_LOCALE;
-        /* FALLTHROUGH */
-    case BOUND:
-    case BOUNDA:
-    case BOUNDU:
-    case EOS:
-    case GPOS:
-    case KEEPS:
-    case NBOUND:
-    case NBOUNDA:
-    case NBOUNDU:
-    case OPFAIL:
-    case SBOL:
-    case SEOL:
-        /* These are all 0 width, so match right here or not at all. */
-        break;
-
      default:
          Perl_croak(aTHX_ "panic: regrepeat() called with unrecognized node type %d='%s'", OP(p), PL_reg_name[OP(p)]);
          NOT_REACHED; /* NOTREACHED */
@@ -9619,7 +10284,7 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
      *startposp = scan;
  
      DEBUG_r({
-       GET_RE_DEBUG_FLAGS_DECL;
+       DECLARE_AND_GET_RE_DEBUG_FLAGS;
         DEBUG_EXECUTE_r({
             SV * const prop = sv_newmortal();
              regprop(prog, prop, p, reginfo, NULL);
@@ -9650,8 +10315,9 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
  STATIC bool
  S_reginclass(pTHX_ regexp * const prog, const regnode * const n, const U8* const p, const U8* const p_end, const bool utf8_target)
  {
-    dVAR;
-    const char flags = ANYOF_FLAGS(n);
+    const char flags = (inRANGE(OP(n), ANYOFH, ANYOFHs))
+                        ? 0
+                        : ANYOF_FLAGS(n);
      bool match = FALSE;
      UV c = *p;
  
@@ -9678,7 +10344,7 @@ S_reginclass(pTHX_ regexp * const prog, const regnode * const n, const U8* const
      }
  
      /* If this character is potentially in the bitmap, check it */
-    if (c < NUM_ANYOF_CODE_POINTS && OP(n) != ANYOFH) {
+    if (c < NUM_ANYOF_CODE_POINTS && ! inRANGE(OP(n), ANYOFH, ANYOFHb)) {
         if (ANYOF_BITMAP_TEST(n, c))
             match = TRUE;
         else if ((flags
@@ -9691,7 +10357,7 @@ S_reginclass(pTHX_ regexp * const prog, const regnode * const n, const U8* const
         }
         else if (flags & ANYOF_LOCALE_FLAGS) {
             if (  (flags & ANYOFL_FOLD)
-                && c < sizeof(PL_fold_locale)
+                && c < 256
                 && ANYOF_BITMAP_TEST(n, PL_fold_locale[c]))
              {
                  match = TRUE;
@@ -9779,8 +10445,14 @@ S_reginclass(pTHX_ regexp * const prog, const regnode * const n, const U8* const
                           && IN_UTF8_CTYPE_LOCALE)))
          {
              SV* only_utf8_locale = NULL;
-           SV * const definition = _get_regclass_nonbitmap_data(prog, n, TRUE,
-                                                   0, &only_utf8_locale, NULL);
+           SV * const definition =
+#if !defined(PERL_IN_XSUB_RE) || defined(PLUGGABLE_RE_EXTENSION)
+                get_regclass_nonbitmap_data(prog, n, TRUE, 0,
+                                            &only_utf8_locale, NULL);
+#else
+                get_re_gclass_nonbitmap_data(prog, n, TRUE, 0,
+                                             &only_utf8_locale, NULL);
+#endif
             if (definition) {
                  U8 utf8_buffer[2];
                 U8 * utf8_p;
@@ -9980,6 +10652,7 @@ S_setup_eval_state(pTHX_ regmatch_info *const reginfo)
      regmatch_info_aux_eval *eval_state = reginfo->info_aux_eval;
  
      eval_state->rex = rex;
+    eval_state->sv  = reginfo->sv;
  
      if (reginfo->sv) {
          /* Make $_ available to executed code. */
@@ -9987,6 +10660,8 @@ S_setup_eval_state(pTHX_ regmatch_info *const reginfo)
              SAVE_DEFSV;
              DEFSV_set(reginfo->sv);
          }
+        /* will be dec'd by S_cleanup_regmatch_info_aux */
+        SvREFCNT_inc_NN(reginfo->sv);
  
          if (!(mg = mg_find_mglob(reginfo->sv))) {
              /* prepare for quick setting of pos */
@@ -10012,7 +10687,7 @@ S_setup_eval_state(pTHX_ regmatch_info *const reginfo)
              /* this regexp is also owned by the new PL_reg_curpm, which
                 will try to free it.  */
              av_push(PL_regex_padav, repointer);
-            PL_reg_curpm->op_pmoffset = av_tindex(PL_regex_padav);
+            PL_reg_curpm->op_pmoffset = av_top_index(PL_regex_padav);
              PL_regex_pad = AvARRAY(PL_regex_padav);
          }
  #endif
@@ -10078,6 +10753,7 @@ S_cleanup_regmatch_info_aux(pTHX_ void *arg)
          }
  
          PL_curpm = eval_state->curpm;
+        SvREFCNT_dec(eval_state->sv);
      }
  
      PL_regmatch_state = aux->old_regmatch_state;
@@ -10148,6 +10824,7 @@ S_to_byte_substr(pTHX_ regexp *prog)
             && !prog->substrs->data[i].substr) {
             SV* sv = newSVsv(prog->substrs->data[i].utf8_substr);
             if (! sv_utf8_downgrade(sv, TRUE)) {
+                SvREFCNT_dec_NN(sv);
                  return FALSE;
              }
              if (SvVALID(prog->substrs->data[i].utf8_substr)) {
@@ -10171,23 +10848,22 @@ S_to_byte_substr(pTHX_ regexp *prog)
  #ifndef PERL_IN_XSUB_RE
  
  bool
-Perl__is_grapheme(pTHX_ const U8 * strbeg, const U8 * s, const U8 * strend, const UV cp)
+Perl_is_grapheme(pTHX_ const U8 * strbeg, const U8 * s, const U8 * strend, const UV cp)
  {
      /* Temporary helper function for toke.c.  Verify that the code point 'cp'
       * is a stand-alone grapheme.  The UTF-8 for 'cp' begins at position 's' in
       * the larger string bounded by 'strbeg' and 'strend'.
       *
-     * 'cp' needs to be assigned (if not a future version of the Unicode
+     * 'cp' needs to be assigned (if not, a future version of the Unicode
       * Standard could make it something that combines with adjacent characters,
       * so code using it would then break), and there has to be a GCB break
       * before and after the character. */
  
-    dVAR;
  
      GCB_enum cp_gcb_val, prev_cp_gcb_val, next_cp_gcb_val;
      const U8 * prev_cp_start;
  
-    PERL_ARGS_ASSERT__IS_GRAPHEME;
+    PERL_ARGS_ASSERT_IS_GRAPHEME;
  
      if (   UNLIKELY(UNICODE_IS_SUPER(cp))
          || UNLIKELY(UNICODE_IS_NONCHAR(cp)))
@@ -10235,7 +10911,7 @@ Perl__is_grapheme(pTHX_ const U8 * strbeg, const U8 * s, const U8 * strend, cons
  }
  
  /*
-=head1 Unicode Support
+=for apidoc_section $unicode
  
  =for apidoc isSCRIPT_RUN
  
@@ -10250,11 +10926,13 @@ Additionally all decimal digits must come from the same consecutive sequence of
  
  For example, if all the characters in the sequence are Greek, or Common, or
  Inherited, this function will return TRUE, provided any decimal digits in it
-are the ASCII digits "0".."9".  For scripts (unlike Greek) that have their own
-digits defined this will accept either digits from that set or from 0..9, but
-not a combination of the two.  Some scripts, such as Arabic, have more than one
-set of digits.  All digits must come from the same set for this function to
-return TRUE.
+are from the same block of digits in Common.  (These are the ASCII digits
+"0".."9" and additionally a block for full width forms of these, and several
+others used in mathematical notation.)   For scripts (unlike Greek) that have
+their own digits defined this will accept either digits from that set or from
+one of the Common digit sets, but not a combination of the two.  Some scripts,
+such as Arabic, have more than one set of digits.  All digits must come from
+the same set for this function to return TRUE.
  
  C<*ret_script>, if C<ret_script> is not NULL, will on return of TRUE
  contain the script found, using the C<SCX_enum> typedef.  Its value will be
@@ -10302,7 +10980,6 @@ Perl_isSCRIPT_RUN(pTHX_ const U8 * s, const U8 * send, const bool utf8_target)
       * characters for at least one language in the Unicode Common Locale Data
       * Repository [CLDR]. */
  
-    dVAR;
  
      /* Things that match /\d/u */
      SV * decimals_invlist = PL_XPosix_ptrs[_CC_DIGIT];
@@ -10357,10 +11034,9 @@ Perl_isSCRIPT_RUN(pTHX_ const U8 * s, const U8 * send, const bool utf8_target)
          UV cp;
  
          /* The code allows all scripts to use the ASCII digits.  This is
-         * because they are used in commerce even in scripts that have their
-         * own set.  Hence any ASCII ones found are ok, unless and until a
-         * digit from another set has already been encountered.  (The other
-         * digit ranges in Common are not similarly blessed) */
+         * because they are in the Common script.  Hence any ASCII ones found
+         * are ok, unless and until a digit from another set has already been
+         * encountered.  (Other digit ranges are checked below.) */
          if (UNLIKELY(isDIGIT(*s))) {
              if (UNLIKELY(script_of_run == SCX_Unknown)) {
                  retval = FALSE;
@@ -10392,10 +11068,7 @@ Perl_isSCRIPT_RUN(pTHX_ const U8 * s, const U8 * send, const bool utf8_target)
          /* If is within the range [+0 .. +9] of the script's zero, it also is a
           * digit in that script.  We can skip the rest of this code for this
           * character. */
-        if (UNLIKELY(   zero_of_run
-                     && cp >= zero_of_run
-                     && cp - zero_of_run <= 9))
-        {
+        if (UNLIKELY(zero_of_run && withinCOUNT(cp, zero_of_run, 9))) {
              continue;
          }
  
@@ -10454,19 +11127,11 @@ Perl_isSCRIPT_RUN(pTHX_ const U8 * s, const U8 * send, const bool utf8_target)
          /* If the run so far is Common, and the new character isn't, change the
           * run's script to that of this character */
          if (script_of_run == SCX_Common && script_of_char != SCX_Common) {
-
-            /* But Common contains several sets of digits.  Only the '0' set
-             * can be part of another script. */
-            if (zero_of_run && zero_of_run != '0') {
-                retval = FALSE;
-                break;
-            }
-
              script_of_run = script_of_char;
          }
  
-        /* Now we can see if the script of the character is the same as that of
-         * the run */
+        /* Now we can see if the script of the new character is the same as
+         * that of the run */
          if (LIKELY(script_of_char == script_of_run)) {
              /* By far the most common case */
              goto scripts_match;
@@ -10624,7 +11289,7 @@ Perl_isSCRIPT_RUN(pTHX_ const U8 * s, const U8 * send, const bool utf8_target)
           * several scripts, and the intersection is not empty.  However, if the
           * character is a decimal digit, it could still mean failure if it is
           * from the wrong sequence of 10.  So, we need to look at if it's a
-         * digit.  We've already handled the 10 decimal digits, and the next
+         * digit.  We've already handled the 10 digits [0-9], and the next
           * lowest one is this one: */
          if (cp < FIRST_NON_ASCII_DECIMAL_DIGIT) {
              continue;   /* Not a digit; this character is part of the run */
@@ -10636,9 +11301,7 @@ Perl_isSCRIPT_RUN(pTHX_ const U8 * s, const U8 * send, const bool utf8_target)
          if (   script_of_char >= 0
              && (zero_of_char = script_zeros[script_of_char]))
          {
-            if (   cp < zero_of_char
-                || cp > zero_of_char + 9)
-            {
+            if (! withinCOUNT(cp, zero_of_char, 9)) {
                  continue;   /* Not a digit; this character is part of the run
                               */
              }
@@ -10666,14 +11329,6 @@ Perl_isSCRIPT_RUN(pTHX_ const U8 * s, const U8 * send, const bool utf8_target)
                  break;
              }
          }
-        else if (script_of_char == SCX_Common && script_of_run != SCX_Common) {
-
-            /* Here, the script run isn't Common, but the current digit is in
-             * Common, and isn't '0'-'9' (those were handled earlier).   Only
-             * '0'-'9' are acceptable in non-Common scripts. */
-            retval = FALSE;
-            break;
-        }
          else {  /* Otherwise we now have a zero for this run */
              zero_of_run = zero_of_char;
          }