Document cast NV to int macros; make helpers internal

[perl5.git] / regexec.c
diff --git a/regexec.c b/regexec.c

index deecde7..91fb3d2 100644 (file)
--- a/regexec.c
+++ b/regexec.c
@@ -92,7 +92,7 @@ static const char utf8_locale_required[] =
  #ifdef DEBUGGING
  /* At least one required character in the target string is expressible only in
   * UTF-8. */
-static const char* const non_utf8_target_but_utf8_required
+static const char non_utf8_target_but_utf8_required[]
                  = "Can't match, because target string needs to be in UTF-8\n";
  #endif
  
@@ -218,7 +218,7 @@ S_regcppush(pTHX_ const regexp *rex, I32 parenfloor, U32 maxopenparen _pDEPTH)
      const UV total_elems = paren_elems_to_push + REGCP_OTHER_ELEMS;
      const UV elems_shifted = total_elems << SAVE_TIGHT_SHIFT;
      I32 p;
-    GET_RE_DEBUG_FLAGS_DECL;
+    DECLARE_AND_GET_RE_DEBUG_FLAGS;
  
      PERL_ARGS_ASSERT_REGCPPUSH;
  
@@ -328,7 +328,7 @@ S_regcppop(pTHX_ regexp *rex, U32 *maxopenparen_p _pDEPTH)
  {
      UV i;
      U32 paren;
-    GET_RE_DEBUG_FLAGS_DECL;
+    DECLARE_AND_GET_RE_DEBUG_FLAGS;
  
      PERL_ARGS_ASSERT_REGCPPOP;
  
@@ -422,7 +422,7 @@ Perl_isFOO_lc(pTHX_ const U8 classnum, const U8 character)
       * Ideally this could be replaced by a just an array of function pointers
       * to the C library functions that implement the macros this calls.
       * However, to compile, the precise function signatures are required, and
-     * these may vary from platform to to platform.  To avoid having to figure
+     * these may vary from platform to platform.  To avoid having to figure
       * out what those all are on each platform, I (khw) am using this method,
       * which adds an extra layer of function call overhead (unless the C
       * optimizer strips it away).  But we don't particularly care about
@@ -496,7 +496,6 @@ S_isFOO_utf8_lc(pTHX_ const U8 classnum, const U8* character, const U8* e)
       * rules, ignoring any locale.  So use the Unicode function if this class
       * requires an inversion list, and use the Unicode macro otherwise. */
  
-    dVAR;
  
      PERL_ARGS_ASSERT_ISFOO_UTF8_LC;
  
@@ -579,7 +578,7 @@ S_find_span_end(U8 * s, const U8 * send, const U8 span_byte)
              span_word |= span_word << 4;
  
              /* That reduces the problem to what this function solves */
-            return s + _variant_byte_number(span_word);
+            return s + variant_byte_number(span_word);
  
  #endif
  
@@ -657,7 +656,7 @@ S_find_next_masked(U8 * s, const U8 * send, const U8 byte, const U8 mask)
              masked &= PERL_VARIANTS_WORD_MASK;
  
              /* This reduces the problem to that solved by this function */
-            s += _variant_byte_number(masked);
+            s += variant_byte_number(masked);
              return s;
  
          } while (s + PERL_WORDSIZE <= send);
@@ -723,7 +722,7 @@ S_find_span_end_mask(U8 * s, const U8 * send, const U8 span_byte, const U8 mask)
              masked |= masked << 1;
              masked |= masked << 2;
              masked |= masked << 4;
-            return s + _variant_byte_number(masked);
+            return s + variant_byte_number(masked);
  
  #endif
  
@@ -859,7 +858,7 @@ Perl_re_intuit_start(pTHX_
      RXi_GET_DECL(prog,progi);
      regmatch_info reginfo_buf;  /* create some info to pass to find_byclass */
      regmatch_info *const reginfo = &reginfo_buf;
-    GET_RE_DEBUG_FLAGS_DECL;
+    DECLARE_AND_GET_RE_DEBUG_FLAGS;
  
      PERL_ARGS_ASSERT_RE_INTUIT_START;
      PERL_UNUSED_ARG(flags);
@@ -1173,8 +1172,8 @@ Perl_re_intuit_start(pTHX_
  
      /* now look for the 'other' substring if defined */
  
-    if (utf8_target ? prog->substrs->data[other_ix].utf8_substr
-                    : prog->substrs->data[other_ix].substr)
+    if (prog->substrs->data[other_ix].utf8_substr
+        || prog->substrs->data[other_ix].substr)
      {
         /* Take into account the "other" substring. */
          char *last, *last1;
@@ -1184,6 +1183,11 @@ Perl_re_intuit_start(pTHX_
  
        do_other_substr:
          other = &prog->substrs->data[other_ix];
+        if (!utf8_target && !other->substr) {
+            if (!to_byte_substr(prog)) {
+                NON_UTF8_TARGET_BUT_UTF8_REQUIRED(fail);
+            }
+        }
  
          /* if "other" is anchored:
           * we've previously found a floating substr starting at check_at.
@@ -1467,10 +1471,10 @@ Perl_re_intuit_start(pTHX_
          const U8* const str = (U8*)STRING(progi->regstclass);
  
          /* XXX this value could be pre-computed */
-        const int cl_l = (PL_regkind[OP(progi->regstclass)] == EXACT
+        const SSize_t cl_l = (PL_regkind[OP(progi->regstclass)] == EXACT
                     ?  (reginfo->is_utf8_pat
-                        ? utf8_distance(str + STR_LEN(progi->regstclass), str)
-                        : STR_LEN(progi->regstclass))
+                        ? (SSize_t)utf8_distance(str + STR_LEN(progi->regstclass), str)
+                        : (SSize_t)STR_LEN(progi->regstclass))
                     : 1);
         char * endpos;
          char *s;
@@ -1720,7 +1724,7 @@ STMT_START {
          } else {                                                                    \
              uvc = _toFOLD_utf8_flags( (const U8*) uc, uc_end, foldbuf, &foldlen,    \
                                                                              flags); \
-            len = UTF8SKIP(uc);                                                     \
+            len = UTF8_SAFE_SKIP(uc, uc_end);                                       \
              skiplen = UVCHR_SKIP( uvc );                                            \
              foldlen -= skiplen;                                                     \
              uscan = foldbuf + skiplen;                                              \
@@ -1782,7 +1786,9 @@ STMT_START {
      STMT_START {                                            \
          while (s < strend) {                                \
              CODE                                            \
-            s += ((UTF8) ? UTF8SKIP(s) : 1);                \
+            s += ((UTF8)                                    \
+                  ? UTF8_SAFE_SKIP(s, reginfo->strend)      \
+                  : 1);                                     \
          }                                                   \
      } STMT_END
  
@@ -1796,7 +1802,7 @@ STMT_START {
  #define REXEC_FBC_CLASS_SCAN_GUTS(UTF8, COND)                  \
      if (COND) {                                                \
          FBC_CHECK_AND_TRY                                      \
-        s += ((UTF8) ? UTF8SKIP(s) : 1);                       \
+        s += ((UTF8) ? UTF8_SAFE_SKIP(s, reginfo->strend) : 1);\
          previous_occurrence_end = s;                           \
      }                                                          \
      else {                                                     \
@@ -1815,12 +1821,13 @@ STMT_START {
   * of the one we're looking for.  Knowing that, we can see right away if the
   * next occurrence is adjacent to the previous.  When 'doevery' is FALSE, we
   * don't accept the 2nd and succeeding adjacent occurrences */
-#define FBC_CHECK_AND_TRY                                      \
-        if (   (   doevery                                     \
-                || s != previous_occurrence_end)               \
-            && (reginfo->intuit || regtry(reginfo, &s)))       \
-        {                                                      \
-            goto got_it;                                       \
+#define FBC_CHECK_AND_TRY                                           \
+        if (   (   doevery                                          \
+                || s != previous_occurrence_end)                    \
+            && (   reginfo->intuit                                  \
+                || (s <= reginfo->strend && regtry(reginfo, &s))))  \
+        {                                                           \
+            goto got_it;                                            \
          }
  
  
@@ -1839,6 +1846,28 @@ STMT_START {
          previous_occurrence_end = s;                        \
      }
  
+/* This differs from the above macros in that it is passed a single byte that
+ * is known to begin the next occurrence of the thing being looked for in 's'.
+ * It does a memchr to find the next occurrence of 'byte', before trying 'COND'
+ * at that position. */
+#define REXEC_FBC_FIND_NEXT_UTF8_BYTE_SCAN(byte, COND)      \
+    while (s < strend) {                                    \
+        s = (char *) memchr(s, byte, strend -s);            \
+        if (s == NULL) {                                    \
+            s = (char *) strend;                            \
+            break;                                          \
+        }                                                   \
+                                                            \
+        if (COND) {                                         \
+            FBC_CHECK_AND_TRY                               \
+            s += UTF8_SAFE_SKIP(s, reginfo->strend);        \
+            previous_occurrence_end = s;                    \
+        }                                                   \
+        else {                                              \
+            s += UTF8SKIP(s);                               \
+        }                                                   \
+    }
+
  /* The three macros below are slightly different versions of the same logic.
   *
   * The first is for /a and /aa when the target string is UTF-8.  This can only
@@ -1890,7 +1919,8 @@ STMT_START {
  
  /* Like FBC_UTF8_A, but TEST_UV is a macro which takes a UV as its input, and
   * TEST_UTF8 is a macro that for the same input code points returns identically
- * to TEST_UV, but takes a pointer to a UTF-8 encoded string instead */
+ * to TEST_UV, but takes a pointer to a UTF-8 encoded string instead (and an
+ * end pointer as well) */
  #define FBC_UTF8(TEST_UV, TEST_UTF8, IF_SUCCESS, IF_FAIL)                      \
      if (s == reginfo->strbeg) {                                                \
          tmp = '\n';                                                            \
@@ -1945,9 +1975,12 @@ STMT_START {
      }
  
  /* This is the macro to use when we want to see if something that looks like it
- * could match, actually does, and if so exits the loop */
-#define REXEC_FBC_TRYIT                            \
-    if ((reginfo->intuit || regtry(reginfo, &s)))  \
+ * could match, actually does, and if so exits the loop.  It should be used
+ * only by the bounds checking macros, as it allows a match attempt at the end
+ * of the string (such a match should be zero length, so the string contents
+ * need not be examined) */
+#define REXEC_FBC_TRYIT                                                     \
+    if (reginfo->intuit || (s <= reginfo->strend && regtry(reginfo, &s)))   \
          goto got_it
  
  /* The only difference between the BOUND and NBOUND cases is that
@@ -2065,7 +2098,6 @@ STATIC char *
  S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s, 
      const char *strend, regmatch_info *reginfo)
  {
-    dVAR;
  
      /* TRUE if x+ need not match at just the 1st pos of run of x's */
      const I32 doevery = (prog->intflags & PREGf_SKIP) == 0;
@@ -2129,21 +2161,89 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
          break;
  
      case ANYOFM:    /* ARG() is the base byte; FLAGS() the mask byte */
-        /* UTF-8ness doesn't matter, so use 0 */
+        /* UTF-8ness doesn't matter, because this matches only UTF-8
+         * invariants, so use 0 */
          REXEC_FBC_FIND_NEXT_SCAN(0,
           (char *) find_next_masked((U8 *) s, (U8 *) strend,
                                     (U8) ARG(c), FLAGS(c)));
          break;
  
-    case NANYOFM:
-        REXEC_FBC_FIND_NEXT_SCAN(0,
+    case NANYOFM:   /* UTF-8ness does matter, because this can match UTF-8
+                     * variants. */
+        REXEC_FBC_FIND_NEXT_SCAN(utf8_target,
           (char *) find_span_end_mask((U8 *) s, (U8 *) strend,
                                     (U8) ARG(c), FLAGS(c)));
          break;
  
      case ANYOFH:
-        if (utf8_target) REXEC_FBC_CLASS_SCAN(TRUE,
+        if (utf8_target) {  /* Can't possibly match a non-UTF-8 target */
+            REXEC_FBC_CLASS_SCAN(TRUE,
+                  (   (U8) NATIVE_UTF8_TO_I8(*s) >= ANYOF_FLAGS(c)
+                   && reginclass(prog, c, (U8*)s, (U8*) strend, utf8_target)));
+        }
+        break;
+
+    case ANYOFHb:
+        if (utf8_target) {  /* Can't possibly match a non-UTF-8 target */
+
+            /* We know what the first byte of any matched string should be */
+            U8 first_byte = FLAGS(c);
+
+            REXEC_FBC_FIND_NEXT_UTF8_BYTE_SCAN(first_byte,
                        reginclass(prog, c, (U8*)s, (U8*) strend, utf8_target));
+        }
+        break;
+
+    case ANYOFHr:
+        if (utf8_target) {  /* Can't possibly match a non-UTF-8 target */
+            REXEC_FBC_CLASS_SCAN(TRUE,
+                  (   inRANGE(NATIVE_UTF8_TO_I8(*s),
+                              LOWEST_ANYOF_HRx_BYTE(ANYOF_FLAGS(c)),
+                              HIGHEST_ANYOF_HRx_BYTE(ANYOF_FLAGS(c)))
+                   && reginclass(prog, c, (U8*)s, (U8*) strend, utf8_target)));
+        }
+        break;
+
+    case ANYOFHs:
+        if (utf8_target) {  /* Can't possibly match a non-UTF-8 target */
+            REXEC_FBC_CLASS_SCAN(TRUE,
+                  (   strend -s >= FLAGS(c)
+                   && memEQ(s, ((struct regnode_anyofhs *) c)->string, FLAGS(c))
+                   && reginclass(prog, c, (U8*)s, (U8*) strend, utf8_target)));
+        }
+        break;
+
+    case ANYOFR:
+        if (utf8_target) {
+            REXEC_FBC_CLASS_SCAN(TRUE,
+                  (   NATIVE_UTF8_TO_I8(*s) >= ANYOF_FLAGS(c)
+                   && withinCOUNT(utf8_to_uvchr_buf((U8 *) s,
+                                                    (U8 *) strend,
+                                                    NULL),
+                                  ANYOFRbase(c), ANYOFRdelta(c))));
+        }
+        else {
+            REXEC_FBC_CLASS_SCAN(0, withinCOUNT((U8) *s,
+                                               ANYOFRbase(c), ANYOFRdelta(c)));
+        }
+        break;
+
+    case ANYOFRb:
+        if (utf8_target) {
+
+            /* We know what the first byte of any matched string should be */
+            U8 first_byte = FLAGS(c);
+
+            REXEC_FBC_FIND_NEXT_UTF8_BYTE_SCAN(first_byte,
+                      withinCOUNT(utf8_to_uvchr_buf((U8 *) s,
+                                                    (U8 *) strend,
+                                                    NULL),
+                                  ANYOFRbase(c), ANYOFRdelta(c)));
+        }
+        else {
+            REXEC_FBC_CLASS_SCAN(0, withinCOUNT((U8) *s,
+                                               ANYOFRbase(c), ANYOFRdelta(c)));
+        }
          break;
  
      case EXACTFAA_NO_TRIE: /* This node only generated for non-utf8 patterns */
@@ -2207,7 +2307,7 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
                                               | FOLDEQ_S2_FOLDS_SANE;
              goto do_exactf_utf8;
  
-    case EXACTFU_ONLY8:
+    case EXACTFU_REQ8:
          if (! utf8_target) {
              break;
          }
@@ -2239,8 +2339,8 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
           * first character.  c2 is its fold.  This logic will not work for
           * Unicode semantics and the german sharp ss, which hence should
           * not be compiled into a node that gets here. */
-        pat_string = STRING(c);
-        ln  = STR_LEN(c);      /* length to match in octets/bytes */
+        pat_string = STRINGs(c);
+        ln  = STR_LENs(c);     /* length to match in octets/bytes */
  
          /* We know that we have to match at least 'ln' bytes (which is the
           * same as characters, since not utf8).  If we have to match 3
@@ -2315,8 +2415,8 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
          /* If one of the operands is in utf8, we can't use the simpler folding
           * above, due to the fact that many different characters can have the
           * same fold, or portion of a fold, or different- length fold */
-        pat_string = STRING(c);
-        ln  = STR_LEN(c);      /* length to match in octets/bytes */
+        pat_string = STRINGs(c);
+        ln  = STR_LENs(c);     /* length to match in octets/bytes */
          pat_end = pat_string + ln;
          lnc = is_utf8_pat       /* length to match in characters */
                  ? utf8_length((U8 *) pat_string, (U8 *) pat_end)
@@ -2355,7 +2455,7 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
              {
                  goto got_it;
              }
-            s += (utf8_target) ? UTF8SKIP(s) : 1;
+            s += (utf8_target) ? UTF8_SAFE_SKIP(s, reginfo->strend) : 1;
          }
          break;
      }
@@ -2439,7 +2539,7 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
                      }
  
                      /* Didn't match.  Try at the next position (if there is one) */
-                    s += (utf8_target) ? UTF8SKIP(s) : 1;
+                    s += (utf8_target) ? UTF8_SAFE_SKIP(s, reginfo->strend) : 1;
                      if (UNLIKELY(s >= reginfo->strend)) {
                          break;
                      }
@@ -2463,7 +2563,7 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
                              goto got_it;
                          }
                          before = after;
-                        s += UTF8SKIP(s);
+                        s += UTF8_SAFE_SKIP(s, reginfo->strend);
                      }
                  }
                  else {  /* Not utf8.  Everything is a GCB except between CR and
@@ -2481,7 +2581,9 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
  
                  /* And, since this is a bound, it can match after the final
                   * character in the string */
-                if ((reginfo->intuit || regtry(reginfo, &s))) {
+                if (   reginfo->intuit
+                    || (s <= reginfo->strend && regtry(reginfo, &s)))
+                {
                      goto got_it;
                  }
                  break;
@@ -2491,7 +2593,7 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
                      if (reginfo->intuit || regtry(reginfo, &s)) {
                          goto got_it;
                      }
-                    s += (utf8_target) ? UTF8SKIP(s) : 1;
+                    s += (utf8_target) ? UTF8_SAFE_SKIP(s, reginfo->strend) : 1;
                      if (UNLIKELY(s >= reginfo->strend)) {
                          break;
                      }
@@ -2515,7 +2617,7 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
                              goto got_it;
                          }
                          before = after;
-                        s += UTF8SKIP(s);
+                        s += UTF8_SAFE_SKIP(s, reginfo->strend);
                      }
                  }
                  else {  /* Not utf8. */
@@ -2537,7 +2639,9 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
                      }
                  }
  
-                if (reginfo->intuit || regtry(reginfo, &s)) {
+                if (   reginfo->intuit
+                    || (s <= reginfo->strend && regtry(reginfo, &s)))
+                {
                      goto got_it;
                  }
  
@@ -2548,7 +2652,7 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
                      if (reginfo->intuit || regtry(reginfo, &s)) {
                          goto got_it;
                      }
-                    s += (utf8_target) ? UTF8SKIP(s) : 1;
+                    s += (utf8_target) ? UTF8_SAFE_SKIP(s, reginfo->strend) : 1;
                      if (UNLIKELY(s >= reginfo->strend)) {
                          break;
                      }
@@ -2573,7 +2677,7 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
                              goto got_it;
                          }
                          before = after;
-                        s += UTF8SKIP(s);
+                        s += UTF8_SAFE_SKIP(s, reginfo->strend);
                      }
                  }
                  else {  /* Not utf8. */
@@ -2598,7 +2702,9 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
                  /* Here are at the final position in the target string.  The SB
                   * value is always true here, so matches, depending on other
                   * constraints */
-                if (reginfo->intuit || regtry(reginfo, &s)) {
+                if (   reginfo->intuit
+                    || (s <= reginfo->strend && regtry(reginfo, &s)))
+                {
                      goto got_it;
                  }
  
@@ -2609,7 +2715,7 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
                      if (reginfo->intuit || regtry(reginfo, &s)) {
                          goto got_it;
                      }
-                    s += (utf8_target) ? UTF8SKIP(s) : 1;
+                    s += (utf8_target) ? UTF8_SAFE_SKIP(s, reginfo->strend) : 1;
                      if (UNLIKELY(s >= reginfo->strend)) {
                          break;
                      }
@@ -2643,7 +2749,7 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
                          }
                          previous = before;
                          before = after;
-                        s += UTF8SKIP(s);
+                        s += UTF8_SAFE_SKIP(s, reginfo->strend);
                      }
                  }
                  else {  /* Not utf8. */
@@ -2668,7 +2774,9 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
                      }
                  }
  
-                if (reginfo->intuit || regtry(reginfo, &s)) {
+                if (   reginfo->intuit
+                    || (s <= reginfo->strend && regtry(reginfo, &s)))
+                {
                      goto got_it;
                  }
          }
@@ -2805,7 +2913,7 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
              U8 *bitmap=NULL;
  
  
-            GET_RE_DEBUG_FLAGS_DECL;
+            DECLARE_AND_GET_RE_DEBUG_FLAGS;
  
              /* We can't just allocate points here. We need to wrap it in
               * an SV so it gets freed properly if there is a croak while
@@ -2985,7 +3093,9 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
                          LEAVE;
                          goto got_it;
                      }
-                    s = HOPc(s,1);
+                    if (s < reginfo->strend) {
+                        s = HOPc(s,1);
+                    }
                      DEBUG_TRIE_EXECUTE_r({
                          Perl_re_printf( aTHX_ "Pattern failed. Looking for new start point...\n");
                      });
@@ -3186,7 +3296,7 @@ Perl_regexec_flags(pTHX_ REGEXP * const rx, char *stringarg, char *strend,
      regmatch_info *const reginfo = &reginfo_buf;
      regexp_paren_pair *swap = NULL;
      I32 oldsave;
-    GET_RE_DEBUG_FLAGS_DECL;
+    DECLARE_AND_GET_RE_DEBUG_FLAGS;
  
      PERL_ARGS_ASSERT_REGEXEC_FLAGS;
      PERL_UNUSED_ARG(data);
@@ -3240,7 +3350,7 @@ Perl_regexec_flags(pTHX_ REGEXP * const rx, char *stringarg, char *strend,
                  if (!startpos ||
                      ((flags & REXEC_FAIL_ON_UNDERFLOW) && startpos < stringarg))
                  {
-                    DEBUG_r(Perl_re_printf( aTHX_
+                    DEBUG_GPOS_r(Perl_re_printf( aTHX_
                              "fail: ganch-gofs before earliest possible start\n"));
                      return 0;
                  }
@@ -3259,8 +3369,8 @@ Perl_regexec_flags(pTHX_ REGEXP * const rx, char *stringarg, char *strend,
  
      minlen = prog->minlen;
      if ((startpos + minlen) > strend || startpos < strbeg) {
-        DEBUG_r(Perl_re_printf( aTHX_
-                    "Regex match can't succeed, so not even tried\n"));
+       DEBUG_EXECUTE_r(Perl_re_printf( aTHX_
+                        "Regex match can't succeed, so not even tried\n"));
          return 0;
      }
  
@@ -3305,7 +3415,7 @@ Perl_regexec_flags(pTHX_ REGEXP * const rx, char *stringarg, char *strend,
              RXp_MATCH_UTF8_set(prog, utf8_target);
              prog->offs[0].start = s - strbeg;
              prog->offs[0].end = utf8_target
-                ? (char*)utf8_hop((U8*)s, prog->minlenret) - strbeg
+                ? (char*)utf8_hop_forward((U8*)s, prog->minlenret, (U8 *) strend) - strbeg
                  : s - strbeg + prog->minlenret;
              if ( !(flags & REXEC_NOT_FIRST) )
                  S_reg_set_capture_string(aTHX_ rx,
@@ -3500,11 +3610,11 @@ Perl_regexec_flags(pTHX_ REGEXP * const rx, char *stringarg, char *strend,
                  to_utf8_substr(prog);
              }
              ch = SvPVX_const(prog->anchored_utf8)[0];
-           REXEC_FBC_SCAN(0,   /* 0=>not-utf8 */
+           REXEC_FBC_SCAN(1,   /* 1=>utf8 */
                 if (*s == ch) {
                     DEBUG_EXECUTE_r( did_match = 1 );
                     if (regtry(reginfo, &s)) goto got_it;
-                   s += UTF8SKIP(s);
+                   s += UTF8_SAFE_SKIP(s, strend);
                     while (s < strend && *s == ch)
                         s += UTF8SKIP(s);
                 }
@@ -3872,7 +3982,7 @@ S_regtry(pTHX_ regmatch_info *reginfo, char **startposp)
      U32 depth = 0; /* used by REGCP_SET */
  #endif
      RXi_GET_DECL(prog,progi);
-    GET_RE_DEBUG_FLAGS_DECL;
+    DECLARE_AND_GET_RE_DEBUG_FLAGS;
  
      PERL_ARGS_ASSERT_REGTRY;
  
@@ -4162,13 +4272,14 @@ S_setup_EXACTISH_ST_c1_c2(pTHX_ const regnode * const text_node, int *c1p,
       * to/from code points */
      bool utf8_has_been_setup = FALSE;
  
-    dVAR;
  
      U8 *pat = (U8*)STRING(text_node);
      U8 folded[UTF8_MAX_FOLD_CHAR_EXPAND * UTF8_MAXBYTES_CASE + 1] = { '\0' };
  
      if (   OP(text_node) == EXACT
-        || OP(text_node) == EXACT_ONLY8
+        || OP(text_node) == LEXACT
+        || OP(text_node) == EXACT_REQ8
+        || OP(text_node) == LEXACT_REQ8
          || OP(text_node) == EXACTL)
      {
  
@@ -4177,7 +4288,8 @@ S_setup_EXACTISH_ST_c1_c2(pTHX_ const regnode * const text_node, int *c1p,
           * copy the input to the output, avoiding finding the code point of
           * that character */
          if (!is_utf8_pat) {
-            assert(OP(text_node) != EXACT_ONLY8);
+            assert(   OP(text_node) != EXACT_REQ8
+                   && OP(text_node) != LEXACT_REQ8);
              c2 = c1 = *pat;
          }
          else if (utf8_target) {
@@ -4185,7 +4297,9 @@ S_setup_EXACTISH_ST_c1_c2(pTHX_ const regnode * const text_node, int *c1p,
              Copy(pat, c2_utf8, UTF8SKIP(pat), U8);
              utf8_has_been_setup = TRUE;
          }
-        else if (OP(text_node) == EXACT_ONLY8) {
+        else if (   OP(text_node) == EXACT_REQ8
+                 || OP(text_node) == LEXACT_REQ8)
+        {
              return FALSE;   /* Can only match UTF-8 target */
          }
          else {
@@ -4193,7 +4307,7 @@ S_setup_EXACTISH_ST_c1_c2(pTHX_ const regnode * const text_node, int *c1p,
          }
      }
      else { /* an EXACTFish node */
-        U8 *pat_end = pat + STR_LEN(text_node);
+        U8 *pat_end = pat + STR_LENs(text_node);
  
          /* An EXACTFL node has at least some characters unfolded, because what
           * they match is not known until now.  So, now is the time to fold
@@ -4275,8 +4389,8 @@ S_setup_EXACTISH_ST_c1_c2(pTHX_ const regnode * const text_node, int *c1p,
                  }
              }
              else if (c1 > 255) {
-                const unsigned int * remaining_folds;
-                unsigned int first_fold;
+                const U32 * remaining_folds;
+                U32 first_fold;
  
                  /* Look up what code points (besides c1) fold to c1;  e.g.,
                   * [ 'K', KELVIN_SIGN ] both fold to 'k'. */
@@ -4358,7 +4472,7 @@ S_setup_EXACTISH_ST_c1_c2(pTHX_ const regnode * const text_node, int *c1p,
                      case EXACTFU:
                          c2 = PL_fold_latin1[c1];
                          break;
-                    case EXACTFU_ONLY8:
+                    case EXACTFU_REQ8:
                          return FALSE;
                          NOT_REACHED; /* NOTREACHED */
  
@@ -4410,7 +4524,7 @@ STATIC bool
  S_isGCB(pTHX_ const GCB_enum before, const GCB_enum after, const U8 * const strbeg, const U8 * const curpos, const bool utf8_target)
  {
      /* returns a boolean indicating if there is a Grapheme Cluster Boundary
-     * between the inputs.  See http://www.unicode.org/reports/tr29/. */
+     * between the inputs.  See https://www.unicode.org/reports/tr29/. */
  
      PERL_ARGS_ASSERT_ISGCB;
  
@@ -4472,7 +4586,7 @@ S_isGCB(pTHX_ const GCB_enum before, const GCB_enum after, const U8 * const strb
                  }
                  while (prev == GCB_Extend);
  
-                return prev != GCB_XPG_XX;
+                return prev != GCB_ExtPict_XX;
              }
  
          default:
@@ -4490,7 +4604,6 @@ S_isGCB(pTHX_ const GCB_enum before, const GCB_enum after, const U8 * const strb
  STATIC GCB_enum
  S_backup_one_GCB(pTHX_ const U8 * const strbeg, U8 ** curpos, const bool utf8_target)
  {
-    dVAR;
      GCB_enum gcb;
  
      PERL_ARGS_ASSERT_BACKUP_ONE_GCB;
@@ -4768,7 +4881,6 @@ S_isLB(pTHX_ LB_enum before,
  STATIC LB_enum
  S_advance_one_LB(pTHX_ U8 ** curpos, const U8 * const strend, const bool utf8_target)
  {
-    dVAR;
  
      LB_enum lb;
  
@@ -4799,7 +4911,6 @@ S_advance_one_LB(pTHX_ U8 ** curpos, const U8 * const strend, const bool utf8_ta
  STATIC LB_enum
  S_backup_one_LB(pTHX_ const U8 * const strbeg, U8 ** curpos, const bool utf8_target)
  {
-    dVAR;
      LB_enum lb;
  
      PERL_ARGS_ASSERT_BACKUP_ONE_LB;
@@ -4847,7 +4958,7 @@ S_isSB(pTHX_ SB_enum before,
               const bool utf8_target)
  {
      /* returns a boolean indicating if there is a Sentence Boundary Break
-     * between the inputs.  See http://www.unicode.org/reports/tr29/ */
+     * between the inputs.  See https://www.unicode.org/reports/tr29/ */
  
      U8 * lpos = (U8 *) curpos;
      bool has_para_sep = FALSE;
@@ -5036,7 +5147,6 @@ S_isSB(pTHX_ SB_enum before,
  STATIC SB_enum
  S_advance_one_SB(pTHX_ U8 ** curpos, const U8 * const strend, const bool utf8_target)
  {
-    dVAR;
      SB_enum sb;
  
      PERL_ARGS_ASSERT_ADVANCE_ONE_SB;
@@ -5070,7 +5180,6 @@ S_advance_one_SB(pTHX_ U8 ** curpos, const U8 * const strend, const bool utf8_ta
  STATIC SB_enum
  S_backup_one_SB(pTHX_ const U8 * const strbeg, U8 ** curpos, const bool utf8_target)
  {
-    dVAR;
      SB_enum sb;
  
      PERL_ARGS_ASSERT_BACKUP_ONE_SB;
@@ -5307,7 +5416,6 @@ S_advance_one_WB(pTHX_ U8 ** curpos,
                         const bool utf8_target,
                         const bool skip_Extend_Format)
  {
-    dVAR;
      WB_enum wb;
  
      PERL_ARGS_ASSERT_ADVANCE_ONE_WB;
@@ -5345,7 +5453,6 @@ S_advance_one_WB(pTHX_ U8 ** curpos,
  STATIC WB_enum
  S_backup_one_WB(pTHX_ WB_enum * previous, const U8 * const strbeg, U8 ** curpos, const bool utf8_target)
  {
-    dVAR;
      WB_enum wb;
  
      PERL_ARGS_ASSERT_BACKUP_ONE_WB;
@@ -5465,18 +5572,20 @@ S_backup_one_WB(pTHX_ WB_enum * previous, const U8 * const strbeg, U8 ** curpos,
  
  /* push a new state (resumed later at 'state'), then goto it at 'node' */
  
-#define PUSH_STATE_GOTO(state, node, input, eol) \
+#define PUSH_STATE_GOTO(state, node, input, eol, sr0)       \
      pushinput = input; \
      pusheol = eol; \
+    pushsr0 = sr0; \
      scan = node; \
      st->resume_state = state; \
      goto push_state;
  
  /* like PUSH_STATE_GOTO, but pushes with success backtracking (yes state) */
  
-#define PUSH_YES_STATE_GOTO(state, node, input, eol) \
+#define PUSH_YES_STATE_GOTO(state, node, input, eol, sr0)   \
      pushinput = input; \
      pusheol = eol;     \
+    pushsr0 = sr0; \
      scan = node; \
      st->resume_state = state; \
      goto push_yes_state;
@@ -5532,7 +5641,7 @@ the subpattern to be matched possibly multiple times, while B is the entire
  rest of the pattern. Variable and state names reflect this convention.
  
  The states in the main switch are the union of ops and failure/success of
-substates associated with with that op.  For example, IFMATCH is the op
+substates associated with that op.  For example, IFMATCH is the op
  that does lookahead assertions /(?=A)B/ and so the IFMATCH state means
  'execute IFMATCH'; while IFMATCH_A is a state saying that we have just
  successfully matched A and IFMATCH_A_fail is a state saying that we have
@@ -5642,7 +5751,6 @@ bounds of our window into the string.
  STATIC SSize_t
  S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
  {
-    dVAR;
      const bool utf8_target = reginfo->is_utf8_target;
      const U32 uniflags = UTF8_ALLOW_DEFAULT;
      REGEXP *rex_sv = reginfo->prog;
@@ -5660,6 +5768,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
      char *loceol = reginfo->strend;
      char *pushinput; /* where to continue after a PUSH */
      char *pusheol;   /* where to stop matching (loceol) after a PUSH */
+    U8   *pushsr0;   /* save starting pos of script run */
      I32 nextchr;   /* is always set to UCHARAT(locinput), or -1 at EOS */
  
      bool result = 0;       /* return value of S_regmatch */
@@ -5721,7 +5830,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
  #endif
  
  #ifdef DEBUGGING
-    GET_RE_DEBUG_FLAGS_DECL;
+    DECLARE_AND_GET_RE_DEBUG_FLAGS;
  #endif
  
      /* protect against undef(*^R) */
@@ -5796,7 +5905,8 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
             /* update the startpoint */
             st->u.keeper.val = rex->offs[0].start;
             rex->offs[0].start = locinput - reginfo->strbeg;
-           PUSH_STATE_GOTO(KEEPS_next, next, locinput, loceol);
+           PUSH_STATE_GOTO(KEEPS_next, next, locinput, loceol,
+                            script_run_begin);
             NOT_REACHED; /* NOTREACHED */
  
         case KEEPS_next_fail:
@@ -6127,6 +6237,8 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
  
                     while (chars) {
                         if (utf8_target) {
+                            /* XXX This assumes the length is well-formed, as
+                             * does the UTF8SKIP below */
                             uvc = utf8n_to_uvchr((U8*)uc, UTF8_MAXLEN, &len,
                                                     uniflags);
                             uc += len;
@@ -6170,7 +6282,8 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
             });
  
             if ( ST.accepted > 1 || has_cutgroup || ST.jump ) {
-               PUSH_STATE_GOTO(TRIE_next, scan, (char*)uc, loceol);
+               PUSH_STATE_GOTO(TRIE_next, scan, (char*)uc, loceol,
+                                script_run_begin);
                 NOT_REACHED; /* NOTREACHED */
             }
             /* only one choice left - just continue */
@@ -6198,6 +6311,20 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
          }
  #undef  ST
  
+       case LEXACT_REQ8:
+            if (! utf8_target) {
+                sayNO;
+            }
+            /* FALLTHROUGH */
+
+       case LEXACT:
+        {
+           char *s;
+
+           s = STRINGl(scan);
+           ln = STR_LENl(scan);
+            goto join_short_long_exact;
+
         case EXACTL:             /*  /abc/l       */
              _CHECK_AND_WARN_PROBLEMATIC_LOCALE;
  
@@ -6211,16 +6338,18 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
                  _CHECK_AND_OUTPUT_WIDE_LOCALE_UTF8_MSG(locinput, reginfo->strend);
              }
              goto do_exact;
-       case EXACT_ONLY8:
+       case EXACT_REQ8:
              if (! utf8_target) {
                  sayNO;
              }
              /* FALLTHROUGH */
-       case EXACT: {            /*  /abc/        */
-           char *s;
+
+       case EXACT:             /*  /abc/        */
            do_exact:
-           s = STRING(scan);
-           ln = STR_LEN(scan);
+           s = STRINGs(scan);
+           ln = STR_LENs(scan);
+
+          join_short_long_exact:
             if (utf8_target != is_utf8_pat) {
                 /* The target and the pattern have differing utf8ness. */
                 char *l = locinput;
@@ -6323,7 +6452,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
             fold_array = PL_fold_latin1;
             goto do_exactf;
  
-        case EXACTFU_ONLY8:      /* /abc/iu with something in /abc/ > 255 */
+        case EXACTFU_REQ8:      /* /abc/iu with something in /abc/ > 255 */
              if (! utf8_target) {
                  sayNO;
              }
@@ -6372,8 +6501,8 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
             fold_utf8_flags = 0;
  
           do_exactf:
-           s = STRING(scan);
-           ln = STR_LEN(scan);
+           s = STRINGs(scan);
+           ln = STR_LENs(scan);
  
             if (   utf8_target
                  || is_utf8_pat
@@ -6430,9 +6559,9 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
                 if (locinput == reginfo->strbeg)
                     b1 = isWORDCHAR_LC('\n');
                 else {
-                    b1 = isWORDCHAR_LC_utf8_safe(reghop3((U8*)locinput, -1,
-                                                        (U8*)(reginfo->strbeg)),
-                                                 (U8*)(reginfo->strend));
+                    U8 *p = reghop3((U8*)locinput, -1,
+                                    (U8*)(reginfo->strbeg));
+                    b1 = isWORDCHAR_LC_utf8_safe(p, (U8*)(reginfo->strend));
                 }
                  b2 = (NEXTCHR_IS_EOS)
                      ? isWORDCHAR_LC('\n')
@@ -6509,13 +6638,15 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
                      case TRADITIONAL_BOUND:
                      {
                          bool b1, b2;
-                        b1 = (locinput == reginfo->strbeg)
-                             ? 0 /* isWORDCHAR_L1('\n') */
-                             : isWORDCHAR_utf8_safe(
-                                               reghop3((U8*)locinput,
-                                                       -1,
-                                                       (U8*)(reginfo->strbeg)),
-                                                    (U8*) reginfo->strend);
+                        if (locinput == reginfo->strbeg) {
+                            b1 = 0 /* isWORDCHAR_L1('\n') */;
+                        }
+                        else {
+                            U8 *p = reghop3((U8*)locinput, -1,
+                                            (U8*)(reginfo->strbeg));
+
+                            b1 = isWORDCHAR_utf8_safe(p, (U8*) reginfo->strend);
+                        }
                          b2 = (NEXTCHR_IS_EOS)
                              ? 0 /* isWORDCHAR_L1('\n') */
                              : isWORDCHAR_utf8_safe((U8*)locinput,
@@ -6740,6 +6871,46 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
          case ANYOFH:
              if (   ! utf8_target
                  ||   NEXTCHR_IS_EOS
+                ||   ANYOF_FLAGS(scan) > NATIVE_UTF8_TO_I8(*locinput)
+               || ! reginclass(rex, scan, (U8*)locinput, (U8*) loceol,
+                                                                   utf8_target))
+            {
+                sayNO;
+            }
+            goto increment_locinput;
+            break;
+
+        case ANYOFHb:
+            if (   ! utf8_target
+                ||   NEXTCHR_IS_EOS
+                ||   ANYOF_FLAGS(scan) != (U8) *locinput
+               || ! reginclass(rex, scan, (U8*)locinput, (U8*) loceol,
+                                                                  utf8_target))
+            {
+                sayNO;
+            }
+            goto increment_locinput;
+            break;
+
+        case ANYOFHr:
+            if (   ! utf8_target
+                ||   NEXTCHR_IS_EOS
+                || ! inRANGE((U8) NATIVE_UTF8_TO_I8(*locinput),
+                             LOWEST_ANYOF_HRx_BYTE(ANYOF_FLAGS(scan)),
+                             HIGHEST_ANYOF_HRx_BYTE(ANYOF_FLAGS(scan)))
+               || ! reginclass(rex, scan, (U8*)locinput, (U8*) loceol,
+                                                                   utf8_target))
+            {
+                sayNO;
+            }
+            goto increment_locinput;
+            break;
+
+        case ANYOFHs:
+            if (   ! utf8_target
+                ||   NEXTCHR_IS_EOS
+                ||   loceol - locinput < FLAGS(scan)
+                ||   memNE(locinput, ((struct regnode_anyofhs *) scan)->string, FLAGS(scan))
                 || ! reginclass(rex, scan, (U8*)locinput, (U8*) loceol,
                                                                     utf8_target))
              {
@@ -6748,6 +6919,56 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
              goto increment_locinput;
              break;
  
+        case ANYOFR:
+            if (NEXTCHR_IS_EOS) {
+                sayNO;
+            }
+
+            if (utf8_target) {
+                if (    ANYOF_FLAGS(scan) > NATIVE_UTF8_TO_I8(*locinput)
+                   || ! withinCOUNT(utf8_to_uvchr_buf((U8 *) locinput,
+                                                (U8 *) reginfo->strend,
+                                                NULL),
+                                    ANYOFRbase(scan), ANYOFRdelta(scan)))
+                {
+                    sayNO;
+                }
+            }
+            else {
+                if (! withinCOUNT((U8) *locinput,
+                                  ANYOFRbase(scan), ANYOFRdelta(scan)))
+                {
+                    sayNO;
+                }
+            }
+            goto increment_locinput;
+            break;
+
+        case ANYOFRb:
+            if (NEXTCHR_IS_EOS) {
+                sayNO;
+            }
+
+            if (utf8_target) {
+                if (     ANYOF_FLAGS(scan) != (U8) *locinput
+                    || ! withinCOUNT(utf8_to_uvchr_buf((U8 *) locinput,
+                                                (U8 *) reginfo->strend,
+                                                NULL),
+                                     ANYOFRbase(scan), ANYOFRdelta(scan)))
+                {
+                    sayNO;
+                }
+            }
+            else {
+                if (! withinCOUNT((U8) *locinput,
+                                  ANYOFRbase(scan), ANYOFRdelta(scan)))
+                {
+                    sayNO;
+                }
+            }
+            goto increment_locinput;
+            break;
+
          /* The argument (FLAGS) to all the POSIX node types is the class number
           * */
  
@@ -6919,7 +7140,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
                          }
                          break;
                  }
-                locinput += UTF8SKIP(locinput);
+                locinput += UTF8_SAFE_SKIP(locinput, reginfo->strend);
              }
              break;
  
@@ -6969,7 +7190,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
             }
             break;
              
-       case NREFFL:  /*  /\g{name}/il  */
+       case REFFLN:  /*  /\g{name}/il  */
         {   /* The capture buffer cases.  The ones ending with N for the
                named buffers just convert to the equivalent numbered and
                pretend they were called as the corresponding numbered buffer
@@ -6989,28 +7210,28 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
             utf8_fold_flags = FOLDEQ_LOCALE;
             goto do_nref;
  
-       case NREFFA:  /*  /\g{name}/iaa  */
+       case REFFAN:  /*  /\g{name}/iaa  */
             folder = foldEQ_latin1;
             fold_array = PL_fold_latin1;
             type = REFFA;
             utf8_fold_flags = FOLDEQ_UTF8_NOMIX_ASCII;
             goto do_nref;
  
-       case NREFFU:  /*  /\g{name}/iu  */
+       case REFFUN:  /*  /\g{name}/iu  */
             folder = foldEQ_latin1;
             fold_array = PL_fold_latin1;
             type = REFFU;
             utf8_fold_flags = 0;
             goto do_nref;
  
-       case NREFF:  /*  /\g{name}/i  */
+       case REFFN:  /*  /\g{name}/i  */
             folder = foldEQ;
             fold_array = PL_fold;
             type = REFF;
             utf8_fold_flags = 0;
             goto do_nref;
  
-       case NREF:  /*  /\g{name}/   */
+       case REFN:  /*  /\g{name}/   */
             type = REF;
             folder = NULL;
             fold_array = NULL;
@@ -7159,7 +7380,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
                  rex->recurse_locinput[arg]= locinput;
  
                  DEBUG_r({
-                    GET_RE_DEBUG_FLAGS_DECL;
+                    DECLARE_AND_GET_RE_DEBUG_FLAGS;
                      DEBUG_STACK_r({
                          Perl_re_exec_indentf( aTHX_
                              "entering GOSUB, prev_recurse_locinput=%p recurse_locinput[%d]=%p\n",
@@ -7178,7 +7399,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
              /* NOTREACHED */
  
          case EVAL:  /*   /(?{...})B/   /(??{A})B/  and  /(?(?{...})X|Y)B/   */
-            if (cur_eval && cur_eval->locinput==locinput) {
+            if (logical == 2 && cur_eval && cur_eval->locinput==locinput) {
                 if ( ++nochange_depth > max_nochange_depth )
                      Perl_croak(aTHX_ "EVAL without pos change exceeded limit in regex");
              } else {
@@ -7406,7 +7627,8 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
                  PL_curpm = PL_reg_curpm;
  
                 if (logical != 2) {
-                    PUSH_STATE_GOTO(EVAL_B, next, locinput, loceol);
+                    PUSH_STATE_GOTO(EVAL_B, next, locinput, loceol,
+                                    script_run_begin);
                     /* NOTREACHED */
                  }
             }
@@ -7506,7 +7728,8 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
                 ST.prev_eval = cur_eval;
                 cur_eval = st;
                 /* now continue from first node in postponed RE */
-               PUSH_YES_STATE_GOTO(EVAL_postponed_AB, startpoint, locinput, loceol);
+               PUSH_YES_STATE_GOTO(EVAL_postponed_AB, startpoint, locinput,
+                                    loceol, script_run_begin);
                 NOT_REACHED; /* NOTREACHED */
         }
  
@@ -7662,7 +7885,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
             sw = cBOOL(rex->lastparen >= n && rex->offs[n].end != -1);
             break;
  
-       case NGROUPP:  /*  (?(<name>))  */
+       case GROUPPN:  /*  (?(<name>))  */
             /* reg_check_named_buff_matched returns 0 for no match */
             sw = cBOOL(0 < reg_check_named_buff_matched(rex,scan));
             break;
@@ -7806,7 +8029,8 @@ NULL
             ST.count = -1;      /* this will be updated by WHILEM */
             ST.lastloc = NULL;  /* this will be updated by WHILEM */
  
-           PUSH_YES_STATE_GOTO(CURLYX_end, PREVOPER(next), locinput, loceol);
+           PUSH_YES_STATE_GOTO(CURLYX_end, PREVOPER(next), locinput, loceol,
+                                script_run_begin);
             NOT_REACHED; /* NOTREACHED */
         }
  
@@ -7854,7 +8078,8 @@ NULL
                 cur_curlyx->u.curlyx.lastloc = locinput;
                 REGCP_SET(ST.lastcp);
  
-               PUSH_STATE_GOTO(WHILEM_A_pre, A, locinput, loceol);
+               PUSH_STATE_GOTO(WHILEM_A_pre, A, locinput, loceol,
+                                script_run_begin);
                 NOT_REACHED; /* NOTREACHED */
             }
  
@@ -7962,7 +8187,7 @@ NULL
                 ST.save_curlyx = cur_curlyx;
                 cur_curlyx = cur_curlyx->u.curlyx.prev_curlyx;
                 PUSH_YES_STATE_GOTO(WHILEM_B_min, ST.save_curlyx->u.curlyx.B,
-                                    locinput, loceol);
+                                    locinput, loceol, script_run_begin);
                 NOT_REACHED; /* NOTREACHED */
             }
  
@@ -7973,7 +8198,8 @@ NULL
                              maxopenparen);
                 cur_curlyx->u.curlyx.lastloc = locinput;
                 REGCP_SET(ST.lastcp);
-               PUSH_STATE_GOTO(WHILEM_A_max, A, locinput, loceol);
+               PUSH_STATE_GOTO(WHILEM_A_max, A, locinput, loceol,
+                                script_run_begin);
                 NOT_REACHED; /* NOTREACHED */
             }
             goto do_whilem_B_max;
@@ -8025,7 +8251,7 @@ NULL
             ST.save_curlyx = cur_curlyx;
             cur_curlyx = cur_curlyx->u.curlyx.prev_curlyx;
             PUSH_YES_STATE_GOTO(WHILEM_B_max, ST.save_curlyx->u.curlyx.B,
-                                locinput, loceol);
+                                locinput, loceol, script_run_begin);
             NOT_REACHED; /* NOTREACHED */
  
         case WHILEM_B_min_fail: /* just failed to match B in a minimal match */
@@ -8056,7 +8282,7 @@ NULL
             REGCP_SET(ST.lastcp);
             PUSH_STATE_GOTO(WHILEM_A_min,
                 /*A*/ NEXTOPER(ST.save_curlyx->u.curlyx.me) + EXTRA_STEP_2ARGS,
-                locinput, loceol);
+                locinput, loceol, script_run_begin);
             NOT_REACHED; /* NOTREACHED */
  
  #undef  ST
@@ -8078,9 +8304,11 @@ NULL
  
             /* Now go into the branch */
             if (has_cutgroup) {
-               PUSH_YES_STATE_GOTO(BRANCH_next, scan, locinput, loceol);
+               PUSH_YES_STATE_GOTO(BRANCH_next, scan, locinput, loceol,
+                                    script_run_begin);
             } else {
-               PUSH_STATE_GOTO(BRANCH_next, scan, locinput, loceol);
+               PUSH_STATE_GOTO(BRANCH_next, scan, locinput, loceol,
+                                script_run_begin);
             }
             NOT_REACHED; /* NOTREACHED */
  
@@ -8088,7 +8316,8 @@ NULL
              sv_yes_mark = st->u.mark.mark_name = scan->flags
                  ? MUTABLE_SV(rexi->data->data[ ARG( scan ) ])
                  : NULL;
-            PUSH_STATE_GOTO(CUTGROUP_next, next, locinput, loceol);
+            PUSH_STATE_GOTO(CUTGROUP_next, next, locinput, loceol,
+                            script_run_begin);
              NOT_REACHED; /* NOTREACHED */
  
          case CUTGROUP_next_fail:
@@ -8165,7 +8394,8 @@ NULL
                 goto curlym_do_B;
  
           curlym_do_A: /* execute the A in /A{m,n}B/  */
-           PUSH_YES_STATE_GOTO(CURLYM_A, ST.A, locinput, loceol); /* match A */
+           PUSH_YES_STATE_GOTO(CURLYM_A, ST.A, locinput, loceol, /* match A */
+                                script_run_begin);
             NOT_REACHED; /* NOTREACHED */
  
         case CURLYM_A: /* we've just matched an A */
@@ -8235,8 +8465,15 @@ NULL
                 );
             if (! NEXTCHR_IS_EOS && ST.c1 != CHRTEST_VOID) {
                  if (! UTF8_IS_INVARIANT(nextchr) && utf8_target) {
-                    if (memNE(locinput, ST.c1_utf8, UTF8SKIP(locinput))
-                        && memNE(locinput, ST.c2_utf8, UTF8SKIP(locinput)))
+
+                           /* (We can use memEQ and memNE in this file without
+                            * having to worry about one being shorter than the
+                            * other, since the first byte of each gives the
+                            * length of the character) */
+                    if (   memNE(locinput, ST.c1_utf8, UTF8_SAFE_SKIP(locinput,
+                                                              reginfo->strend))
+                        && memNE(locinput, ST.c2_utf8, UTF8_SAFE_SKIP(locinput,
+                                                             reginfo->strend)))
                      {
                          /* simulate B failing */
                          DEBUG_OPTIMISE_r(
@@ -8282,7 +8519,8 @@ NULL
                 }
             }
             
-           PUSH_STATE_GOTO(CURLYM_B, ST.B, locinput, loceol); /* match B */
+           PUSH_STATE_GOTO(CURLYM_B, ST.B, locinput, loceol,   /* match B */
+                            script_run_begin);
             NOT_REACHED; /* NOTREACHED */
  
         case CURLYM_B_fail: /* just failed to match a B */
@@ -8498,20 +8736,26 @@ NULL
                     n = (ST.oldloc == locinput) ? 0 : 1;
                     if (ST.c1 == ST.c2) {
                         /* set n to utf8_distance(oldloc, locinput) */
-                       while (locinput <= ST.maxpos
-                              && memNE(locinput, ST.c1_utf8, UTF8SKIP(locinput)))
+                       while (    locinput <= ST.maxpos
+                               &&  locinput < loceol
+                               &&  memNE(locinput, ST.c1_utf8,
+                                    UTF8_SAFE_SKIP(locinput, reginfo->strend)))
                          {
-                           locinput += UTF8SKIP(locinput);
+                           locinput += UTF8_SAFE_SKIP(locinput,
+                                                       reginfo->strend);
                             n++;
                         }
                     }
                     else {
                         /* set n to utf8_distance(oldloc, locinput) */
-                       while (locinput <= ST.maxpos
-                              && memNE(locinput, ST.c1_utf8, UTF8SKIP(locinput))
-                              && memNE(locinput, ST.c2_utf8, UTF8SKIP(locinput)))
+                       while (   locinput <= ST.maxpos
+                               && locinput < loceol
+                               && memNE(locinput, ST.c1_utf8,
+                                     UTF8_SAFE_SKIP(locinput, reginfo->strend))
+                               && memNE(locinput, ST.c2_utf8,
+                                    UTF8_SAFE_SKIP(locinput, reginfo->strend)))
                          {
-                           locinput += UTF8SKIP(locinput);
+                           locinput += UTF8_SAFE_SKIP(locinput, reginfo->strend);
                             n++;
                         }
                     }
@@ -8576,7 +8820,8 @@ NULL
  
            curly_try_B_min:
              CURLY_SETPAREN(ST.paren, ST.count);
-            PUSH_STATE_GOTO(CURLY_B_min, ST.B, locinput, loceol);
+            PUSH_STATE_GOTO(CURLY_B_min, ST.B, locinput, loceol,
+                            script_run_begin);
             NOT_REACHED; /* NOTREACHED */
  
  
@@ -8589,21 +8834,22 @@ NULL
                  if (ST.c1 != CHRTEST_VOID && could_match) {
                      if (! UTF8_IS_INVARIANT(UCHARAT(locinput)) && utf8_target)
                      {
-                        could_match = memEQ(locinput,
-                                            ST.c1_utf8,
-                                            UTF8SKIP(locinput))
-                                    || memEQ(locinput,
-                                             ST.c2_utf8,
-                                             UTF8SKIP(locinput));
+                        could_match =  memEQ(locinput, ST.c1_utf8,
+                                             UTF8_SAFE_SKIP(locinput,
+                                                            reginfo->strend))
+                                    || memEQ(locinput, ST.c2_utf8,
+                                             UTF8_SAFE_SKIP(locinput,
+                                                            reginfo->strend));
                      }
                      else {
-                        could_match = UCHARAT(locinput) == ST.c1
-                                      || UCHARAT(locinput) == ST.c2;
+                        could_match =   UCHARAT(locinput) == ST.c1
+                                     || UCHARAT(locinput) == ST.c2;
                      }
                  }
                  if (ST.c1 == CHRTEST_VOID || could_match) {
                     CURLY_SETPAREN(ST.paren, ST.count);
-                   PUSH_STATE_GOTO(CURLY_B_max, ST.B, locinput, loceol);
+                   PUSH_STATE_GOTO(CURLY_B_max, ST.B, locinput, loceol,
+                                    script_run_begin);
                     NOT_REACHED; /* NOTREACHED */
                 }
             }
@@ -8658,8 +8904,9 @@ NULL
  
                  SET_RECURSE_LOCINPUT("FAKE-END[after]", cur_eval->locinput);
  
-                PUSH_YES_STATE_GOTO(EVAL_postponed_AB, st->u.eval.prev_eval->u.eval.B,
-                                    locinput, loceol); /* match B */
+                PUSH_YES_STATE_GOTO(EVAL_postponed_AB,          /* match B */
+                                    st->u.eval.prev_eval->u.eval.B,
+                                    locinput, loceol, script_run_begin);
             }
  
             if (locinput < reginfo->till) {
@@ -8710,8 +8957,8 @@ NULL
                  PERL_UINT_FAST8_T back_count = scan->flags;
                 char * s;
  
-                /* Lookbehind ends here */
-               ST.end = locinput;
+                /* Lookbehind can look beyond the current position */
+               ST.end = loceol;
  
                  /* ... and starts at the first place in the input that is in
                   * the range of the possible start positions */
@@ -8745,7 +8992,8 @@ NULL
             logical = 0; /* XXX: reset state of logical once it has been saved into ST */
             
             /* execute body of (?...A) */
-           PUSH_YES_STATE_GOTO(IFMATCH_A, NEXTOPER(NEXTOPER(scan)), ST.start, ST.end);
+           PUSH_YES_STATE_GOTO(IFMATCH_A, NEXTOPER(NEXTOPER(scan)), ST.start,
+                                ST.end, script_run_begin);
             NOT_REACHED; /* NOTREACHED */
  
          {
@@ -8779,6 +9027,7 @@ NULL
                  /* restore old position except for (?>...) */
                 locinput = st->locinput;
                  loceol = st->loceol;
+                script_run_begin = st->sr0;
             }
             scan = ST.me + ARG(ST.me);
             if (scan == ST.me)
@@ -8802,7 +9051,8 @@ NULL
         case PRUNE:   /*  (*PRUNE)   */
              if (scan->flags)
                 sv_yes_mark = sv_commit = MUTABLE_SV(rexi->data->data[ ARG( scan ) ]);
-           PUSH_STATE_GOTO(COMMIT_next, next, locinput, loceol);
+           PUSH_STATE_GOTO(COMMIT_next, next, locinput, loceol,
+                            script_run_begin);
             NOT_REACHED; /* NOTREACHED */
  
         case COMMIT_next_fail:
@@ -8832,7 +9082,8 @@ NULL
                  = MUTABLE_SV(rexi->data->data[ ARG( scan ) ]);
              mark_state = st;
              ST.mark_loc = locinput;
-            PUSH_YES_STATE_GOTO(MARKPOINT_next, next, locinput, loceol);
+            PUSH_YES_STATE_GOTO(MARKPOINT_next, next, locinput, loceol,
+                                script_run_begin);
              NOT_REACHED; /* NOTREACHED */
  
          case MARKPOINT_next:
@@ -8865,7 +9116,8 @@ NULL
                  /* (*SKIP) : if we fail we cut here*/
                  ST.mark_name = NULL;
                  ST.mark_loc = locinput;
-                PUSH_STATE_GOTO(SKIP_next,next, locinput, loceol);
+                PUSH_STATE_GOTO(SKIP_next,next, locinput, loceol,
+                                script_run_begin);
              } else {
                  /* (*SKIP:NAME) : if there is a (*MARK:NAME) fail where it was, 
                     otherwise do nothing.  Meaning we need to scan 
@@ -8878,7 +9130,8 @@ NULL
                                  find ) ) 
                      {
                          ST.mark_name = find;
-                        PUSH_STATE_GOTO( SKIP_next, next, locinput, loceol);
+                        PUSH_STATE_GOTO( SKIP_next, next, locinput, loceol,
+                                         script_run_begin);
                      }
                      cur = cur->u.mark.prev_mark;
                  }
@@ -8949,8 +9202,10 @@ NULL
         /* push a new regex state, then continue at scan  */
         {
             regmatch_state *newst;
+            DECLARE_AND_GET_RE_DEBUG_FLAGS;
  
-           DEBUG_STACK_r({
+            DEBUG_r( /* DEBUG_STACK_r */
+              if (DEBUG_v_TEST || RE_DEBUG_FLAG(RE_DEBUG_EXTRA_STACK)) {
                 regmatch_state *cur = st;
                 regmatch_state *curyes = yes_state;
                 U32 i;
@@ -8969,12 +9224,13 @@ NULL
                      if (curyes == cur)
                         curyes = cur->u.yes.prev_yes_state;
                  }
-            } else 
+            } else {
                  DEBUG_STATE_pp("push")
-            );
+            });
             depth++;
             st->locinput = locinput;
             st->loceol = loceol;
+            st->sr0 = script_run_begin;
             newst = st+1; 
             if (newst >  SLAB_LAST(PL_regmatch_slab))
                 newst = S_push_slab(aTHX);
@@ -8982,6 +9238,7 @@ NULL
  
             locinput = pushinput;
              loceol = pusheol;
+            script_run_begin = pushsr0;
             st = newst;
             continue;
              /* NOTREACHED */
@@ -9037,6 +9294,7 @@ NULL
          if (no_final) {
              locinput= st->locinput;
              loceol= st->loceol;
+            script_run_begin = st->sr0;
          }
         state_num = st->resume_state + no_final;
         goto reenter_switch;
@@ -9088,6 +9346,7 @@ NULL
         PL_regmatch_state = st;
         locinput= st->locinput;
         loceol= st->loceol;
+        script_run_begin = st->sr0;
  
         DEBUG_STATE_pp("pop");
         depth--;
@@ -9153,7 +9412,6 @@ STATIC I32
  S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
              char * loceol, regmatch_info *const reginfo, I32 max _pDEPTH)
  {
-    dVAR;
      char *scan;     /* Pointer to current position in target string */
      I32 c;
      char *this_eol = loceol;   /* potentially adjusted version. */
@@ -9224,6 +9482,22 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
         else
             scan = this_eol;
         break;
+
+    case LEXACT_REQ8:
+        if (! utf8_target) {
+            break;
+        }
+        /* FALLTHROUGH */
+
+    case LEXACT:
+      {
+        U8 * string;
+        Size_t str_len;
+
+       string = (U8 *) STRINGl(p);
+        str_len = STR_LENl(p);
+        goto join_short_long_exact;
+
      case EXACTL:
          _CHECK_AND_WARN_PROBLEMATIC_LOCALE;
          if (utf8_target && UTF8_IS_ABOVE_LATIN1(*scan)) {
@@ -9231,16 +9505,20 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
          }
          goto do_exact;
  
-    case EXACT_ONLY8:
+    case EXACT_REQ8:
          if (! utf8_target) {
              break;
          }
          /* FALLTHROUGH */
      case EXACT:
        do_exact:
-        assert(STR_LEN(p) == reginfo->is_utf8_pat ? UTF8SKIP(STRING(p)) : 1);
+       string = (U8 *) STRINGs(p);
+        str_len = STR_LENs(p);
  
-       c = (U8)*STRING(p);
+      join_short_long_exact:
+        assert(str_len == reginfo->is_utf8_pat ? UTF8SKIP(string) : 1);
+
+       c = *string;
  
          /* Can use a simple find if the pattern char to match on is invariant
           * under UTF-8, or both target and pattern aren't UTF-8.  Note that we
@@ -9262,8 +9540,8 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
                   * string EQ */
                  while (hardcount < max
                         && scan < this_eol
-                       && (scan_char_len = UTF8SKIP(scan)) <= STR_LEN(p)
-                       && memEQ(scan, STRING(p), scan_char_len))
+                       && (scan_char_len = UTF8SKIP(scan)) <= str_len
+                       && memEQ(scan, string, scan_char_len))
                  {
                      scan += scan_char_len;
                      hardcount++;
@@ -9273,7 +9551,7 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
  
                  /* Target isn't utf8; convert the character in the UTF-8
                   * pattern to non-UTF8, and do a simple find */
-                c = EIGHT_BIT_UTF8_TO_NATIVE(c, *(STRING(p) + 1));
+                c = EIGHT_BIT_UTF8_TO_NATIVE(c, *(string + 1));
                  scan = (char *) find_span_end((U8 *) scan, (U8 *) this_eol, (U8) c);
              } /* else pattern char is above Latin1, can't possibly match the
                   non-UTF-8 target */
@@ -9297,6 +9575,7 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
             }
         }
         break;
+      }
  
      case EXACTFAA_NO_TRIE: /* This node only generated for non-utf8 patterns */
          assert(! reginfo->is_utf8_pat);
@@ -9329,7 +9608,7 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
                                      | FOLDEQ_S2_FOLDS_SANE;
          goto do_exactf;
  
-    case EXACTFU_ONLY8:
+    case EXACTFU_REQ8:
          if (! utf8_target) {
              break;
          }
@@ -9347,7 +9626,7 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
          int c1, c2;
          U8 c1_utf8[UTF8_MAXBYTES+1], c2_utf8[UTF8_MAXBYTES+1];
  
-        assert(STR_LEN(p) == reginfo->is_utf8_pat ? UTF8SKIP(STRING(p)) : 1);
+        assert(STR_LENs(p) == reginfo->is_utf8_pat ? UTF8SKIP(STRINGs(p)) : 1);
  
          if (S_setup_EXACTISH_ST_c1_c2(aTHX_ p, &c1, c1_utf8, &c2, c2_utf8,
                                          reginfo))
@@ -9355,10 +9634,10 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
              if (c1 == CHRTEST_VOID) {
                  /* Use full Unicode fold matching */
                  char *tmpeol = loceol;
-                STRLEN pat_len = reginfo->is_utf8_pat ? UTF8SKIP(STRING(p)) : 1;
+                STRLEN pat_len = reginfo->is_utf8_pat ? UTF8SKIP(STRINGs(p)) : 1;
                  while (hardcount < max
                          && foldEQ_utf8_flags(scan, &tmpeol, 0, utf8_target,
-                                             STRING(p), NULL, pat_len,
+                                             STRINGs(p), NULL, pat_len,
                                               reginfo->is_utf8_pat, utf8_flags))
                  {
                      scan = tmpeol;
@@ -9370,19 +9649,22 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
                  if (c1 == c2) {
                      while (scan < this_eol
                             && hardcount < max
-                           && memEQ(scan, c1_utf8, UTF8SKIP(scan)))
+                           && memEQ(scan, c1_utf8, UTF8_SAFE_SKIP(scan,
+                                                                  loceol)))
                      {
-                        scan += UTF8SKIP(scan);
+                        scan += UTF8SKIP(c1_utf8);
                          hardcount++;
                      }
                  }
                  else {
                      while (scan < this_eol
                             && hardcount < max
-                           && (memEQ(scan, c1_utf8, UTF8SKIP(scan))
-                               || memEQ(scan, c2_utf8, UTF8SKIP(scan))))
+                           && (   memEQ(scan, c1_utf8, UTF8_SAFE_SKIP(scan,
+                                                                     loceol))
+                               || memEQ(scan, c2_utf8, UTF8_SAFE_SKIP(scan,
+                                                                     loceol))))
                      {
-                        scan += UTF8SKIP(scan);
+                        scan += UTF8_SAFE_SKIP(scan, loceol);
                          hardcount++;
                      }
                  }
@@ -9473,13 +9755,110 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
          break;
  
      case ANYOFH:
-        if (utf8_target) while (   hardcount < max
-                                && scan < this_eol
-                                && reginclass(prog, p, (U8*)scan, (U8*) this_eol,
-                                                                  TRUE))
-        {
-            scan += UTF8SKIP(scan);
-            hardcount++;
+        if (utf8_target) {  /* ANYOFH only can match UTF-8 targets */
+            while (  hardcount < max
+                   && scan < this_eol
+                   && NATIVE_UTF8_TO_I8(*scan) >= ANYOF_FLAGS(p)
+                   && reginclass(prog, p, (U8*)scan, (U8*) this_eol, TRUE))
+            {
+                scan += UTF8SKIP(scan);
+                hardcount++;
+            }
+        }
+        break;
+
+    case ANYOFHb:
+        if (utf8_target) {  /* ANYOFHb can only match UTF-8 targets */
+
+            /* A character can match only if its first byte equals FLAGS */
+            while (   hardcount < max
+                   && scan < this_eol
+                   && (U8) *scan == ANYOF_FLAGS(p)
+                   && reginclass(prog, p, (U8*)scan, (U8*) this_eol,
+                                                              TRUE))
+            {
+                scan += UTF8SKIP(scan);
+                hardcount++;
+            }
+        }
+        break;
+
+    case ANYOFHr:
+        if (utf8_target) {  /* ANYOFHr can only match UTF-8 targets */
+            while (  hardcount < max
+                   && scan < this_eol
+                   && inRANGE(NATIVE_UTF8_TO_I8(*scan),
+                              LOWEST_ANYOF_HRx_BYTE(ANYOF_FLAGS(p)),
+                              HIGHEST_ANYOF_HRx_BYTE(ANYOF_FLAGS(p)))
+                   && NATIVE_UTF8_TO_I8(*scan) >= ANYOF_FLAGS(p)
+                   && reginclass(prog, p, (U8*)scan, (U8*) this_eol, TRUE))
+            {
+                scan += UTF8SKIP(scan);
+                hardcount++;
+            }
+        }
+        break;
+
+    case ANYOFHs:
+        if (utf8_target) {  /* ANYOFHs can only match UTF-8 targets */
+            while (   hardcount < max
+                   && scan + FLAGS(p) < this_eol
+                   && memEQ(scan, ((struct regnode_anyofhs *) p)->string, FLAGS(p))
+                   && reginclass(prog, p, (U8*)scan, (U8*) this_eol, TRUE))
+            {
+                scan += UTF8SKIP(scan);
+                hardcount++;
+            }
+        }
+        break;
+
+    case ANYOFR:    /* Matches code points in base .. base + delta */
+        if (utf8_target) {  /* Test the decoded code point */
+            while (   hardcount < max
+                   && scan < this_eol
+                   && NATIVE_UTF8_TO_I8(*scan) >= ANYOF_FLAGS(p)
+                   && withinCOUNT(utf8_to_uvchr_buf((U8 *) scan,
+                                                (U8 *) this_eol,
+                                                NULL),
+                                  ANYOFRbase(p), ANYOFRdelta(p)))
+            {
+                scan += UTF8SKIP(scan);
+                hardcount++;
+            }
+        }
+        else {  /* Non-UTF-8 target: each byte is one character */
+            while (   hardcount < max
+                   && scan < this_eol
+                   && withinCOUNT((U8) *scan, ANYOFRbase(p), ANYOFRdelta(p)))
+            {
+                scan++;
+                hardcount++;
+            }
+        }
+        break;
+
+    case ANYOFRb:   /* Matches code points in base .. base + delta */
+        if (utf8_target) {  /* First byte must equal FLAGS exactly */
+            while (   hardcount < max
+                   && scan < this_eol
+                   && (U8) *scan == ANYOF_FLAGS(p)
+                   && withinCOUNT(utf8_to_uvchr_buf((U8 *) scan,
+                                                (U8 *) this_eol,
+                                                NULL),
+                                  ANYOFRbase(p), ANYOFRdelta(p)))
+            {
+                scan += UTF8SKIP(scan);
+                hardcount++;
+            }
+        }
+        else {  /* Non-UTF-8 target: each byte is one character */
+            while (   hardcount < max
+                   && scan < this_eol
+                   && withinCOUNT((U8) *scan, ANYOFRbase(p), ANYOFRdelta(p)))
+            {
+                scan++;
+                hardcount++;
+            }
          }
          break;
  
@@ -9694,7 +10073,7 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
      *startposp = scan;
  
      DEBUG_r({
-       GET_RE_DEBUG_FLAGS_DECL;
+       DECLARE_AND_GET_RE_DEBUG_FLAGS;
         DEBUG_EXECUTE_r({
             SV * const prop = sv_newmortal();
              regprop(prog, prop, p, reginfo, NULL);
@@ -9725,8 +10104,9 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
  STATIC bool
  S_reginclass(pTHX_ regexp * const prog, const regnode * const n, const U8* const p, const U8* const p_end, const bool utf8_target)
  {
-    dVAR;
-    const char flags = ANYOF_FLAGS(n);
+    const char flags = (inRANGE(OP(n), ANYOFH, ANYOFHs))
+                        ? 0
+                        : ANYOF_FLAGS(n);
      bool match = FALSE;
      UV c = *p;
  
@@ -9753,7 +10133,7 @@ S_reginclass(pTHX_ regexp * const prog, const regnode * const n, const U8* const
      }
  
      /* If this character is potentially in the bitmap, check it */
-    if (c < NUM_ANYOF_CODE_POINTS && OP(n) != ANYOFH) {
+    if (c < NUM_ANYOF_CODE_POINTS && ! inRANGE(OP(n), ANYOFH, ANYOFHb)) {
         if (ANYOF_BITMAP_TEST(n, c))
             match = TRUE;
         else if ((flags
@@ -9766,7 +10146,7 @@ S_reginclass(pTHX_ regexp * const prog, const regnode * const n, const U8* const
         }
         else if (flags & ANYOF_LOCALE_FLAGS) {
             if (  (flags & ANYOFL_FOLD)
-                && c < sizeof(PL_fold_locale)
+                && c < 256
                 && ANYOF_BITMAP_TEST(n, PL_fold_locale[c]))
              {
                  match = TRUE;
@@ -9854,8 +10234,14 @@ S_reginclass(pTHX_ regexp * const prog, const regnode * const n, const U8* const
                           && IN_UTF8_CTYPE_LOCALE)))
          {
              SV* only_utf8_locale = NULL;
-           SV * const definition = _get_regclass_nonbitmap_data(prog, n, TRUE,
-                                                   0, &only_utf8_locale, NULL);
+           SV * const definition =
+#if !defined(PERL_IN_XSUB_RE) || defined(PLUGGABLE_RE_EXTENSION)
+                get_regclass_nonbitmap_data(prog, n, TRUE, 0,
+                                            &only_utf8_locale, NULL);
+#else
+                get_re_gclass_nonbitmap_data(prog, n, TRUE, 0,
+                                             &only_utf8_locale, NULL);
+#endif
             if (definition) {
                  U8 utf8_buffer[2];
                 U8 * utf8_p;
@@ -10055,6 +10441,7 @@ S_setup_eval_state(pTHX_ regmatch_info *const reginfo)
      regmatch_info_aux_eval *eval_state = reginfo->info_aux_eval;
  
      eval_state->rex = rex;
+    eval_state->sv  = reginfo->sv;
  
      if (reginfo->sv) {
          /* Make $_ available to executed code. */
@@ -10062,6 +10449,8 @@ S_setup_eval_state(pTHX_ regmatch_info *const reginfo)
              SAVE_DEFSV;
              DEFSV_set(reginfo->sv);
          }
+        /* will be dec'd by S_cleanup_regmatch_info_aux */
+        SvREFCNT_inc_NN(reginfo->sv);
  
          if (!(mg = mg_find_mglob(reginfo->sv))) {
              /* prepare for quick setting of pos */
@@ -10087,7 +10476,7 @@ S_setup_eval_state(pTHX_ regmatch_info *const reginfo)
              /* this regexp is also owned by the new PL_reg_curpm, which
                 will try to free it.  */
              av_push(PL_regex_padav, repointer);
-            PL_reg_curpm->op_pmoffset = av_tindex(PL_regex_padav);
+            PL_reg_curpm->op_pmoffset = av_top_index(PL_regex_padav);
              PL_regex_pad = AvARRAY(PL_regex_padav);
          }
  #endif
@@ -10153,6 +10542,7 @@ S_cleanup_regmatch_info_aux(pTHX_ void *arg)
          }
  
          PL_curpm = eval_state->curpm;
+        SvREFCNT_dec(eval_state->sv);
      }
  
      PL_regmatch_state = aux->old_regmatch_state;
@@ -10223,6 +10613,7 @@ S_to_byte_substr(pTHX_ regexp *prog)
             && !prog->substrs->data[i].substr) {
             SV* sv = newSVsv(prog->substrs->data[i].utf8_substr);
             if (! sv_utf8_downgrade(sv, TRUE)) {
+                SvREFCNT_dec_NN(sv);
                  return FALSE;
              }
              if (SvVALID(prog->substrs->data[i].utf8_substr)) {
@@ -10246,23 +10637,22 @@ S_to_byte_substr(pTHX_ regexp *prog)
  #ifndef PERL_IN_XSUB_RE
  
  bool
-Perl__is_grapheme(pTHX_ const U8 * strbeg, const U8 * s, const U8 * strend, const UV cp)
+Perl_is_grapheme(pTHX_ const U8 * strbeg, const U8 * s, const U8 * strend, const UV cp)
  {
      /* Temporary helper function for toke.c.  Verify that the code point 'cp'
       * is a stand-alone grapheme.  The UTF-8 for 'cp' begins at position 's' in
       * the larger string bounded by 'strbeg' and 'strend'.
       *
-     * 'cp' needs to be assigned (if not a future version of the Unicode
+     * 'cp' needs to be assigned (if not, a future version of the Unicode
       * Standard could make it something that combines with adjacent characters,
       * so code using it would then break), and there has to be a GCB break
       * before and after the character. */
  
-    dVAR;
  
      GCB_enum cp_gcb_val, prev_cp_gcb_val, next_cp_gcb_val;
      const U8 * prev_cp_start;
  
-    PERL_ARGS_ASSERT__IS_GRAPHEME;
+    PERL_ARGS_ASSERT_IS_GRAPHEME;
  
      if (   UNLIKELY(UNICODE_IS_SUPER(cp))
          || UNLIKELY(UNICODE_IS_NONCHAR(cp)))
@@ -10310,7 +10700,7 @@ Perl__is_grapheme(pTHX_ const U8 * strbeg, const U8 * s, const U8 * strend, cons
  }
  
  /*
-=head1 Unicode Support
+=for apidoc_section Unicode Support
  
  =for apidoc isSCRIPT_RUN
  
@@ -10379,7 +10769,6 @@ Perl_isSCRIPT_RUN(pTHX_ const U8 * s, const U8 * send, const bool utf8_target)
       * characters for at least one language in the Unicode Common Locale Data
       * Repository [CLDR]. */
  
-    dVAR;
  
      /* Things that match /\d/u */
      SV * decimals_invlist = PL_XPosix_ptrs[_CC_DIGIT];
@@ -10468,10 +10857,7 @@ Perl_isSCRIPT_RUN(pTHX_ const U8 * s, const U8 * send, const bool utf8_target)
          /* If is within the range [+0 .. +9] of the script's zero, it also is a
           * digit in that script.  We can skip the rest of this code for this
           * character. */
-        if (UNLIKELY(   zero_of_run
-                     && cp >= zero_of_run
-                     && cp - zero_of_run <= 9))
-        {
+        if (UNLIKELY(zero_of_run && withinCOUNT(cp, zero_of_run, 9))) {
              continue;
          }
  
@@ -10692,7 +11078,7 @@ Perl_isSCRIPT_RUN(pTHX_ const U8 * s, const U8 * send, const bool utf8_target)
           * several scripts, and the intersection is not empty.  However, if the
           * character is a decimal digit, it could still mean failure if it is
           * from the wrong sequence of 10.  So, we need to look at if it's a
-         * digit.  We've already handled the 10 decimal digits, and the next
+         * digit.  We've already handled the 10 digits [0-9], and the next
           * lowest one is this one: */
          if (cp < FIRST_NON_ASCII_DECIMAL_DIGIT) {
              continue;   /* Not a digit; this character is part of the run */
@@ -10704,9 +11090,7 @@ Perl_isSCRIPT_RUN(pTHX_ const U8 * s, const U8 * send, const bool utf8_target)
          if (   script_of_char >= 0
              && (zero_of_char = script_zeros[script_of_char]))
          {
-            if (   cp < zero_of_char
-                || cp > zero_of_char + 9)
-            {
+            if (! withinCOUNT(cp, zero_of_char, 9)) {
                  continue;   /* Not a digit; this character is part of the run
                               */
              }