regexec.c: restrict match to substring in regmatch()

[perl5.git] / regexec.c
diff --git a/regexec.c b/regexec.c

index 0ce50ff..bca2c4c 100644 (file)
--- a/regexec.c
+++ b/regexec.c
@@ -96,12 +96,6 @@ static const char* const non_utf8_target_but_utf8_required
                  = "Can't match, because target string needs to be in UTF-8\n";
  #endif
  
-/* Returns a boolean as to whether the input unsigned number is a power of 2
- * (2**0, 2**1, etc).  In other words if it has just a single bit set.
- * If not, subtracting 1 would leave the uppermost bit set, so the & would
- * yield non-zero */
-#define isPOWER_OF_2(n) ((n & (n-1)) == 0)
-
  #define NON_UTF8_TARGET_BUT_UTF8_REQUIRED(target) STMT_START {           \
      DEBUG_EXECUTE_r(Perl_re_printf( aTHX_  "%s", non_utf8_target_but_utf8_required));\
      goto target;                                                         \
@@ -113,13 +107,6 @@ static const char* const non_utf8_target_but_utf8_required
  #define        STATIC  static
  #endif
  
-/* Valid only if 'c', the character being looke-up, is an invariant under
- * UTF-8: it avoids the reginclass call if there are no complications: i.e., if
- * everything matchable is straight forward in the bitmap */
-#define REGINCLASS(prog,p,c,u)  (ANYOF_FLAGS(p)                             \
-                                ? reginclass(prog,p,c,c+1,u)                \
-                                : ANYOF_BITMAP_TEST(p,*(c)))
-
  /*
   * Forwards.
   */
@@ -165,48 +152,6 @@ static const char* const non_utf8_target_but_utf8_required
      : (U8*)(pos + off))
  #define HOP4c(pos,off,llim, rlim) ((char*)HOP4(pos,off,llim, rlim))
  
-#define NEXTCHR_EOS -10 /* nextchr has fallen off the end */
-#define NEXTCHR_IS_EOS (nextchr < 0)
-
-#define SET_nextchr \
-    nextchr = ((locinput < reginfo->strend) ? UCHARAT(locinput) : NEXTCHR_EOS)
-
-#define SET_locinput(p) \
-    locinput = (p);  \
-    SET_nextchr
-
-
-#define LOAD_UTF8_CHARCLASS(swash_ptr, property_name, invlist) STMT_START {   \
-        if (!swash_ptr) {                                                     \
-            U8 flags = _CORE_SWASH_INIT_ACCEPT_INVLIST;                       \
-            swash_ptr = _core_swash_init("utf8", property_name, &PL_sv_undef, \
-                                         1, 0, invlist, &flags);              \
-            assert(swash_ptr);                                                \
-        }                                                                     \
-    } STMT_END
-
-/* If in debug mode, we test that a known character properly matches */
-#ifdef DEBUGGING
-#   define LOAD_UTF8_CHARCLASS_DEBUG_TEST(swash_ptr,                          \
-                                          property_name,                      \
-                                          invlist,                            \
-                                          utf8_char_in_property)              \
-        LOAD_UTF8_CHARCLASS(swash_ptr, property_name, invlist);               \
-        assert(swash_fetch(swash_ptr, (U8 *) utf8_char_in_property, TRUE));
-#else
-#   define LOAD_UTF8_CHARCLASS_DEBUG_TEST(swash_ptr,                          \
-                                          property_name,                      \
-                                          invlist,                            \
-                                          utf8_char_in_property)              \
-        LOAD_UTF8_CHARCLASS(swash_ptr, property_name, invlist)
-#endif
-
-#define LOAD_UTF8_CHARCLASS_ALNUM() LOAD_UTF8_CHARCLASS_DEBUG_TEST(           \
-                                        PL_utf8_swash_ptrs[_CC_WORDCHAR],     \
-                                        "",                                   \
-                                        PL_XPosix_ptrs[_CC_WORDCHAR],         \
-                                        LATIN_SMALL_LIGATURE_LONG_S_T_UTF8);
-
  #define PLACEHOLDER    /* Something for the preprocessor to grab onto */
  /* TODO: Combine JUMPABLE and HAS_TEXT to cache OP(rn) */
  
@@ -234,22 +179,6 @@ static const char* const non_utf8_target_but_utf8_required
  
  #define HAS_TEXT(rn) ( IS_EXACT(rn) || PL_regkind[OP(rn)] == REF )
  
-#if 0 
-/* Currently these are only used when PL_regkind[OP(rn)] == EXACT so
-   we don't need this definition.  XXX These are now out-of-sync*/
-#define IS_TEXT(rn)   ( OP(rn)==EXACT   || OP(rn)==REF   || OP(rn)==NREF   )
-#define IS_TEXTF(rn)  ( OP(rn)==EXACTFU || OP(rn)==EXACTFU_SS || OP(rn)==EXACTFA || OP(rn)==EXACTFA_NO_TRIE || OP(rn)==EXACTF || OP(rn)==REFF  || OP(rn)==NREFF )
-#define IS_TEXTFL(rn) ( OP(rn)==EXACTFL || OP(rn)==REFFL || OP(rn)==NREFFL )
-
-#else
-/* ... so we use this as its faster. */
-#define IS_TEXT(rn)   ( OP(rn)==EXACT || OP(rn)==EXACTL )
-#define IS_TEXTFU(rn)  ( OP(rn)==EXACTFU || OP(rn)==EXACTFLU8 || OP(rn)==EXACTFU_SS || OP(rn) == EXACTFA || OP(rn) == EXACTFA_NO_TRIE)
-#define IS_TEXTF(rn)  ( OP(rn)==EXACTF  )
-#define IS_TEXTFL(rn) ( OP(rn)==EXACTFL )
-
-#endif
-
  /*
    Search for mandatory following text node; for lookahead, the text must
    follow but for lookbehind (rn->flags != 0) we skip to the next step.
@@ -360,7 +289,34 @@ S_regcppush(pTHX_ const regexp *rex, I32 parenfloor, U32 maxopenparen _pDEPTH)
      );                                                          \
      regcpblow(cp)
  
+/* set the start and end positions of capture ix */
+#define CLOSE_CAPTURE(ix, s, e)                                            \
+    rex->offs[ix].start = s;                                               \
+    rex->offs[ix].end = e;                                                 \
+    if (ix > rex->lastparen)                                               \
+        rex->lastparen = ix;                                               \
+    rex->lastcloseparen = ix;                                              \
+    DEBUG_BUFFERS_r(Perl_re_exec_indentf( aTHX_                            \
+        "CLOSE: rex=0x%" UVxf " offs=0x%" UVxf ": \\%" UVuf ": set %" IVdf "..%" IVdf " max: %" UVuf "\n", \
+        depth,                                                             \
+        PTR2UV(rex),                                                       \
+        PTR2UV(rex->offs),                                                 \
+        (UV)ix,                                                            \
+        (IV)rex->offs[ix].start,                                           \
+        (IV)rex->offs[ix].end,                                             \
+        (UV)rex->lastparen                                                 \
+    ))
+
  #define UNWIND_PAREN(lp, lcp)               \
+    DEBUG_BUFFERS_r(Perl_re_exec_indentf( aTHX_  \
+        "UNWIND_PAREN: rex=0x%" UVxf " offs=0x%" UVxf ": invalidate (%" UVuf "..%" UVuf "] set lcp: %" UVuf "\n", \
+        depth,                              \
+        PTR2UV(rex),                        \
+        PTR2UV(rex->offs),                  \
+        (UV)(lp),                           \
+        (UV)(rex->lastparen),               \
+        (UV)(lcp)                           \
+    ));                                     \
      for (n = rex->lastparen; n > lp; n--)   \
          rex->offs[n].end = -1;              \
      rex->lastparen = n;                     \
@@ -499,8 +455,36 @@ Perl_isFOO_lc(pTHX_ const U8 classnum, const U8 character)
  
  #endif
  
+PERL_STATIC_INLINE I32
+S_foldEQ_latin1_s2_folded(const char *s1, const char *s2, I32 len)
+{
+    /* Compare non-UTF-8 using Unicode (Latin1) semantics.  s2 must already be
+     * folded.  Works on all folds representable without UTF-8, except for
+     * LATIN_SMALL_LETTER_SHARP_S, and does not check for this.  Nor does it
+     * check that the strings each have at least 'len' characters.
+     *
+     * There is an almost identical API function where s2 need not be folded:
+     * Perl_foldEQ_latin1() */
+
+    const U8 *a = (const U8 *)s1;
+    const U8 *b = (const U8 *)s2;
+
+    PERL_ARGS_ASSERT_FOLDEQ_LATIN1_S2_FOLDED;
+
+    assert(len >= 0);
+
+    while (len--) {
+        assert(! isUPPER_L1(*b));
+        if (toLOWER_L1(*a) != *b) {
+            return 0;
+        }
+        a++, b++;
+    }
+    return 1;
+}
+
  STATIC bool
-S_isFOO_utf8_lc(pTHX_ const U8 classnum, const U8* character)
+S_isFOO_utf8_lc(pTHX_ const U8 classnum, const U8* character, const U8* e)
  {
      /* Returns a boolean as to whether or not the (well-formed) UTF-8-encoded
       * 'character' is a member of the Posix character class given by 'classnum'
@@ -510,7 +494,9 @@ S_isFOO_utf8_lc(pTHX_ const U8 classnum, const U8* character)
       * This just calls isFOO_lc on the code point for the character if it is in
       * the range 0-255.  Outside that range, all characters use Unicode
       * rules, ignoring any locale.  So use the Unicode function if this class
-     * requires a swash, and use the Unicode macro otherwise. */
+     * requires an inversion list, and use the Unicode macro otherwise. */
+
+    dVAR;
  
      PERL_ARGS_ASSERT_ISFOO_UTF8_LC;
  
@@ -522,162 +508,23 @@ S_isFOO_utf8_lc(pTHX_ const U8 classnum, const U8* character)
                          EIGHT_BIT_UTF8_TO_NATIVE(*character, *(character + 1)));
      }
  
-    _CHECK_AND_OUTPUT_WIDE_LOCALE_UTF8_MSG(character, character + UTF8SKIP(character));
-
-    if (classnum < _FIRST_NON_SWASH_CC) {
-
-        /* Initialize the swash unless done already */
-        if (! PL_utf8_swash_ptrs[classnum]) {
-            U8 flags = _CORE_SWASH_INIT_ACCEPT_INVLIST;
-            PL_utf8_swash_ptrs[classnum] =
-                    _core_swash_init("utf8",
-                                     "",
-                                     &PL_sv_undef, 1, 0,
-                                     PL_XPosix_ptrs[classnum], &flags);
-        }
-
-        return cBOOL(swash_fetch(PL_utf8_swash_ptrs[classnum], (U8 *)
-                                 character,
-                                 TRUE /* is UTF */ ));
-    }
+    _CHECK_AND_OUTPUT_WIDE_LOCALE_UTF8_MSG(character, e);
  
      switch ((_char_class_number) classnum) {
          case _CC_ENUM_SPACE:     return is_XPERLSPACE_high(character);
          case _CC_ENUM_BLANK:     return is_HORIZWS_high(character);
          case _CC_ENUM_XDIGIT:    return is_XDIGIT_high(character);
          case _CC_ENUM_VERTSPACE: return is_VERTWS_high(character);
-        default:                 break;
+        default:
+            return _invlist_contains_cp(PL_XPosix_ptrs[classnum],
+                                        utf8_to_uvchr_buf(character, e, NULL));
      }
  
      return FALSE; /* Things like CNTRL are always below 256 */
  }
  
-STATIC char *
-S_find_next_ascii(char * s, const char * send, const bool utf8_target)
-{
-    /* Returns the position of the first ASCII byte in the sequence between 's'
-     * and 'send-1' inclusive; returns 'send' if none found */
-
-    PERL_ARGS_ASSERT_FIND_NEXT_ASCII;
-
-#ifndef EBCDIC
-
-    if ((STRLEN) (send - s) >= PERL_WORDSIZE
-
-                            /* This term is wordsize if subword; 0 if not */
-                          + PERL_WORDSIZE * PERL_IS_SUBWORD_ADDR(s)
-
-                            /* 'offset' */
-                          - (PTR2nat(s) & PERL_WORD_BOUNDARY_MASK))
-    {
-
-        /* Process per-byte until reach word boundary.  XXX This loop could be
-         * eliminated if we knew that this platform had fast unaligned reads */
-        while (PTR2nat(s) & PERL_WORD_BOUNDARY_MASK) {
-            if (isASCII(*s)) {
-                return s;
-            }
-            s++;    /* khw didn't bother creating a separate loop for
-                       utf8_target */
-        }
-
-        /* Here, we know we have at least one full word to process.  Process
-         * per-word as long as we have at least a full word left */
-        do {
-            PERL_UINTMAX_T complemented = ~ * (PERL_UINTMAX_T *) s;
-            if (complemented & PERL_VARIANTS_WORD_MASK)  {
-
-#if   BYTEORDER == 0x1234 || BYTEORDER == 0x12345678    \
-   || BYTEORDER == 0x4321 || BYTEORDER == 0x87654321
-
-                s += _variant_byte_number(complemented);
-                return s;
-
-#else   /* If weird byte order, drop into next loop to do byte-at-a-time
-           checks. */
-
-                break;
-#endif
-            }
-
-            s += PERL_WORDSIZE;
-
-        } while (s + PERL_WORDSIZE <= send);
-    }
-
-#endif
-
-    /* Process per-character */
-    if (utf8_target) {
-        while (s < send) {
-            if (isASCII(*s)) {
-                return s;
-            }
-            s += UTF8SKIP(s);
-        }
-    }
-    else {
-        while (s < send) {
-            if (isASCII(*s)) {
-                return s;
-            }
-            s++;
-        }
-    }
-
-    return s;
-}
-
-STATIC char *
-S_find_next_non_ascii(char * s, const char * send, const bool utf8_target)
-{
-    /* Returns the position of the first non-ASCII byte in the sequence between
-     * 's' and 'send-1' inclusive; returns 'send' if none found */
-
-#ifdef EBCDIC
-
-    PERL_ARGS_ASSERT_FIND_NEXT_NON_ASCII;
-
-    if (utf8_target) {
-        while (s < send) {
-            if ( ! isASCII(*s)) {
-                return s;
-            }
-            s += UTF8SKIP(s);
-        }
-    }
-    else {
-        while (s < send) {
-            if ( ! isASCII(*s)) {
-                return s;
-            }
-            s++;
-        }
-    }
-
-    return s;
-
-#else
-
-    const U8 * next_non_ascii = NULL;
-
-    PERL_ARGS_ASSERT_FIND_NEXT_NON_ASCII;
-    PERL_UNUSED_ARG(utf8_target);
-
-    /* On ASCII platforms invariants and ASCII are identical, so if the string
-     * is entirely invariants, there is no non-ASCII character */
-    return (is_utf8_invariant_string_loc((U8 *) s,
-                                         (STRLEN) (send - s),
-                                         &next_non_ascii))
-            ? (char *) send
-            : (char *) next_non_ascii;
-
-#endif
-
-}
-
-STATIC char *
-S_find_span_end(char * s, const char * send, const char span_byte)
+STATIC U8 *
+S_find_span_end(U8 * s, const U8 * send, const U8 span_byte)
  {
      /* Returns the position of the first byte in the sequence between 's' and
       * 'send-1' inclusive that isn't 'span_byte'; returns 'send' if none found.
@@ -714,8 +561,15 @@ S_find_span_end(char * s, const char * send, const char span_byte)
                  continue;
              }
  
-            /* Here, at least one byte in the word isn't 'span_byte'.  This xor
-             * leaves 1 bits only in those non-matching bytes */
+            /* Here, at least one byte in the word isn't 'span_byte'. */
+
+#ifdef EBCDIC
+
+            break;
+
+#else
+
+            /* This xor leaves 1 bits only in those non-matching bytes */
              span_word ^= * (PERL_UINTMAX_T *) s;
  
              /* Make sure the upper bit of each non-matching byte is set.  This
@@ -727,6 +581,8 @@ S_find_span_end(char * s, const char * send, const char span_byte)
              /* That reduces the problem to what this function solves */
              return s + _variant_byte_number(span_word);
  
+#endif
+
          } while (s + PERL_WORDSIZE <= send);
      }
  
@@ -741,8 +597,8 @@ S_find_span_end(char * s, const char * send, const char span_byte)
      return s;
  }
  
-STATIC char *
-S_find_next_masked(char * s, const char * send, const U8 byte, const U8 mask)
+STATIC U8 *
+S_find_next_masked(U8 * s, const U8 * send, const U8 byte, const U8 mask)
  {
      /* Returns the position of the first byte in the sequence between 's'
       * and 'send-1' inclusive that when ANDed with 'mask' yields 'byte';
@@ -754,28 +610,32 @@ S_find_next_masked(char * s, const char * send, const U8 byte, const U8 mask)
      assert(send >= s);
      assert((byte & mask) == byte);
  
+#ifndef EBCDIC
+
      if ((STRLEN) (send - s) >= PERL_WORDSIZE
                            + PERL_WORDSIZE * PERL_IS_SUBWORD_ADDR(s)
                            - (PTR2nat(s) & PERL_WORD_BOUNDARY_MASK))
      {
-        PERL_UINTMAX_T word_complemented, mask_word;
+        PERL_UINTMAX_T word, mask_word;
  
          while (PTR2nat(s) & PERL_WORD_BOUNDARY_MASK) {
-            if (((* (U8 *) s) & mask) == byte) {
+            if (((*s) & mask) == byte) {
                  return s;
              }
              s++;
          }
  
-        word_complemented = ~ (PERL_COUNT_MULTIPLIER * byte);
-        mask_word =            PERL_COUNT_MULTIPLIER * mask;
+        word      = PERL_COUNT_MULTIPLIER * byte;
+        mask_word = PERL_COUNT_MULTIPLIER * mask;
  
          do {
              PERL_UINTMAX_T masked = (* (PERL_UINTMAX_T *) s) & mask_word;
  
-            /* If 'masked' contains 'byte' within it, anding with the
-             * complement will leave those 8 bits 0 */
-            masked &= word_complemented;
+            /* If 'masked' contains bytes with the bit pattern of 'byte' within
+             * it, xoring with 'word' will set all 8 bits in each such byte to
+             * 0, and no byte containing any other bit pattern will become
+             * 0. */
+            masked ^= word;
  
              /* This causes the most significant bit to be set to 1 for any
               * bytes in the word that aren't completely 0 */
@@ -803,8 +663,10 @@ S_find_next_masked(char * s, const char * send, const U8 byte, const U8 mask)
          } while (s + PERL_WORDSIZE <= send);
      }
  
+#endif
+
      while (s < send) {
-        if (((* (U8 *) s) & mask) == byte) {
+        if (((*s) & mask) == byte) {
              return s;
          }
          s++;
@@ -834,7 +696,7 @@ S_find_span_end_mask(U8 * s, const U8 * send, const U8 span_byte, const U8 mask)
          PERL_UINTMAX_T span_word, mask_word;
  
          while (PTR2nat(s) & PERL_WORD_BOUNDARY_MASK) {
-            if (((* (U8 *) s) & mask) != span_byte) {
+            if (((*s) & mask) != span_byte) {
                  return s;
              }
              s++;
@@ -851,17 +713,25 @@ S_find_span_end_mask(U8 * s, const U8 * send, const U8 span_byte, const U8 mask)
                  continue;
              }
  
+#ifdef EBCDIC
+
+            break;
+
+#else
+
              masked ^= span_word;
              masked |= masked << 1;
              masked |= masked << 2;
              masked |= masked << 4;
              return s + _variant_byte_number(masked);
  
+#endif
+
          } while (s + PERL_WORDSIZE <= send);
      }
  
      while (s < send) {
-        if (((* (U8 *) s) & mask) != span_byte) {
+        if (((*s) & mask) != span_byte) {
              return s;
          }
          s++;
@@ -1238,7 +1108,7 @@ Perl_re_intuit_start(pTHX_
  
              if (check_len > targ_len) {
                  DEBUG_EXECUTE_r(Perl_re_printf( aTHX_
-                             "Anchored string too short...\n"));
+                             "Target string too short to match required substring...\n"));
                  goto fail_finish;
              }
  
@@ -1253,6 +1123,8 @@ Perl_re_intuit_start(pTHX_
                                  end_point - check_len
                              )
                              + check_len;
+                if (end_point < start_point)
+                    goto fail_finish;
              }
          }
  
@@ -1805,30 +1677,34 @@ Perl_re_intuit_start(pTHX_
  #define DECL_TRIE_TYPE(scan) \
      const enum { trie_plain, trie_utf8, trie_utf8_fold, trie_latin_utf8_fold,       \
                   trie_utf8_exactfa_fold, trie_latin_utf8_exactfa_fold,              \
-                 trie_utf8l, trie_flu8 }                                            \
+                 trie_utf8l, trie_flu8, trie_flu8_latin }                           \
                      trie_type = ((scan->flags == EXACT)                             \
                                   ? (utf8_target ? trie_utf8 : trie_plain)           \
                                   : (scan->flags == EXACTL)                          \
                                      ? (utf8_target ? trie_utf8l : trie_plain)       \
-                                    : (scan->flags == EXACTFA)                      \
+                                    : (scan->flags == EXACTFAA)                     \
                                        ? (utf8_target                                \
                                           ? trie_utf8_exactfa_fold                   \
                                           : trie_latin_utf8_exactfa_fold)            \
                                        : (scan->flags == EXACTFLU8                   \
-                                         ? trie_flu8                                \
+                                         ? (utf8_target                             \
+                                           ? trie_flu8                              \
+                                           : trie_flu8_latin)                       \
                                           : (utf8_target                             \
                                             ? trie_utf8_fold                         \
-                                           :   trie_latin_utf8_fold)))
+                                           : trie_latin_utf8_fold)))
  
-#define REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc, uscan, len, uvc, charid, foldlen, foldbuf, uniflags) \
+/* 'uscan' is set to foldbuf, and incremented, so in the code below, the end
+ * of uscan is 'foldbuf+sizeof(foldbuf)' */
+#define REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc, uc_end, uscan, len, uvc, charid, foldlen, foldbuf, uniflags) \
  STMT_START {                                                                        \
      STRLEN skiplen;                                                                 \
      U8 flags = FOLD_FLAGS_FULL;                                                     \
      switch (trie_type) {                                                            \
      case trie_flu8:                                                                 \
          _CHECK_AND_WARN_PROBLEMATIC_LOCALE;                                         \
-        if (utf8_target && UTF8_IS_ABOVE_LATIN1(*uc)) {                             \
-            _CHECK_AND_OUTPUT_WIDE_LOCALE_UTF8_MSG(uc, uc + UTF8SKIP(uc));          \
+        if (UTF8_IS_ABOVE_LATIN1(*uc)) {                                            \
+            _CHECK_AND_OUTPUT_WIDE_LOCALE_UTF8_MSG(uc, uc_end);                     \
          }                                                                           \
          goto do_trie_utf8_fold;                                                     \
      case trie_utf8_exactfa_fold:                                                    \
@@ -1837,25 +1713,29 @@ STMT_START {
      case trie_utf8_fold:                                                            \
        do_trie_utf8_fold:                                                            \
          if ( foldlen>0 ) {                                                          \
-            uvc = utf8n_to_uvchr( (const U8*) uscan, UTF8_MAXLEN, &len, uniflags ); \
+            uvc = utf8n_to_uvchr( (const U8*) uscan, foldlen, &len, uniflags );     \
              foldlen -= len;                                                         \
              uscan += len;                                                           \
              len=0;                                                                  \
          } else {                                                                    \
-            len = UTF8SKIP(uc);                                                     \
-            uvc = _toFOLD_utf8_flags( (const U8*) uc, uc + len, foldbuf, &foldlen,  \
+            uvc = _toFOLD_utf8_flags( (const U8*) uc, uc_end, foldbuf, &foldlen,    \
                                                                              flags); \
+            len = UTF8SKIP(uc);                                                     \
              skiplen = UVCHR_SKIP( uvc );                                            \
              foldlen -= skiplen;                                                     \
              uscan = foldbuf + skiplen;                                              \
          }                                                                           \
          break;                                                                      \
+    case trie_flu8_latin:                                                           \
+        _CHECK_AND_WARN_PROBLEMATIC_LOCALE;                                         \
+        goto do_trie_latin_utf8_fold;                                               \
      case trie_latin_utf8_exactfa_fold:                                              \
          flags |= FOLD_FLAGS_NOMIX_ASCII;                                            \
          /* FALLTHROUGH */                                                           \
      case trie_latin_utf8_fold:                                                      \
+      do_trie_latin_utf8_fold:                                                      \
          if ( foldlen>0 ) {                                                          \
-            uvc = utf8n_to_uvchr( (const U8*) uscan, UTF8_MAXLEN, &len, uniflags ); \
+            uvc = utf8n_to_uvchr( (const U8*) uscan, foldlen, &len, uniflags );     \
              foldlen -= len;                                                         \
              uscan += len;                                                           \
              len=0;                                                                  \
@@ -1870,11 +1750,11 @@ STMT_START {
      case trie_utf8l:                                                                \
          _CHECK_AND_WARN_PROBLEMATIC_LOCALE;                                         \
          if (utf8_target && UTF8_IS_ABOVE_LATIN1(*uc)) {                             \
-            _CHECK_AND_OUTPUT_WIDE_LOCALE_UTF8_MSG(uc, uc + UTF8SKIP(uc));          \
+            _CHECK_AND_OUTPUT_WIDE_LOCALE_UTF8_MSG(uc, uc_end);                     \
          }                                                                           \
          /* FALLTHROUGH */                                                           \
      case trie_utf8:                                                                 \
-        uvc = utf8n_to_uvchr( (const U8*) uc, UTF8_MAXLEN, &len, uniflags );        \
+        uvc = utf8n_to_uvchr( (const U8*) uc, uc_end - uc, &len, uniflags );        \
          break;                                                                      \
      case trie_plain:                                                                \
          uvc = (UV)*uc;                                                              \
@@ -1898,17 +1778,6 @@ STMT_START {
      dump_exec_pos(li,s,(reginfo->strend),(reginfo->strbeg), \
                  startpos, doutf8, depth)
  
-#define REXEC_FBC_EXACTISH_SCAN(COND)                     \
-STMT_START {                                              \
-    while (s <= e) {                                      \
-       if ( (COND)                                       \
-            && (ln == 1 || folder(s, pat_string, ln))    \
-            && (reginfo->intuit || regtry(reginfo, &s)) )\
-           goto got_it;                                  \
-       s++;                                              \
-    }                                                     \
-} STMT_END
-
  #define REXEC_FBC_SCAN(UTF8, CODE)                          \
      STMT_START {                                            \
          while (s < strend) {                                \
@@ -1960,7 +1829,7 @@ STMT_START {                                              \
   * there is no such occurrence. */
  #define REXEC_FBC_FIND_NEXT_SCAN(UTF8, f)                   \
      while (s < strend) {                                    \
-        s = f;                                              \
+        s = (f);                                            \
          if (s >= strend) {                                  \
              break;                                          \
          }                                                   \
@@ -2032,7 +1901,6 @@ STMT_START {                                              \
                                                         0, UTF8_ALLOW_DEFAULT); \
      }                                                                          \
      tmp = TEST_UV(tmp);                                                        \
-    LOAD_UTF8_CHARCLASS_ALNUM();                                               \
      REXEC_FBC_SCAN(1,  /* 1=>is-utf8; advances s while s < strend */           \
          if (tmp == ! (TEST_UTF8((U8 *) s, (U8 *) reginfo->strend))) {          \
              tmp = !tmp;                                                        \
@@ -2113,7 +1981,7 @@ STMT_START {                                              \
  #ifdef DEBUGGING
  static IV
  S_get_break_val_cp_checked(SV* const invlist, const UV cp_in) {
-  IV cp_out = Perl__invlist_search(invlist, cp_in);
+  IV cp_out = _invlist_search(invlist, cp_in);
    assert(cp_out >= 0);
    return cp_out;
  }
@@ -2210,7 +2078,7 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
      STRLEN lnc;
      U8 c1;
      U8 c2;
-    char *e;
+    char *e = NULL;
  
      /* In some cases we accept only the first occurence of 'x' in a sequence of
       * them.  This variable points to just beyond the end of the previous
@@ -2234,6 +2102,7 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
  
      /* We know what class it must start with. */
      switch (OP(c)) {
+    case ANYOFPOSIXL:
      case ANYOFL:
          _CHECK_AND_WARN_PROBLEMATIC_LOCALE;
  
@@ -2248,7 +2117,10 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
              REXEC_FBC_CLASS_SCAN(1, /* 1=>is-utf8 */
                        reginclass(prog, c, (U8*)s, (U8*) strend, utf8_target));
          }
-        else if (ANYOF_FLAGS(c)) {
+        else if (ANYOF_FLAGS(c) & ~ ANYOF_MATCHES_ALL_ABOVE_BITMAP) {
+            /* We know that s is in the bitmap range since the target isn't
+             * UTF-8, so what happens for out-of-range values is not relevant,
+             * so exclude that from the flags */
              REXEC_FBC_CLASS_SCAN(0, reginclass(prog,c, (U8*)s, (U8*)s+1, 0));
          }
          else {
@@ -2259,25 +2131,51 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
      case ANYOFM:    /* ARG() is the base byte; FLAGS() the mask byte */
          /* UTF-8ness doesn't matter, so use 0 */
          REXEC_FBC_FIND_NEXT_SCAN(0,
-                                 find_next_masked(s, strend, ARG(c), FLAGS(c)));
+         (char *) find_next_masked((U8 *) s, (U8 *) strend,
+                                   (U8) ARG(c), FLAGS(c)));
+        break;
+
+    case NANYOFM:
+        REXEC_FBC_FIND_NEXT_SCAN(0,
+         (char *) find_span_end_mask((U8 *) s, (U8 *) strend,
+                                   (U8) ARG(c), FLAGS(c)));
          break;
  
-    case EXACTFA_NO_TRIE:   /* This node only generated for non-utf8 patterns */
+    case ANYOFH:
+        if (utf8_target) REXEC_FBC_CLASS_SCAN(TRUE,
+                      reginclass(prog, c, (U8*)s, (U8*) strend, utf8_target));
+        break;
+
+    case EXACTFAA_NO_TRIE: /* This node only generated for non-utf8 patterns */
          assert(! is_utf8_pat);
         /* FALLTHROUGH */
-    case EXACTFA:
-        if (is_utf8_pat || utf8_target) {
+    case EXACTFAA:
+        if (is_utf8_pat) {
+            utf8_fold_flags = FOLDEQ_UTF8_NOMIX_ASCII
+                             |FOLDEQ_S2_ALREADY_FOLDED|FOLDEQ_S2_FOLDS_SANE;
+            goto do_exactf_utf8;
+        }
+        else if (utf8_target) {
+
+            /* Here, and elsewhere in this file, the reason we can't consider a
+             * non-UTF-8 pattern already folded in the presence of a UTF-8
+             * target is that any MICRO SIGN in the pattern won't be folded.
+             * Since the fold of the MICRO SIGN requires UTF-8 to represent, we
+             * can consider a non-UTF-8 pattern folded when matching a
+             * non-UTF-8 target. */
              utf8_fold_flags = FOLDEQ_UTF8_NOMIX_ASCII;
              goto do_exactf_utf8;
          }
-        fold_array = PL_fold_latin1;    /* Latin1 folds are not affected by */
-        folder = foldEQ_latin1;                /* /a, except the sharp s one which */
-        goto do_exactf_non_utf8;       /* isn't dealt with by these */
+
+        /* Latin1 folds are not affected by /a, except it excludes the sharp s,
+         * which these functions don't handle anyway */
+        fold_array = PL_fold_latin1;
+        folder = foldEQ_latin1_s2_folded;
+        goto do_exactf_non_utf8;
  
      case EXACTF:   /* This node only generated for non-utf8 patterns */
          assert(! is_utf8_pat);
          if (utf8_target) {
-            utf8_fold_flags = 0;
              goto do_exactf_utf8;
          }
          fold_array = PL_fold;
@@ -2294,10 +2192,10 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
          folder = foldEQ_locale;
          goto do_exactf_non_utf8;
  
-    case EXACTFU_SS:
-        if (is_utf8_pat) {
-            utf8_fold_flags = FOLDEQ_S2_ALREADY_FOLDED;
-        }
+    case EXACTFUP:      /* Problematic even though pattern isn't UTF-8.  Use
+                           full functionality normally not done except for
+                           UTF-8 */
+        assert(! is_utf8_pat);
          goto do_exactf_utf8;
  
      case EXACTFLU8:
@@ -2309,9 +2207,17 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
                                               | FOLDEQ_S2_FOLDS_SANE;
              goto do_exactf_utf8;
  
+    case EXACTFU_ONLY8:
+        if (! utf8_target) {
+            break;
+        }
+        assert(is_utf8_pat);
+        utf8_fold_flags = FOLDEQ_S2_ALREADY_FOLDED;
+        goto do_exactf_utf8;
+
      case EXACTFU:
          if (is_utf8_pat || utf8_target) {
-            utf8_fold_flags = is_utf8_pat ? FOLDEQ_S2_ALREADY_FOLDED : 0;
+            utf8_fold_flags = FOLDEQ_S2_ALREADY_FOLDED;
              goto do_exactf_utf8;
          }
  
@@ -2319,7 +2225,7 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
           * so we don't have to worry here about this single special case
           * in the Latin1 range */
          fold_array = PL_fold_latin1;
-        folder = foldEQ_latin1;
+        folder = foldEQ_latin1_s2_folded;
  
          /* FALLTHROUGH */
  
@@ -2348,10 +2254,57 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
          c1 = *pat_string;
          c2 = fold_array[c1];
          if (c1 == c2) { /* If char and fold are the same */
-            REXEC_FBC_EXACTISH_SCAN(*(U8*)s == c1);
+            while (s <= e) {
+                s = (char *) memchr(s, c1, e + 1 - s);
+                if (s == NULL) {
+                    break;
+                }
+
+                /* Check that the rest of the node matches */
+                if (   (ln == 1 || folder(s + 1, pat_string + 1, ln - 1))
+                    && (reginfo->intuit || regtry(reginfo, &s)) )
+                {
+                    goto got_it;
+                }
+                s++;
+            }
          }
          else {
-            REXEC_FBC_EXACTISH_SCAN(*(U8*)s == c1 || *(U8*)s == c2);
+            U8 bits_differing = c1 ^ c2;
+
+            /* If the folds differ in one bit position only, we can mask to
+             * match either of them, and can use this faster find method.  Both
+             * ASCII and EBCDIC tend to have their case folds differ in only
+             * one position, so this is very likely */
+            if (LIKELY(PL_bitcount[bits_differing] == 1)) {
+                bits_differing = ~ bits_differing;
+                while (s <= e) {
+                    s = (char *) find_next_masked((U8 *) s, (U8 *) e + 1,
+                                        (c1 & bits_differing), bits_differing);
+                    if (s > e) {
+                        break;
+                    }
+
+                    if (   (ln == 1 || folder(s + 1, pat_string + 1, ln - 1))
+                        && (reginfo->intuit || regtry(reginfo, &s)) )
+                    {
+                        goto got_it;
+                    }
+                    s++;
+                }
+            }
+            else {  /* Otherwise, stuck with looking byte-at-a-time.  This
+                       should actually happen only in EXACTFL nodes */
+                while (s <= e) {
+                    if (    (*(U8*)s == c1 || *(U8*)s == c2)
+                        && (ln == 1 || folder(s + 1, pat_string + 1, ln - 1))
+                        && (reginfo->intuit || regtry(reginfo, &s)) )
+                    {
+                        goto got_it;
+                    }
+                    s++;
+                }
+            }
          }
          break;
  
@@ -2727,22 +2680,6 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
          );
          break;
  
-    case ASCII:
-        REXEC_FBC_FIND_NEXT_SCAN(0, find_next_ascii(s, strend, utf8_target));
-        break;
-
-    case NASCII:
-        if (utf8_target) {
-            REXEC_FBC_FIND_NEXT_SCAN(1, find_next_non_ascii(s, strend,
-                                                            utf8_target));
-        }
-        else {
-            REXEC_FBC_FIND_NEXT_SCAN(0, find_next_non_ascii(s, strend,
-                                                            utf8_target));
-        }
-
-        break;
-
      /* The argument to all the POSIX node types is the class number to pass to
       * _generic_isCC() to build a mask for searching in PL_charclass[] */
  
@@ -2752,7 +2689,7 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
  
      case POSIXL:
          _CHECK_AND_WARN_PROBLEMATIC_LOCALE;
-        REXEC_FBC_CSCAN(to_complement ^ cBOOL(isFOO_utf8_lc(FLAGS(c), (U8 *) s)),
+        REXEC_FBC_CSCAN(to_complement ^ cBOOL(isFOO_utf8_lc(FLAGS(c), (U8 *) s, (U8 *) strend)),
                          to_complement ^ cBOOL(isFOO_lc(FLAGS(c), *s)));
          break;
  
@@ -2807,29 +2744,15 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
  
            posix_utf8:
              classnum = (_char_class_number) FLAGS(c);
-            if (classnum < _FIRST_NON_SWASH_CC) {
-                while (s < strend) {
-
-                    /* We avoid loading in the swash as long as possible, but
-                     * should we have to, we jump to a separate loop.  This
-                     * extra 'if' statement is what keeps this code from being
-                     * just a call to REXEC_FBC_CLASS_SCAN() */
-                    if (UTF8_IS_ABOVE_LATIN1(*s)) {
-                        goto found_above_latin1;
-                    }
-
-                    REXEC_FBC_CLASS_SCAN_GUTS(1, (UTF8_IS_INVARIANT(*s)
-                         && to_complement ^ cBOOL(_generic_isCC((U8) *s,
-                                                                classnum)))
-                        || (   UTF8_IS_NEXT_CHAR_DOWNGRADEABLE(s, strend)
-                            && to_complement ^ cBOOL(
-                                _generic_isCC(EIGHT_BIT_UTF8_TO_NATIVE(*s,
-                                                                      *(s + 1)),
-                                              classnum))));
-                }
-            }
-            else switch (classnum) {    /* These classes are implemented as
-                                           macros */
+            switch (classnum) {
+                default:
+                    REXEC_FBC_CLASS_SCAN(1, /* 1=>is-utf8 */
+                        to_complement ^ cBOOL(_invlist_contains_cp(
+                                              PL_XPosix_ptrs[classnum],
+                                              utf8_to_uvchr_buf((U8 *) s,
+                                                                (U8 *) strend,
+                                                                NULL))));
+                    break;
                  case _CC_ENUM_SPACE:
                      REXEC_FBC_CLASS_SCAN(1, /* 1=>is-utf8 */
                          to_complement ^ cBOOL(isSPACE_utf8_safe(s, strend)));
@@ -2854,37 +2777,10 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
                      REXEC_FBC_CLASS_SCAN(1,
                          to_complement ^ cBOOL(isCNTRL_utf8_safe(s, strend)));
                      break;
-
-                default:
-                    Perl_croak(aTHX_ "panic: find_byclass() node %d='%s' has an unexpected character class '%d'", OP(c), PL_reg_name[OP(c)], classnum);
-                    NOT_REACHED; /* NOTREACHED */
              }
          }
          break;
  
-      found_above_latin1:   /* Here we have to load a swash to get the result
-                               for the current code point */
-        if (! PL_utf8_swash_ptrs[classnum]) {
-            U8 flags = _CORE_SWASH_INIT_ACCEPT_INVLIST;
-            PL_utf8_swash_ptrs[classnum] =
-                    _core_swash_init("utf8",
-                                     "",
-                                     &PL_sv_undef, 1, 0,
-                                     PL_XPosix_ptrs[classnum], &flags);
-        }
-
-        /* This is a copy of the loop above for swash classes, though using the
-         * FBC macro instead of being expanded out.  Since we've loaded the
-         * swash, we don't have to check for that each time through the loop */
-        REXEC_FBC_CLASS_SCAN(1, /* 1=>is-utf8 */
-                to_complement ^ cBOOL(_generic_utf8_safe(
-                                      classnum,
-                                      s,
-                                      strend,
-                                      swash_fetch(PL_utf8_swash_ptrs[classnum],
-                                                  (U8 *) s, TRUE))));
-        break;
-
      case AHOCORASICKC:
      case AHOCORASICK:
          {
@@ -3000,10 +2896,10 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
                      }
                      points[pointpos++ % maxlen]= uc;
                      if (foldlen || uc < (U8*)strend) {
-                        REXEC_TRIE_READ_CHAR(trie_type, trie,
-                                         widecharmap, uc,
-                                         uscan, len, uvc, charid, foldlen,
-                                         foldbuf, uniflags);
+                        REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc,
+                                             (U8 *) strend, uscan, len, uvc,
+                                             charid, foldlen, foldbuf,
+                                             uniflags);
                          DEBUG_TRIE_EXECUTE_r({
                              dump_exec_pos( (char *)uc, c, strend,
                                          real_start, s, utf8_target, 0);
@@ -3518,7 +3414,8 @@ Perl_regexec_flags(pTHX_ REGEXP * const rx, char *stringarg, char *strend,
             we switch it back; otherwise we leave it swapped.
          */
          swap = prog->offs;
-        /* do we need a save destructor here for eval dies? */
+        /* avoid leak if we die, or clean up anyway if match completes */
+        SAVEFREEPV(swap);
          Newxz(prog->offs, (prog->nparens + 1), regexp_paren_pair);
          DEBUG_BUFFERS_r(Perl_re_exec_indentf( aTHX_
             "rex=0x%" UVxf " saving  offs: orig=0x%" UVxf " new=0x%" UVxf "\n",
@@ -3903,17 +3800,6 @@ Perl_regexec_flags(pTHX_ REGEXP * const rx, char *stringarg, char *strend,
          goto phooey;
      }
  
-    DEBUG_BUFFERS_r(
-       if (swap)
-            Perl_re_exec_indentf( aTHX_
-               "rex=0x%" UVxf " freeing offs: 0x%" UVxf "\n",
-               0,
-                PTR2UV(prog),
-               PTR2UV(swap)
-           );
-    );
-    Safefree(swap);
-
      /* clean up; this will trigger destructors that will free all slabs
       * above the current one, and cleanup the regmatch_info_aux
       * and regmatch_info_aux_eval sructs */
@@ -3935,24 +3821,29 @@ Perl_regexec_flags(pTHX_ REGEXP * const rx, char *stringarg, char *strend,
      DEBUG_EXECUTE_r(Perl_re_printf( aTHX_  "%sMatch failed%s\n",
                           PL_colors[4], PL_colors[5]));
  
-    /* clean up; this will trigger destructors that will free all slabs
-     * above the current one, and cleanup the regmatch_info_aux
-     * and regmatch_info_aux_eval sructs */
-
-    LEAVE_SCOPE(oldsave);
-
      if (swap) {
-        /* we failed :-( roll it back */
+        /* we failed :-( roll it back.
+         * Since the swap buffer will be freed on scope exit which follows
+         * shortly, restore the old captures by copying 'swap's original
+         * data to the new offs buffer
+         */
          DEBUG_BUFFERS_r(Perl_re_exec_indentf( aTHX_
-           "rex=0x%" UVxf " rolling back offs: freeing=0x%" UVxf " restoring=0x%" UVxf "\n",
+           "rex=0x%" UVxf " rolling back offs: 0x%" UVxf " will be freed; restoring data to =0x%" UVxf "\n",
             0,
              PTR2UV(prog),
             PTR2UV(prog->offs),
             PTR2UV(swap)
         ));
-        Safefree(prog->offs);
-        prog->offs = swap;
+
+        Copy(swap, prog->offs, prog->nparens + 1, regexp_paren_pair);
      }
+
+    /* clean up; this will trigger destructors that will free all slabs
+     * above the current one, and cleanup the regmatch_info_aux
+     * and regmatch_info_aux_eval structs */
+
+    LEAVE_SCOPE(oldsave);
+
      return 0;
  }
  
@@ -4035,18 +3926,6 @@ S_regtry(pTHX_ regmatch_info *reginfo, char **startposp)
      return 0;
  }
  
-
-#define sayYES goto yes
-#define sayNO goto no
-#define sayNO_SILENT goto no_silent
-
-/* we dont use STMT_START/END here because it leads to 
-   "unreachable code" warnings, which are bogus, but distracting. */
-#define CACHEsayNO \
-    if (ST.cache_mask) \
-       reginfo->info_aux->poscache[ST.cache_offset] |= ST.cache_mask; \
-    sayNO
-
  /* this is used to determine how far from the left messages like
     'failed...' are printed in regexec.c. It should be set such that
     messages are inline with the regop output that created them.
@@ -4069,12 +3948,6 @@ Perl_re_exec_indentf(pTHX_ const char *fmt, U32 depth, ...)
  }
  #endif /* DEBUGGING */
  
-
-#define CHRTEST_UNINIT -1001 /* c1/c2 haven't been calculated yet */
-#define CHRTEST_VOID   -1000 /* the c1/c2 "next char" test should be skipped */
-#define CHRTEST_NOT_A_CP_1 -999
-#define CHRTEST_NOT_A_CP_2 -998
-
  /* grab a new slab and return the first slot in it */
  
  STATIC regmatch_state *
@@ -4091,177 +3964,6 @@ S_push_slab(pTHX)
      return SLAB_FIRST(s);
  }
  
-
-/* push a new state then goto it */
-
-#define PUSH_STATE_GOTO(state, node, input) \
-    pushinput = input; \
-    scan = node; \
-    st->resume_state = state; \
-    goto push_state;
-
-/* push a new state with success backtracking, then goto it */
-
-#define PUSH_YES_STATE_GOTO(state, node, input) \
-    pushinput = input; \
-    scan = node; \
-    st->resume_state = state; \
-    goto push_yes_state;
-
-
-
-
-/*
-
-regmatch() - main matching routine
-
-This is basically one big switch statement in a loop. We execute an op,
-set 'next' to point the next op, and continue. If we come to a point which
-we may need to backtrack to on failure such as (A|B|C), we push a
-backtrack state onto the backtrack stack. On failure, we pop the top
-state, and re-enter the loop at the state indicated. If there are no more
-states to pop, we return failure.
-
-Sometimes we also need to backtrack on success; for example /A+/, where
-after successfully matching one A, we need to go back and try to
-match another one; similarly for lookahead assertions: if the assertion
-completes successfully, we backtrack to the state just before the assertion
-and then carry on.  In these cases, the pushed state is marked as
-'backtrack on success too'. This marking is in fact done by a chain of
-pointers, each pointing to the previous 'yes' state. On success, we pop to
-the nearest yes state, discarding any intermediate failure-only states.
-Sometimes a yes state is pushed just to force some cleanup code to be
-called at the end of a successful match or submatch; e.g. (??{$re}) uses
-it to free the inner regex.
-
-Note that failure backtracking rewinds the cursor position, while
-success backtracking leaves it alone.
-
-A pattern is complete when the END op is executed, while a subpattern
-such as (?=foo) is complete when the SUCCESS op is executed. Both of these
-ops trigger the "pop to last yes state if any, otherwise return true"
-behaviour.
-
-A common convention in this function is to use A and B to refer to the two
-subpatterns (or to the first nodes thereof) in patterns like /A*B/: so A is
-the subpattern to be matched possibly multiple times, while B is the entire
-rest of the pattern. Variable and state names reflect this convention.
-
-The states in the main switch are the union of ops and failure/success of
-substates associated with with that op.  For example, IFMATCH is the op
-that does lookahead assertions /(?=A)B/ and so the IFMATCH state means
-'execute IFMATCH'; while IFMATCH_A is a state saying that we have just
-successfully matched A and IFMATCH_A_fail is a state saying that we have
-just failed to match A. Resume states always come in pairs. The backtrack
-state we push is marked as 'IFMATCH_A', but when that is popped, we resume
-at IFMATCH_A or IFMATCH_A_fail, depending on whether we are backtracking
-on success or failure.
-
-The struct that holds a backtracking state is actually a big union, with
-one variant for each major type of op. The variable st points to the
-top-most backtrack struct. To make the code clearer, within each
-block of code we #define ST to alias the relevant union.
-
-Here's a concrete example of a (vastly oversimplified) IFMATCH
-implementation:
-
-    switch (state) {
-    ....
-
-#define ST st->u.ifmatch
-
-    case IFMATCH: // we are executing the IFMATCH op, (?=A)B
-       ST.foo = ...; // some state we wish to save
-       ...
-       // push a yes backtrack state with a resume value of
-       // IFMATCH_A/IFMATCH_A_fail, then continue execution at the
-       // first node of A:
-       PUSH_YES_STATE_GOTO(IFMATCH_A, A, newinput);
-       // NOTREACHED
-
-    case IFMATCH_A: // we have successfully executed A; now continue with B
-       next = B;
-       bar = ST.foo; // do something with the preserved value
-       break;
-
-    case IFMATCH_A_fail: // A failed, so the assertion failed
-       ...;   // do some housekeeping, then ...
-       sayNO; // propagate the failure
-
-#undef ST
-
-    ...
-    }
-
-For any old-timers reading this who are familiar with the old recursive
-approach, the code above is equivalent to:
-
-    case IFMATCH: // we are executing the IFMATCH op, (?=A)B
-    {
-       int foo = ...
-       ...
-       if (regmatch(A)) {
-           next = B;
-           bar = foo;
-           break;
-       }
-       ...;   // do some housekeeping, then ...
-       sayNO; // propagate the failure
-    }
-
-The topmost backtrack state, pointed to by st, is usually free. If you
-want to claim it, populate any ST.foo fields in it with values you wish to
-save, then do one of
-
-       PUSH_STATE_GOTO(resume_state, node, newinput);
-       PUSH_YES_STATE_GOTO(resume_state, node, newinput);
-
-which sets that backtrack state's resume value to 'resume_state', pushes a
-new free entry to the top of the backtrack stack, then goes to 'node'.
-On backtracking, the free slot is popped, and the saved state becomes the
-new free state. An ST.foo field in this new top state can be temporarily
-accessed to retrieve values, but once the main loop is re-entered, it
-becomes available for reuse.
-
-Note that the depth of the backtrack stack constantly increases during the
-left-to-right execution of the pattern, rather than going up and down with
-the pattern nesting. For example the stack is at its maximum at Z at the
-end of the pattern, rather than at X in the following:
-
-    /(((X)+)+)+....(Y)+....Z/
-
-The only exceptions to this are lookahead/behind assertions and the cut,
-(?>A), which pop all the backtrack states associated with A before
-continuing.
- 
-Backtrack state structs are allocated in slabs of about 4K in size.
-PL_regmatch_state and st always point to the currently active state,
-and PL_regmatch_slab points to the slab currently containing
-PL_regmatch_state.  The first time regmatch() is called, the first slab is
-allocated, and is never freed until interpreter destruction. When the slab
-is full, a new one is allocated and chained to the end. At exit from
-regmatch(), slabs allocated since entry are freed.
-
-*/
- 
-
-#define DEBUG_STATE_pp(pp)                                  \
-    DEBUG_STATE_r({                                         \
-        DUMP_EXEC_POS(locinput, scan, utf8_target,depth);   \
-        Perl_re_printf( aTHX_                                           \
-            "%*s" pp " %s%s%s%s%s\n",                       \
-            INDENT_CHARS(depth), "",                        \
-            PL_reg_name[st->resume_state],                  \
-            ((st==yes_state||st==mark_state) ? "[" : ""),   \
-            ((st==yes_state) ? "Y" : ""),                   \
-            ((st==mark_state) ? "M" : ""),                  \
-            ((st==yes_state||st==mark_state) ? "]" : "")    \
-        );                                                  \
-    });
-
-
-#define REG_NODE_NUM(x) ((x) ? (int)((x)-prog) : -1)
-
  #ifdef DEBUGGING
  
  STATIC void
@@ -4389,14 +4091,19 @@ S_reg_check_named_buff_matched(const regexp *rex, const regnode *scan)
      return 0;
  }
  
+#define CHRTEST_UNINIT -1001 /* c1/c2 haven't been calculated yet */
+#define CHRTEST_VOID   -1000 /* the c1/c2 "next char" test should be skipped */
+#define CHRTEST_NOT_A_CP_1 -999
+#define CHRTEST_NOT_A_CP_2 -998
  
  static bool
  S_setup_EXACTISH_ST_c1_c2(pTHX_ const regnode * const text_node, int *c1p,
          U8* c1_utf8, int *c2p, U8* c2_utf8, regmatch_info *reginfo)
  {
-    /* This function determines if there are one or two characters that match
-     * the first character of the passed-in EXACTish node <text_node>, and if
-     * so, returns them in the passed-in pointers.
+    /* This function determines if there are zero, one, two, or more characters
+     * that match the first character of the passed-in EXACTish node
+     * <text_node>, and if there are one or two, it returns them in the
+     * passed-in pointers.
       *
       * If it determines that no possible character in the target string can
       * match, it returns FALSE; otherwise TRUE.  (The FALSE situation occurs if
@@ -4460,13 +4167,17 @@ S_setup_EXACTISH_ST_c1_c2(pTHX_ const regnode * const text_node, int *c1p,
      U8 *pat = (U8*)STRING(text_node);
      U8 folded[UTF8_MAX_FOLD_CHAR_EXPAND * UTF8_MAXBYTES_CASE + 1] = { '\0' };
  
-    if (OP(text_node) == EXACT || OP(text_node) == EXACTL) {
+    if (   OP(text_node) == EXACT
+        || OP(text_node) == EXACT_ONLY8
+        || OP(text_node) == EXACTL)
+    {
  
          /* In an exact node, only one thing can be matched, that first
           * character.  If both the pat and the target are UTF-8, we can just
           * copy the input to the output, avoiding finding the code point of
           * that character */
          if (!is_utf8_pat) {
+            assert(OP(text_node) != EXACT_ONLY8);
              c2 = c1 = *pat;
          }
          else if (utf8_target) {
@@ -4474,6 +4185,9 @@ S_setup_EXACTISH_ST_c1_c2(pTHX_ const regnode * const text_node, int *c1p,
              Copy(pat, c2_utf8, UTF8SKIP(pat), U8);
              utf8_has_been_setup = TRUE;
          }
+        else if (OP(text_node) == EXACT_ONLY8) {
+            return FALSE;   /* Can only match UTF-8 target */
+        }
          else {
              c2 = c1 = valid_utf8_to_uvchr(pat, NULL);
          }
@@ -4508,7 +4222,7 @@ S_setup_EXACTISH_ST_c1_c2(pTHX_ const regnode * const text_node, int *c1p,
                  int i;
  
                  for (i = 0; i < UTF8_MAX_FOLD_CHAR_EXPAND && s < pat_end; i++) {
-                    if (isASCII(*s)) {
+                    if (isASCII(*s) && LIKELY(! PL_in_utf8_turkic_locale)) {
                          *(d++) = (U8) toFOLD_LC(*s);
                          s++;
                      }
@@ -4529,7 +4243,7 @@ S_setup_EXACTISH_ST_c1_c2(pTHX_ const regnode * const text_node, int *c1p,
              }
          }
  
-        if ((is_utf8_pat && is_MULTI_CHAR_FOLD_utf8_safe(pat, pat_end))
+        if (    ( is_utf8_pat && is_MULTI_CHAR_FOLD_utf8_safe(pat, pat_end))
               || (!is_utf8_pat && is_MULTI_CHAR_FOLD_latin1_safe(pat, pat_end)))
          {
              /* Multi-character folds require more context to sort out.  Also
@@ -4539,81 +4253,71 @@ S_setup_EXACTISH_ST_c1_c2(pTHX_ const regnode * const text_node, int *c1p,
          }
          else { /* an EXACTFish node which doesn't begin with a multi-char fold */
              c1 = is_utf8_pat ? valid_utf8_to_uvchr(pat, NULL) : *pat;
-            if (c1 > 255) {
-                /* Load the folds hash, if not already done */
-                SV** listp;
-                if (! PL_utf8_foldclosures) {
-                    _load_PL_utf8_foldclosures();
-                }
  
-                /* The fold closures data structure is a hash with the keys
-                 * being the UTF-8 of every character that is folded to, like
-                 * 'k', and the values each an array of all code points that
-                 * fold to its key.  e.g. [ 'k', 'K', KELVIN_SIGN ].
-                 * Multi-character folds are not included */
-                if ((! (listp = hv_fetch(PL_utf8_foldclosures,
-                                        (char *) pat,
-                                        UTF8SKIP(pat),
-                                        FALSE))))
-                {
-                    /* Not found in the hash, therefore there are no folds
-                    * containing it, so there is only a single character that
-                    * could match */
-                    c2 = c1;
+            if (   UNLIKELY(PL_in_utf8_turkic_locale)
+                && OP(text_node) == EXACTFL
+                && UNLIKELY(   c1 == 'i' || c1 == 'I'
+                            || c1 == LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE
+                            || c1 == LATIN_SMALL_LETTER_DOTLESS_I))
+            {   /* Hard-coded Turkish locale rules for these 4 characters
+                   override normal rules */
+                if (c1 == 'i') {
+                    c2 = LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE;
                  }
-                else {  /* Does participate in folds */
-                    AV* list = (AV*) *listp;
-                    if (av_tindex_skip_len_mg(list) != 1) {
-
-                        /* If there aren't exactly two folds to this, it is
-                         * outside the scope of this function */
-                        use_chrtest_void = TRUE;
-                    }
-                    else {  /* There are two.  Get them */
-                        SV** c_p = av_fetch(list, 0, FALSE);
-                        if (c_p == NULL) {
-                            Perl_croak(aTHX_ "panic: invalid PL_utf8_foldclosures structure");
-                        }
-                        c1 = SvUV(*c_p);
-
-                        c_p = av_fetch(list, 1, FALSE);
-                        if (c_p == NULL) {
-                            Perl_croak(aTHX_ "panic: invalid PL_utf8_foldclosures structure");
-                        }
-                        c2 = SvUV(*c_p);
-
-                        /* Folds that cross the 255/256 boundary are forbidden
-                         * if EXACTFL (and isnt a UTF8 locale), or EXACTFA and
-                         * one is ASCIII.  Since the pattern character is above
-                         * 255, and its only other match is below 256, the only
-                         * legal match will be to itself.  We have thrown away
-                         * the original, so have to compute which is the one
-                         * above 255. */
-                        if ((c1 < 256) != (c2 < 256)) {
-                            if ((OP(text_node) == EXACTFL
-                                 && ! IN_UTF8_CTYPE_LOCALE)
-                                || ((OP(text_node) == EXACTFA
-                                    || OP(text_node) == EXACTFA_NO_TRIE)
-                                    && (isASCII(c1) || isASCII(c2))))
-                            {
-                                if (c1 < 256) {
-                                    c1 = c2;
-                                }
-                                else {
-                                    c2 = c1;
-                                }
-                            }
-                        }
+                else if (c1 == 'I') {
+                    c2 = LATIN_SMALL_LETTER_DOTLESS_I;
+                }
+                else if (c1 == LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE) {
+                    c2 = 'i';
+                }
+                else if (c1 == LATIN_SMALL_LETTER_DOTLESS_I) {
+                    c2 = 'I';
+                }
+            }
+            else if (c1 > 255) {
+                const unsigned int * remaining_folds;
+                unsigned int first_fold;
+
+                /* Look up what code points (besides c1) fold to c1;  e.g.,
+                 * [ 'K', KELVIN_SIGN ] both fold to 'k'. */
+                Size_t folds_count = _inverse_folds(c1, &first_fold,
+                                                       &remaining_folds);
+                if (folds_count == 0) {
+                    c2 = c1;    /* there is only a single character that could
+                                   match */
+                }
+                else if (folds_count != 1) {
+                    /* If there aren't exactly two folds to this (itself and
+                     * another), it is outside the scope of this function */
+                    use_chrtest_void = TRUE;
+                }
+                else {  /* There are two.  We already have one, get the other */
+                    c2 = first_fold;
+
+                    /* Folds that cross the 255/256 boundary are forbidden if
+                     * EXACTFL (and isn't a UTF8 locale), or EXACTFAA and one is
+                     * ASCII.  The only other match to c1 is c2, and since c1
+                     * is above 255, c2 had better be as well under these
+                     * circumstances.  If it isn't, it means the only legal
+                     * match of c1 is itself. */
+                    if (    c2 < 256
+                        && (   (   OP(text_node) == EXACTFL
+                                && ! IN_UTF8_CTYPE_LOCALE)
+                            || ((     OP(text_node) == EXACTFAA
+                                   || OP(text_node) == EXACTFAA_NO_TRIE)
+                                && (isASCII(c1) || isASCII(c2)))))
+                    {
+                        c2 = c1;
                      }
                  }
              }
              else /* Here, c1 is <= 255 */
-                if (utf8_target
+                if (   utf8_target
                      && HAS_NONLATIN1_FOLD_CLOSURE(c1)
                      && ( ! (OP(text_node) == EXACTFL && ! IN_UTF8_CTYPE_LOCALE))
-                    && ((OP(text_node) != EXACTFA
-                        && OP(text_node) != EXACTFA_NO_TRIE)
-                        || ! isASCII(c1)))
+                    && (   (   OP(text_node) != EXACTFAA
+                            && OP(text_node) != EXACTFAA_NO_TRIE)
+                        ||   ! isASCII(c1)))
              {
                  /* Here, there could be something above Latin1 in the target
                   * which folds to this character in the pattern.  All such
@@ -4644,16 +4348,19 @@ S_setup_EXACTISH_ST_c1_c2(pTHX_ const regnode * const text_node, int *c1p,
                          }
                          /* FALLTHROUGH */
                          /* /u rules for all these.  This happens to work for
-                        * EXACTFA as nothing in Latin1 folds to ASCII */
-                    case EXACTFA_NO_TRIE:   /* This node only generated for
-                                            non-utf8 patterns */
+                        * EXACTFAA as nothing in Latin1 folds to ASCII */
+                    case EXACTFAA_NO_TRIE:   /* This node only generated for
+                                                non-utf8 patterns */
                          assert(! is_utf8_pat);
                          /* FALLTHROUGH */
-                    case EXACTFA:
-                    case EXACTFU_SS:
+                    case EXACTFAA:
+                    case EXACTFUP:
                      case EXACTFU:
                          c2 = PL_fold_latin1[c1];
                          break;
+                    case EXACTFU_ONLY8:
+                        return FALSE;
+                        NOT_REACHED; /* NOTREACHED */
  
                      default:
                          Perl_croak(aTHX_ "panic: Unexpected op %u", OP(text_node));
@@ -4750,6 +4457,24 @@ S_isGCB(pTHX_ const GCB_enum before, const GCB_enum after, const U8 * const strb
                  return prev != GCB_E_Base && prev != GCB_E_Base_GAZ;
              }
  
+        case GCB_Maybe_Emoji_NonBreak:
+
+            {
+
+            /* Do not break within emoji modifier sequences or emoji zwj sequences.
+              GB11 \p{Extended_Pictographic} Extend* ZWJ × \p{Extended_Pictographic}
+              */
+                U8 * temp_pos = (U8 *) curpos;
+                GCB_enum prev;
+
+                do {
+                    prev = backup_one_GCB(strbeg, &temp_pos, utf8_target);
+                }
+                while (prev == GCB_Extend);
+
+                return prev != GCB_XPG_XX;
+            }
+
          default:
              break;
      }
@@ -4765,6 +4490,7 @@ S_isGCB(pTHX_ const GCB_enum before, const GCB_enum after, const U8 * const strb
  STATIC GCB_enum
  S_backup_one_GCB(pTHX_ const U8 * const strbeg, U8 ** curpos, const bool utf8_target)
  {
+    dVAR;
      GCB_enum gcb;
  
      PERL_ARGS_ASSERT_BACKUP_ONE_GCB;
@@ -5042,6 +4768,8 @@ S_isLB(pTHX_ LB_enum before,
  STATIC LB_enum
  S_advance_one_LB(pTHX_ U8 ** curpos, const U8 * const strend, const bool utf8_target)
  {
+    dVAR;
+
      LB_enum lb;
  
      PERL_ARGS_ASSERT_ADVANCE_ONE_LB;
@@ -5071,6 +4799,7 @@ S_advance_one_LB(pTHX_ U8 ** curpos, const U8 * const strend, const bool utf8_ta
  STATIC LB_enum
  S_backup_one_LB(pTHX_ const U8 * const strbeg, U8 ** curpos, const bool utf8_target)
  {
+    dVAR;
      LB_enum lb;
  
      PERL_ARGS_ASSERT_BACKUP_ONE_LB;
@@ -5307,6 +5036,7 @@ S_isSB(pTHX_ SB_enum before,
  STATIC SB_enum
  S_advance_one_SB(pTHX_ U8 ** curpos, const U8 * const strend, const bool utf8_target)
  {
+    dVAR;
      SB_enum sb;
  
      PERL_ARGS_ASSERT_ADVANCE_ONE_SB;
@@ -5340,6 +5070,7 @@ S_advance_one_SB(pTHX_ U8 ** curpos, const U8 * const strend, const bool utf8_ta
  STATIC SB_enum
  S_backup_one_SB(pTHX_ const U8 * const strbeg, U8 ** curpos, const bool utf8_target)
  {
+    dVAR;
      SB_enum sb;
  
      PERL_ARGS_ASSERT_BACKUP_ONE_SB;
@@ -5576,6 +5307,7 @@ S_advance_one_WB(pTHX_ U8 ** curpos,
                         const bool utf8_target,
                         const bool skip_Extend_Format)
  {
+    dVAR;
      WB_enum wb;
  
      PERL_ARGS_ASSERT_ADVANCE_ONE_WB;
@@ -5610,103 +5342,289 @@ S_advance_one_WB(pTHX_ U8 ** curpos,
      return wb;
  }
  
-STATIC WB_enum
-S_backup_one_WB(pTHX_ WB_enum * previous, const U8 * const strbeg, U8 ** curpos, const bool utf8_target)
-{
-    WB_enum wb;
+STATIC WB_enum
+S_backup_one_WB(pTHX_ WB_enum * previous, const U8 * const strbeg, U8 ** curpos, const bool utf8_target)
+{
+    dVAR;
+    WB_enum wb;
+
+    PERL_ARGS_ASSERT_BACKUP_ONE_WB;
+
+    /* If we know what the previous character's break value is, don't have
+        * to look it up */
+    if (*previous != WB_UNKNOWN) {
+        wb = *previous;
+
+        /* But we need to move backwards by one */
+        if (utf8_target) {
+            *curpos = reghopmaybe3(*curpos, -1, strbeg);
+            if (! *curpos) {
+                *previous = WB_EDGE;
+                *curpos = (U8 *) strbeg;
+            }
+            else {
+                *previous = WB_UNKNOWN;
+            }
+        }
+        else {
+            (*curpos)--;
+            *previous = (*curpos <= strbeg) ? WB_EDGE : WB_UNKNOWN;
+        }
+
+        /* And we always back up over these three types */
+        if (wb != WB_Extend && wb != WB_Format && wb != WB_ZWJ) {
+            return wb;
+        }
+    }
+
+    if (*curpos < strbeg) {
+        return WB_EDGE;
+    }
+
+    if (utf8_target) {
+        U8 * prev_char_pos = reghopmaybe3(*curpos, -1, strbeg);
+        if (! prev_char_pos) {
+            return WB_EDGE;
+        }
+
+        /* Back up over Extend and Format.  curpos is always just to the right
+         * of the character whose value we are getting */
+        do {
+            U8 * prev_prev_char_pos;
+            if ((prev_prev_char_pos = reghopmaybe3((U8 *) prev_char_pos,
+                                                   -1,
+                                                   strbeg)))
+            {
+                wb = getWB_VAL_UTF8(prev_prev_char_pos, prev_char_pos);
+                *curpos = prev_char_pos;
+                prev_char_pos = prev_prev_char_pos;
+            }
+            else {
+                *curpos = (U8 *) strbeg;
+                return WB_EDGE;
+            }
+        } while (wb == WB_Extend || wb == WB_Format || wb == WB_ZWJ);
+    }
+    else {
+        do {
+            if (*curpos - 2 < strbeg) {
+                *curpos = (U8 *) strbeg;
+                return WB_EDGE;
+            }
+            (*curpos)--;
+            wb = getWB_VAL_CP(*(*curpos - 1));
+        } while (wb == WB_Extend || wb == WB_Format);
+    }
+
+    return wb;
+}
+
+/* Macros for regmatch(), using its internal variables */
+#define NEXTCHR_EOS -10 /* nextchr has fallen off the end */
+#define NEXTCHR_IS_EOS (nextchr < 0)
+
+#define SET_nextchr \
+    nextchr = ((locinput < reginfo->strend) ? UCHARAT(locinput) : NEXTCHR_EOS)
+
+#define SET_locinput(p) \
+    locinput = (p);  \
+    SET_nextchr
+
+#define sayYES goto yes
+#define sayNO goto no
+#define sayNO_SILENT goto no_silent
+
+/* we don't use STMT_START/END here because it leads to
+   "unreachable code" warnings, which are bogus, but distracting. */
+#define CACHEsayNO \
+    if (ST.cache_mask) \
+       reginfo->info_aux->poscache[ST.cache_offset] |= ST.cache_mask; \
+    sayNO
+
+#define EVAL_CLOSE_PAREN_IS(st,expr)                        \
+(                                                           \
+    (   ( st )                                         ) && \
+    (   ( st )->u.eval.close_paren                     ) && \
+    ( ( ( st )->u.eval.close_paren ) == ( (expr) + 1 ) )    \
+)
+
+#define EVAL_CLOSE_PAREN_IS_TRUE(st,expr)                   \
+(                                                           \
+    (   ( st )                                         ) && \
+    (   ( st )->u.eval.close_paren                     ) && \
+    (   ( expr )                                       ) && \
+    ( ( ( st )->u.eval.close_paren ) == ( (expr) + 1 ) )    \
+)
+
+
+#define EVAL_CLOSE_PAREN_SET(st,expr) \
+    (st)->u.eval.close_paren = ( (expr) + 1 )
+
+#define EVAL_CLOSE_PAREN_CLEAR(st) \
+    (st)->u.eval.close_paren = 0
+
+/* push a new state then goto it */
+
+#define PUSH_STATE_GOTO(state, node, input) \
+    pushinput = input; \
+    scan = node; \
+    st->resume_state = state; \
+    goto push_state;
+
+/* push a new state with success backtracking, then goto it */
+
+#define PUSH_YES_STATE_GOTO(state, node, input) \
+    pushinput = input; \
+    scan = node; \
+    st->resume_state = state; \
+    goto push_yes_state;
+
+#define DEBUG_STATE_pp(pp)                                  \
+    DEBUG_STATE_r({                                         \
+        DUMP_EXEC_POS(locinput, scan, utf8_target,depth);   \
+        Perl_re_printf( aTHX_                               \
+            "%*s" pp " %s%s%s%s%s\n",                       \
+            INDENT_CHARS(depth), "",                        \
+            PL_reg_name[st->resume_state],                  \
+            ((st==yes_state||st==mark_state) ? "[" : ""),   \
+            ((st==yes_state) ? "Y" : ""),                   \
+            ((st==mark_state) ? "M" : ""),                  \
+            ((st==yes_state||st==mark_state) ? "]" : "")    \
+        );                                                  \
+    });
+
+/*
+
+regmatch() - main matching routine
+
+This is basically one big switch statement in a loop. We execute an op,
+set 'next' to point the next op, and continue. If we come to a point which
+we may need to backtrack to on failure such as (A|B|C), we push a
+backtrack state onto the backtrack stack. On failure, we pop the top
+state, and re-enter the loop at the state indicated. If there are no more
+states to pop, we return failure.
+
+Sometimes we also need to backtrack on success; for example /A+/, where
+after successfully matching one A, we need to go back and try to
+match another one; similarly for lookahead assertions: if the assertion
+completes successfully, we backtrack to the state just before the assertion
+and then carry on.  In these cases, the pushed state is marked as
+'backtrack on success too'. This marking is in fact done by a chain of
+pointers, each pointing to the previous 'yes' state. On success, we pop to
+the nearest yes state, discarding any intermediate failure-only states.
+Sometimes a yes state is pushed just to force some cleanup code to be
+called at the end of a successful match or submatch; e.g. (??{$re}) uses
+it to free the inner regex.
+
+Note that failure backtracking rewinds the cursor position, while
+success backtracking leaves it alone.
+
+A pattern is complete when the END op is executed, while a subpattern
+such as (?=foo) is complete when the SUCCESS op is executed. Both of these
+ops trigger the "pop to last yes state if any, otherwise return true"
+behaviour.
+
+A common convention in this function is to use A and B to refer to the two
+subpatterns (or to the first nodes thereof) in patterns like /A*B/: so A is
+the subpattern to be matched possibly multiple times, while B is the entire
+rest of the pattern. Variable and state names reflect this convention.
+
+The states in the main switch are the union of ops and failure/success of
+substates associated with that op.  For example, IFMATCH is the op
+that does lookahead assertions /(?=A)B/ and so the IFMATCH state means
+'execute IFMATCH'; while IFMATCH_A is a state saying that we have just
+successfully matched A and IFMATCH_A_fail is a state saying that we have
+just failed to match A. Resume states always come in pairs. The backtrack
+state we push is marked as 'IFMATCH_A', but when that is popped, we resume
+at IFMATCH_A or IFMATCH_A_fail, depending on whether we are backtracking
+on success or failure.
+
+The struct that holds a backtracking state is actually a big union, with
+one variant for each major type of op. The variable st points to the
+top-most backtrack struct. To make the code clearer, within each
+block of code we #define ST to alias the relevant union.
+
+Here's a concrete example of a (vastly oversimplified) IFMATCH
+implementation:
+
+    switch (state) {
+    ....
+
+#define ST st->u.ifmatch
  
-    PERL_ARGS_ASSERT_BACKUP_ONE_WB;
+    case IFMATCH: // we are executing the IFMATCH op, (?=A)B
+       ST.foo = ...; // some state we wish to save
+       ...
+       // push a yes backtrack state with a resume value of
+       // IFMATCH_A/IFMATCH_A_fail, then continue execution at the
+       // first node of A:
+       PUSH_YES_STATE_GOTO(IFMATCH_A, A, newinput);
+       // NOTREACHED
  
-    /* If we know what the previous character's break value is, don't have
-        * to look it up */
-    if (*previous != WB_UNKNOWN) {
-        wb = *previous;
+    case IFMATCH_A: // we have successfully executed A; now continue with B
+       next = B;
+       bar = ST.foo; // do something with the preserved value
+       break;
  
-        /* But we need to move backwards by one */
-        if (utf8_target) {
-            *curpos = reghopmaybe3(*curpos, -1, strbeg);
-            if (! *curpos) {
-                *previous = WB_EDGE;
-                *curpos = (U8 *) strbeg;
-            }
-            else {
-                *previous = WB_UNKNOWN;
-            }
-        }
-        else {
-            (*curpos)--;
-            *previous = (*curpos <= strbeg) ? WB_EDGE : WB_UNKNOWN;
-        }
+    case IFMATCH_A_fail: // A failed, so the assertion failed
+       ...;   // do some housekeeping, then ...
+       sayNO; // propagate the failure
  
-        /* And we always back up over these three types */
-        if (wb != WB_Extend && wb != WB_Format && wb != WB_ZWJ) {
-            return wb;
-        }
-    }
+#undef ST
  
-    if (*curpos < strbeg) {
-        return WB_EDGE;
+    ...
      }
  
-    if (utf8_target) {
-        U8 * prev_char_pos = reghopmaybe3(*curpos, -1, strbeg);
-        if (! prev_char_pos) {
-            return WB_EDGE;
-        }
+For any old-timers reading this who are familiar with the old recursive
+approach, the code above is equivalent to:
  
-        /* Back up over Extend and Format.  curpos is always just to the right
-         * of the characater whose value we are getting */
-        do {
-            U8 * prev_prev_char_pos;
-            if ((prev_prev_char_pos = reghopmaybe3((U8 *) prev_char_pos,
-                                                   -1,
-                                                   strbeg)))
-            {
-                wb = getWB_VAL_UTF8(prev_prev_char_pos, prev_char_pos);
-                *curpos = prev_char_pos;
-                prev_char_pos = prev_prev_char_pos;
-            }
-            else {
-                *curpos = (U8 *) strbeg;
-                return WB_EDGE;
-            }
-        } while (wb == WB_Extend || wb == WB_Format || wb == WB_ZWJ);
-    }
-    else {
-        do {
-            if (*curpos - 2 < strbeg) {
-                *curpos = (U8 *) strbeg;
-                return WB_EDGE;
-            }
-            (*curpos)--;
-            wb = getWB_VAL_CP(*(*curpos - 1));
-        } while (wb == WB_Extend || wb == WB_Format);
+    case IFMATCH: // we are executing the IFMATCH op, (?=A)B
+    {
+       int foo = ...
+       ...
+       if (regmatch(A)) {
+           next = B;
+           bar = foo;
+           break;
+       }
+       ...;   // do some housekeeping, then ...
+       sayNO; // propagate the failure
      }
  
-    return wb;
-}
+The topmost backtrack state, pointed to by st, is usually free. If you
+want to claim it, populate any ST.foo fields in it with values you wish to
+save, then do one of
  
-#define EVAL_CLOSE_PAREN_IS(st,expr)                        \
-(                                                           \
-    (   ( st )                                         ) && \
-    (   ( st )->u.eval.close_paren                     ) && \
-    ( ( ( st )->u.eval.close_paren ) == ( (expr) + 1 ) )    \
-)
+       PUSH_STATE_GOTO(resume_state, node, newinput);
+       PUSH_YES_STATE_GOTO(resume_state, node, newinput);
  
-#define EVAL_CLOSE_PAREN_IS_TRUE(st,expr)                   \
-(                                                           \
-    (   ( st )                                         ) && \
-    (   ( st )->u.eval.close_paren                     ) && \
-    (   ( expr )                                       ) && \
-    ( ( ( st )->u.eval.close_paren ) == ( (expr) + 1 ) )    \
-)
+which sets that backtrack state's resume value to 'resume_state', pushes a
+new free entry to the top of the backtrack stack, then goes to 'node'.
+On backtracking, the free slot is popped, and the saved state becomes the
+new free state. An ST.foo field in this new top state can be temporarily
+accessed to retrieve values, but once the main loop is re-entered, it
+becomes available for reuse.
  
+Note that the depth of the backtrack stack constantly increases during the
+left-to-right execution of the pattern, rather than going up and down with
+the pattern nesting. For example the stack is at its maximum at Z at the
+end of the pattern, rather than at X in the following:
  
-#define EVAL_CLOSE_PAREN_SET(st,expr) \
-    (st)->u.eval.close_paren = ( (expr) + 1 )
+    /(((X)+)+)+....(Y)+....Z/
  
-#define EVAL_CLOSE_PAREN_CLEAR(st) \
-    (st)->u.eval.close_paren = 0
+The only exceptions to this are lookahead/behind assertions and the cut,
+(?>A), which pop all the backtrack states associated with A before
+continuing.
+
+Backtrack state structs are allocated in slabs of about 4K in size.
+PL_regmatch_state and st always point to the currently active state,
+and PL_regmatch_slab points to the slab currently containing
+PL_regmatch_state.  The first time regmatch() is called, the first slab is
+allocated, and is never freed until interpreter destruction. When the slab
+is full, a new one is allocated and chained to the end. At exit from
+regmatch(), slabs allocated since entry are freed.
+
+*/
  
  /* returns -1 on failure, $+[0] on success */
  STATIC SSize_t
@@ -5727,6 +5645,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
      SSize_t ln = 0; /* len or last;  init to avoid compiler warning */
      SSize_t endref = 0; /* offset of end of backref when ln is start */
      char *locinput = startpos;
+    char *loceol = reginfo->strend;
      char *pushinput; /* where to continue after a PUSH */
      I32 nextchr;   /* is always set to UCHARAT(locinput), or -1 at EOS */
  
@@ -5891,13 +5810,17 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
             break;
  
         case SANY: /*  /./s  */
-           if (NEXTCHR_IS_EOS)
+           if (NEXTCHR_IS_EOS || locinput >= loceol)
                 sayNO;
              goto increment_locinput;
  
         case REG_ANY: /*  /./  */
-           if ((NEXTCHR_IS_EOS) || nextchr == '\n')
+           if (   NEXTCHR_IS_EOS
+                || locinput >= loceol
+                || nextchr == '\n')
+            {
                 sayNO;
+            }
              goto increment_locinput;
  
  
@@ -5907,9 +5830,12 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
              /* In this case the charclass data is available inline so
                 we can fail fast without a lot of extra overhead. 
               */
-            if(!NEXTCHR_IS_EOS && !ANYOF_BITMAP_TEST(scan, nextchr)) {
+            if ( !   NEXTCHR_IS_EOS
+                &&   locinput < loceol
+                && ! ANYOF_BITMAP_TEST(scan, nextchr))
+            {
                  DEBUG_EXECUTE_r(
-                    Perl_re_exec_indentf( aTHX_  "%sfailed to match trie start class...%s\n",
+                    Perl_re_exec_indentf( aTHX_  "%sTRIE: failed to match trie start class...%s\n",
                                depth, PL_colors[4], PL_colors[5])
                  );
                  sayNO_SILENT;
@@ -5974,7 +5900,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
                  if (scan->flags == EXACTL || scan->flags == EXACTFLU8) {
                      _CHECK_AND_WARN_PROBLEMATIC_LOCALE;
                      if (utf8_target
-                        && nextchr >= 0 /* guard against negative EOS value in nextchr */
+                        && ! NEXTCHR_IS_EOS
                          && UTF8_IS_ABOVE_LATIN1(nextchr)
                          && scan->flags == EXACTL)
                      {
@@ -5986,18 +5912,20 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
                      }
                  }
                  if (   trie->bitmap
-                    && (NEXTCHR_IS_EOS || !TRIE_BITMAP_TEST(trie, nextchr)))
+                    && (     NEXTCHR_IS_EOS
+                        ||   locinput >= loceol
+                        || ! TRIE_BITMAP_TEST(trie, nextchr)))
                  {
                     if (trie->states[ state ].wordnum) {
                          DEBUG_EXECUTE_r(
-                            Perl_re_exec_indentf( aTHX_  "%smatched empty string...%s\n",
+                            Perl_re_exec_indentf( aTHX_  "%sTRIE: matched empty string...%s\n",
                                            depth, PL_colors[4], PL_colors[5])
                          );
                         if (!trie->jump)
                             break;
                     } else {
                         DEBUG_EXECUTE_r(
-                            Perl_re_exec_indentf( aTHX_  "%sfailed to match trie start class...%s\n",
+                            Perl_re_exec_indentf( aTHX_  "%sTRIE: failed to match trie start class...%s\n",
                                            depth, PL_colors[4], PL_colors[5])
                          );
                         sayNO_SILENT;
@@ -6024,7 +5952,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
                    shortest accept state and the wordnum of the longest
                    accept state */
  
-               while ( state && uc <= (U8*)(reginfo->strend) ) {
+               while ( state && uc <= (U8*)(loceol) ) {
                      U32 base = trie->states[ state ].trans.base;
                      UV uvc = 0;
                      U16 charid = 0;
@@ -6053,17 +5981,18 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
                                  DUMP_EXEC_POS( (char *)uc, scan, utf8_target, depth );
                                  /* HERE */
                                  PerlIO_printf( Perl_debug_log,
-                                    "%*s%sState: %4" UVxf " Accepted: %c ",
+                                    "%*s%sTRIE: State: %4" UVxf " Accepted: %c ",
                                      INDENT_CHARS(depth), "", PL_colors[4],
                                     (UV)state, (accepted ? 'Y' : 'N'));
                     });
  
                     /* read a char and goto next state */
-                   if ( base && (foldlen || uc < (U8*)(reginfo->strend))) {
+                   if ( base && (foldlen || uc < (U8*)(loceol))) {
                         I32 offset;
                         REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc,
-                                            uscan, len, uvc, charid, foldlen,
-                                            foldbuf, uniflags);
+                                             (U8 *) loceol, uscan,
+                                             len, uvc, charid, foldlen,
+                                             foldbuf, uniflags);
                         charcount++;
                         if (foldlen>0)
                             ST.longfold = TRUE;
@@ -6087,7 +6016,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
                     }
                     DEBUG_TRIE_EXECUTE_r(
                          Perl_re_printf( aTHX_
-                           "Charid:%3x CP:%4" UVxf " After State: %4" UVxf "%s\n",
+                           "TRIE: Charid:%3x CP:%4" UVxf " After State: %4" UVxf "%s\n",
                             charid, uvc, (UV)state, PL_colors[5] );
                     );
                 }
@@ -6106,7 +6035,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
                 }
  
                 DEBUG_EXECUTE_r(
-                    Perl_re_exec_indentf( aTHX_  "%sgot %" IVdf " possible matches%s\n",
+                    Perl_re_exec_indentf( aTHX_  "%sTRIE: got %" IVdf " possible matches%s\n",
                          depth,
                         PL_colors[4], (IV)ST.accepted, PL_colors[5] );
                 );
@@ -6198,8 +6127,8 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
                         while (foldlen) {
                             if (!--chars)
                                 break;
-                           uvc = utf8n_to_uvchr(uscan, UTF8_MAXLEN, &len,
-                                           uniflags);
+                           uvc = utf8n_to_uvchr(uscan, foldlen, &len,
+                                                 uniflags);
                             uscan += len;
                             foldlen -= len;
                         }
@@ -6239,7 +6168,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
                          ? av_fetch(trie_words, ST.nextword - 1, 0) : NULL;
                 SV *sv= tmp ? sv_newmortal() : NULL;
  
-                Perl_re_exec_indentf( aTHX_  "%sonly one match left, short-circuiting: #%d <%s>%s\n",
+                Perl_re_exec_indentf( aTHX_  "%sTRIE: only one match left, short-circuiting: #%d <%s>%s\n",
                      depth, PL_colors[4],
                     ST.nextword,
                     tmp ? pv_pretty(sv, SvPV_nolen_const(*tmp), SvCUR(*tmp), 0,
@@ -6268,9 +6197,16 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
              if (utf8_target && UTF8_IS_ABOVE_LATIN1(*locinput)) {
                  _CHECK_AND_OUTPUT_WIDE_LOCALE_UTF8_MSG(locinput, reginfo->strend);
              }
+            goto do_exact;
+       case EXACT_ONLY8:
+            if (! utf8_target) {
+                sayNO;
+            }
              /* FALLTHROUGH */
         case EXACT: {            /*  /abc/        */
-           char *s = STRING(scan);
+           char *s;
+          do_exact:
+           s = STRING(scan);
             ln = STR_LEN(scan);
             if (utf8_target != is_utf8_pat) {
                 /* The target and the pattern have differing utf8ness. */
@@ -6288,7 +6224,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
                       * is an invariant, but there are tests in the test suite
                       * dealing with (??{...}) which violate this) */
                     while (s < e) {
-                       if (l >= reginfo->strend
+                       if (   l >= loceol
                              || UTF8_IS_ABOVE_LATIN1(* (U8*) l))
                          {
                              sayNO;
@@ -6312,7 +6248,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
                 else {
                     /* The target is not utf8, the pattern is utf8. */
                     while (s < e) {
-                        if (l >= reginfo->strend
+                        if (   l >= loceol
                              || UTF8_IS_ABOVE_LATIN1(* (U8*) s))
                          {
                              sayNO;
@@ -6338,7 +6274,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
              else {
                  /* The target and the pattern have the same utf8ness. */
                  /* Inline the first character, for speed. */
-                if (reginfo->strend - locinput < ln
+                if (   loceol - locinput < ln
                      || UCHARAT(s) != nextchr
                      || (ln > 1 && memNE(s, locinput, ln)))
                  {
@@ -6349,7 +6285,8 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
             break;
             }
  
-       case EXACTFL: {          /*  /abc/il      */
+       case EXACTFL:            /*  /abc/il      */
+          {
             re_fold_t folder;
             const U8 * fold_array;
             const char * s;
@@ -6367,29 +6304,53 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
              if (! utf8_target) {
                  sayNO;
              }
-            fold_utf8_flags =  FOLDEQ_LOCALE | FOLDEQ_S1_ALREADY_FOLDED
-                                             | FOLDEQ_S1_FOLDS_SANE;
-           folder = foldEQ_latin1;
+            fold_utf8_flags =  FOLDEQ_LOCALE | FOLDEQ_S2_ALREADY_FOLDED
+                                             | FOLDEQ_S2_FOLDS_SANE;
+           folder = foldEQ_latin1_s2_folded;
+           fold_array = PL_fold_latin1;
+           goto do_exactf;
+
+        case EXACTFU_ONLY8:      /* /abc/iu with something in /abc/ > 255 */
+            if (! utf8_target) {
+                sayNO;
+            }
+           assert(is_utf8_pat);
+           fold_utf8_flags = FOLDEQ_S2_ALREADY_FOLDED;
+           goto do_exactf;
+
+        case EXACTFUP:          /*  /foo/iu, and something is problematic in
+                                    'foo' so can't take shortcuts. */
+            assert(! is_utf8_pat);
+            folder = foldEQ_latin1;
             fold_array = PL_fold_latin1;
+           fold_utf8_flags = 0;
             goto do_exactf;
  
-       case EXACTFU_SS:         /*  /\x{df}/iu   */
         case EXACTFU:            /*  /abc/iu      */
-           folder = foldEQ_latin1;
+            folder = foldEQ_latin1_s2_folded;
             fold_array = PL_fold_latin1;
-           fold_utf8_flags = is_utf8_pat ? FOLDEQ_S1_ALREADY_FOLDED : 0;
+           fold_utf8_flags = FOLDEQ_S2_ALREADY_FOLDED;
             goto do_exactf;
  
-        case EXACTFA_NO_TRIE:   /* This node only generated for non-utf8
+        case EXACTFAA_NO_TRIE:   /* This node only generated for non-utf8
                                     patterns */
              assert(! is_utf8_pat);
              /* FALLTHROUGH */
-       case EXACTFA:            /*  /abc/iaa     */
-           folder = foldEQ_latin1;
+       case EXACTFAA:            /*  /abc/iaa     */
+            folder = foldEQ_latin1_s2_folded;
             fold_array = PL_fold_latin1;
             fold_utf8_flags = FOLDEQ_UTF8_NOMIX_ASCII;
+            if (is_utf8_pat || ! utf8_target) {
+
+                /* The possible presence of a MICRO SIGN in the pattern forbids
+                 * us to view a non-UTF-8 pattern as folded when there is a
+                 * UTF-8 target */
+                fold_utf8_flags |= FOLDEQ_S2_ALREADY_FOLDED
+                                  |FOLDEQ_S2_FOLDS_SANE;
+            }
             goto do_exactf;
  
+
          case EXACTF:             /*  /abc/i    This node only generated for
                                                 non-utf8 patterns */
              assert(! is_utf8_pat);
@@ -6401,18 +6362,18 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
             s = STRING(scan);
             ln = STR_LEN(scan);
  
-           if (utf8_target
+           if (   utf8_target
                  || is_utf8_pat
-                || state_num == EXACTFU_SS
+                || state_num == EXACTFUP
                  || (state_num == EXACTFL && IN_UTF8_CTYPE_LOCALE))
              {
               /* Either target or the pattern are utf8, or has the issue where
                * the fold lengths may differ. */
                 const char * const l = locinput;
-               char *e = reginfo->strend;
+               char *e = loceol;
  
-               if (! foldEQ_utf8_flags(s, 0,  ln, is_utf8_pat,
-                                       l, &e, 0,  utf8_target, fold_utf8_flags))
+               if (! foldEQ_utf8_flags(l, &e, 0,  utf8_target,
+                                        s, 0,  ln, is_utf8_pat,fold_utf8_flags))
                 {
                     sayNO;
                 }
@@ -6427,9 +6388,9 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
             {
                 sayNO;
             }
-           if (reginfo->strend - locinput < ln)
+           if (loceol - locinput < ln)
                 sayNO;
-           if (ln > 1 && ! folder(s, locinput, ln))
+           if (ln > 1 && ! folder(locinput, s, ln))
                 sayNO;
             locinput += ln;
             break;
@@ -6712,6 +6673,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
              }
             break;
  
+        case ANYOFPOSIXL:
         case ANYOFL:  /*  /[abc]/l      */
              _CHECK_AND_WARN_PROBLEMATIC_LOCALE;
  
@@ -6722,41 +6684,54 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
              /* FALLTHROUGH */
         case ANYOFD:  /*   /[abc]/d       */
         case ANYOF:  /*   /[abc]/       */
-            if (NEXTCHR_IS_EOS)
+            if (NEXTCHR_IS_EOS || locinput >= loceol)
                  sayNO;
-           if (utf8_target && ! UTF8_IS_INVARIANT(*locinput)) {
-               if (!reginclass(rex, scan, (U8*)locinput, (U8*)reginfo->strend,
-                                                                   utf8_target))
-                   sayNO;
-               locinput += UTF8SKIP(locinput);
-           }
-           else {
-               if (!REGINCLASS(rex, scan, (U8*)locinput, utf8_target))
+           if (  (! utf8_target || UTF8_IS_INVARIANT(*locinput))
+               && ! (ANYOF_FLAGS(scan) & ~ ANYOF_MATCHES_ALL_ABOVE_BITMAP))
+            {
+                if (! ANYOF_BITMAP_TEST(scan, * (U8 *) (locinput))) {
                     sayNO;
+                }
                 locinput++;
-           }
+            }
+            else {
+               if (!reginclass(rex, scan, (U8*)locinput, (U8*) loceol,
+                                                                   utf8_target))
+                {
+                   sayNO;
+                }
+                goto increment_locinput;
+            }
             break;
  
          case ANYOFM:
-            if (NEXTCHR_IS_EOS || (UCHARAT(locinput) & FLAGS(scan)) != ARG(scan)) {
+            if (   NEXTCHR_IS_EOS
+                || (UCHARAT(locinput) & FLAGS(scan)) != ARG(scan)
+                || locinput >= loceol)
+            {
                  sayNO;
              }
-            locinput++;
+            locinput++; /* ANYOFM is always single byte */
              break;
  
-        case ASCII:
-            if (NEXTCHR_IS_EOS || ! isASCII(UCHARAT(locinput))) {
+        case NANYOFM:
+            if (   NEXTCHR_IS_EOS
+                || (UCHARAT(locinput) & FLAGS(scan)) == ARG(scan)
+                || locinput >= loceol)
+            {
                  sayNO;
              }
-
-            locinput++;     /* ASCII is always single byte */
+            goto increment_locinput;
              break;
  
-        case NASCII:
-            if (NEXTCHR_IS_EOS || isASCII(UCHARAT(locinput))) {
+        case ANYOFH:
+            if (   ! utf8_target
+                ||   NEXTCHR_IS_EOS
+               || ! reginclass(rex, scan, (U8*)locinput, (U8*) loceol,
+                                                                   utf8_target))
+            {
                  sayNO;
              }
-
              goto increment_locinput;
              break;
  
@@ -6769,7 +6744,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
  
          case POSIXL:    /* \w or [:punct:] etc. under /l */
              _CHECK_AND_WARN_PROBLEMATIC_LOCALE;
-            if (NEXTCHR_IS_EOS)
+            if (NEXTCHR_IS_EOS || locinput >= loceol)
                  sayNO;
  
              /* Use isFOO_lc() for characters within Latin1.  (Note that
@@ -6814,7 +6789,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
  
          case NPOSIXA:   /* \W or [:^punct:] etc. under /a */
  
-            if (NEXTCHR_IS_EOS) {
+            if (NEXTCHR_IS_EOS || locinput >= loceol) {
                  sayNO;
              }
  
@@ -6833,7 +6808,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
               * UTF-8, and also from NPOSIXA even in UTF-8 when the current
               * character is a single byte */
  
-            if (NEXTCHR_IS_EOS) {
+            if (NEXTCHR_IS_EOS || locinput >= loceol) {
                  sayNO;
              }
  
@@ -6856,7 +6831,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
  
          case POSIXU:    /* \w or [:punct:] etc. under /u */
            utf8_posix:
-            if (NEXTCHR_IS_EOS) {
+            if (NEXTCHR_IS_EOS || locinput >= loceol) {
                  sayNO;
              }
  
@@ -6884,62 +6859,52 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
              else {  /* Handle above Latin-1 code points */
                utf8_posix_above_latin1:
                  classnum = (_char_class_number) FLAGS(scan);
-                if (classnum < _FIRST_NON_SWASH_CC) {
-
-                    /* Here, uses a swash to find such code points.  Load if if
-                     * not done already */
-                    if (! PL_utf8_swash_ptrs[classnum]) {
-                        U8 flags = _CORE_SWASH_INIT_ACCEPT_INVLIST;
-                        PL_utf8_swash_ptrs[classnum]
-                                = _core_swash_init("utf8",
-                                        "",
-                                        &PL_sv_undef, 1, 0,
-                                        PL_XPosix_ptrs[classnum], &flags);
-                    }
-                    if (! (to_complement
-                           ^ cBOOL(swash_fetch(PL_utf8_swash_ptrs[classnum],
-                                               (U8 *) locinput, TRUE))))
-                    {
-                        sayNO;
-                    }
-                }
-                else {  /* Here, uses macros to find above Latin-1 code points */
-                    switch (classnum) {
-                        case _CC_ENUM_SPACE:
-                            if (! (to_complement
-                                        ^ cBOOL(is_XPERLSPACE_high(locinput))))
-                            {
-                                sayNO;
-                            }
-                            break;
-                        case _CC_ENUM_BLANK:
-                            if (! (to_complement
-                                            ^ cBOOL(is_HORIZWS_high(locinput))))
-                            {
-                                sayNO;
-                            }
-                            break;
-                        case _CC_ENUM_XDIGIT:
-                            if (! (to_complement
-                                            ^ cBOOL(is_XDIGIT_high(locinput))))
-                            {
-                                sayNO;
-                            }
-                            break;
-                        case _CC_ENUM_VERTSPACE:
-                            if (! (to_complement
-                                            ^ cBOOL(is_VERTWS_high(locinput))))
-                            {
-                                sayNO;
-                            }
-                            break;
-                        default:    /* The rest, e.g. [:cntrl:], can't match
-                                       above Latin1 */
-                            if (! to_complement) {
-                                sayNO;
-                            }
-                            break;
-                    }
+                switch (classnum) {
+                    default:
+                        if (! (to_complement
+                           ^ cBOOL(_invlist_contains_cp(
+                                      PL_XPosix_ptrs[classnum],
+                                      utf8_to_uvchr_buf((U8 *) locinput,
+                                                        (U8 *) reginfo->strend,
+                                                        NULL)))))
+                        {
+                            sayNO;
+                        }
+                        break;
+                    case _CC_ENUM_SPACE:
+                        if (! (to_complement
+                                    ^ cBOOL(is_XPERLSPACE_high(locinput))))
+                        {
+                            sayNO;
+                        }
+                        break;
+                    case _CC_ENUM_BLANK:
+                        if (! (to_complement
+                                        ^ cBOOL(is_HORIZWS_high(locinput))))
+                        {
+                            sayNO;
+                        }
+                        break;
+                    case _CC_ENUM_XDIGIT:
+                        if (! (to_complement
+                                        ^ cBOOL(is_XDIGIT_high(locinput))))
+                        {
+                            sayNO;
+                        }
+                        break;
+                    case _CC_ENUM_VERTSPACE:
+                        if (! (to_complement
+                                        ^ cBOOL(is_VERTWS_high(locinput))))
+                        {
+                            sayNO;
+                        }
+                        break;
+                    case _CC_ENUM_CNTRL:    /* These can't match above Latin1 */
+                    case _CC_ENUM_ASCII:
+                        if (! to_complement) {
+                            sayNO;
+                        }
+                        break;
                  }
                  locinput += UTF8SKIP(locinput);
              }
@@ -6947,7 +6912,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
  
         case CLUMP: /* Match \X: logical Unicode character.  This is defined as
                        a Unicode extended Grapheme Cluster */
-           if (NEXTCHR_IS_EOS)
+           if (NEXTCHR_IS_EOS || locinput >= loceol)
                 sayNO;
             if  (! utf8_target) {
  
@@ -6956,7 +6921,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
                 locinput++;         /* Match the . or CR */
                 if (nextchr == '\r' /* And if it was CR, and the next is LF,
                                        match the LF */
-                   && locinput < reginfo->strend
+                   && locinput <  loceol
                     && UCHARAT(locinput) == '\n')
                  {
                      locinput++;
@@ -6973,7 +6938,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
                   * current character.  (There is always a break at the
                   * end-of-input) */
                  locinput += UTF8SKIP(locinput);
-                while (locinput < reginfo->strend) {
+                while (locinput < loceol) {
                      GCB_enum cur_gcb = getGCB_VAL_UTF8((U8*) locinput,
                                                           (U8*) reginfo->strend);
                      if (isGCB(prev_gcb, cur_gcb,
@@ -7095,11 +7060,11 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
             if (type != REF     /* REF can do byte comparison */
                 && (utf8_target || type == REFFU || type == REFFL))
             {
-               char * limit = reginfo->strend;
+               char * limit = loceol;
  
                 /* This call case insensitively compares the entire buffer
                     * at s, with the current input starting at locinput, but
-                    * not going off the end given by reginfo->strend, and
+                    * not going off the end given by loceol, and
                      * returns in <limit> upon success, how much of the
                      * current input was matched */
                 if (! foldEQ_utf8_flags(s, NULL, endref - ln, utf8_target,
@@ -7112,17 +7077,20 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
             }
  
             /* Not utf8:  Inline the first character, for speed. */
-           if (!NEXTCHR_IS_EOS &&
-                UCHARAT(s) != nextchr &&
-               (type == REF ||
-                UCHARAT(s) != fold_array[nextchr]))
+           if ( ! NEXTCHR_IS_EOS
+                && locinput < loceol
+                && UCHARAT(s) != nextchr
+                && (   type == REF
+                    || UCHARAT(s) != fold_array[nextchr]))
+            {
                 sayNO;
+            }
             ln = endref - ln;
-           if (locinput + ln > reginfo->strend)
+           if (locinput + ln > loceol)
                 sayNO;
             if (ln > 1 && (type == REF
                            ? memNE(s, locinput, ln)
-                          : ! folder(s, locinput, ln)))
+                          : ! folder(locinput, s, ln)))
                 sayNO;
             locinput += ln;
             break;
@@ -7379,8 +7347,11 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
                 PL_op = NULL;
  
                  re_sv = NULL;
-               if (logical == 0)        /*   (?{})/   */
-                   sv_setsv(save_scalar(PL_replgv), ret); /* $^R */
+               if (logical == 0) {       /*   (?{})/   */
+                    SV *replsv = save_scalar(PL_replgv);
+                    sv_setsv(replsv, ret); /* $^R */
+                    SvSETMAGIC(replsv);
+                }
                 else if (logical == 1) { /*   /(?(?{...})X|Y)/    */
                     sw = cBOOL(SvTRUE_NN(ret));
                     logical = 0;
@@ -7481,7 +7452,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
                 rei = RXi_GET(re);
                  DEBUG_EXECUTE_r(
                      debug_start_match(re_sv, utf8_target, locinput,
-                                    reginfo->strend, "Matching embedded");
+                                    reginfo->strend, "EVAL/GOSUB: Matching embedded");
                 );              
                 startpoint = rei->program + 1;
                  EVAL_CLOSE_PAREN_CLEAR(st); /* ST.close_paren = 0;
@@ -7555,9 +7526,13 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
              {
                  /* preserve $^R across LEAVE's. See Bug 121070. */
                  SV *save_sv= GvSV(PL_replgv);
+                SV *replsv;
                  SvREFCNT_inc(save_sv);
                  regcpblow(ST.cp); /* LEAVE in disguise */
-                sv_setsv(GvSV(PL_replgv), save_sv);
+                /* don't move this initialization up */
+                replsv = GvSV(PL_replgv);
+                sv_setsv(replsv, save_sv);
+                SvSETMAGIC(replsv);
                  SvREFCNT_dec(save_sv);
              }
             cur_eval = ST.prev_eval;
@@ -7611,7 +7586,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
             if (n > maxopenparen)
                 maxopenparen = n;
              DEBUG_BUFFERS_r(Perl_re_exec_indentf( aTHX_
-               "rex=0x%" UVxf " offs=0x%" UVxf ": \\%" UVuf ": set %" IVdf " tmp; maxopenparen=%" UVuf "\n",
+               "OPEN: rex=0x%" UVxf " offs=0x%" UVxf ": \\%" UVuf ": set %" IVdf " tmp; maxopenparen=%" UVuf "\n",
                  depth,
                 PTR2UV(rex),
                 PTR2UV(rex->offs),
@@ -7626,26 +7601,11 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
              script_run_begin = (U8 *) locinput;
              break;
  
-/* XXX really need to log other places start/end are set too */
-#define CLOSE_CAPTURE                                                      \
-    rex->offs[n].start = rex->offs[n].start_tmp;                           \
-    rex->offs[n].end = locinput - reginfo->strbeg;                         \
-    DEBUG_BUFFERS_r(Perl_re_exec_indentf( aTHX_                            \
-        "rex=0x%" UVxf " offs=0x%" UVxf ": \\%" UVuf ": set %" IVdf "..%" IVdf "\n", \
-        depth,                                                             \
-        PTR2UV(rex),                                                       \
-        PTR2UV(rex->offs),                                                 \
-        (UV)n,                                                             \
-        (IV)rex->offs[n].start,                                            \
-        (IV)rex->offs[n].end                                               \
-    ))
  
         case CLOSE:  /*  )  */
             n = ARG(scan);  /* which paren pair */
-           CLOSE_CAPTURE;
-           if (n > rex->lastparen)
-               rex->lastparen = n;
-           rex->lastcloseparen = n;
+           CLOSE_CAPTURE(n, rex->offs[n].start_tmp,
+                             locinput - reginfo->strbeg);
              if ( EVAL_CLOSE_PAREN_IS( cur_eval, n ) )
                 goto fake_end;
  
@@ -7673,10 +7633,8 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
                      if ( OP(cursor)==CLOSE ){
                          n = ARG(cursor);
                          if ( n <= lastopen ) {
-                           CLOSE_CAPTURE;
-                            if (n > rex->lastparen)
-                                rex->lastparen = n;
-                            rex->lastcloseparen = n;
+                           CLOSE_CAPTURE(n, rex->offs[n].start_tmp,
+                                             locinput - reginfo->strbeg);
                              if ( n == ARG(scan) || EVAL_CLOSE_PAREN_IS(cur_eval, n) )
                                  break;
                          }
@@ -7872,7 +7830,7 @@ NULL
             ST.cache_mask = 0;
             
  
-            DEBUG_EXECUTE_r( Perl_re_exec_indentf( aTHX_  "whilem: matched %ld out of %d..%d\n",
+            DEBUG_EXECUTE_r( Perl_re_exec_indentf( aTHX_  "WHILEM: matched %ld out of %d..%d\n",
                    depth, (long)n, min, max)
             );
  
@@ -7890,7 +7848,7 @@ NULL
             /* If degenerate A matches "", assume A done. */
  
             if (locinput == cur_curlyx->u.curlyx.lastloc) {
-                DEBUG_EXECUTE_r( Perl_re_exec_indentf( aTHX_  "whilem: empty match detected, trying continuation...\n",
+                DEBUG_EXECUTE_r( Perl_re_exec_indentf( aTHX_  "WHILEM: empty match detected, trying continuation...\n",
                     depth)
                 );
                 goto do_whilem_B_max;
@@ -7958,7 +7916,7 @@ NULL
                         Newxz(aux->poscache, size, char);
                     }
                      DEBUG_EXECUTE_r( Perl_re_printf( aTHX_
-      "%swhilem: Detected a super-linear match, switching on caching%s...\n",
+      "%sWHILEM: Detected a super-linear match, switching on caching%s...\n",
                               PL_colors[4], PL_colors[5])
                     );
                 }
@@ -7974,7 +7932,7 @@ NULL
                     mask    = 1 << (offset % 8);
                     offset /= 8;
                     if (reginfo->info_aux->poscache[offset] & mask) {
-                        DEBUG_EXECUTE_r( Perl_re_exec_indentf( aTHX_  "whilem: (cache) already tried at this position...\n",
+                        DEBUG_EXECUTE_r( Perl_re_exec_indentf( aTHX_  "WHILEM: (cache) already tried at this position...\n",
                              depth)
                         );
                          cur_curlyx->u.curlyx.count--;
@@ -8022,11 +7980,11 @@ NULL
             CACHEsayNO;
             NOT_REACHED; /* NOTREACHED */
  
+       case WHILEM_A_min_fail: /* just failed to match A in a minimal match */
+           /* FALLTHROUGH */
         case WHILEM_A_pre_fail: /* just failed to match even minimal A */
             REGCP_UNWIND(ST.lastcp);
              regcppop(rex, &maxopenparen);
-           /* FALLTHROUGH */
-       case WHILEM_A_min_fail: /* just failed to match A in a minimal match */
             cur_curlyx->u.curlyx.lastloc = ST.save_lastloc;
             cur_curlyx->u.curlyx.count--;
             CACHEsayNO;
@@ -8035,7 +7993,7 @@ NULL
         case WHILEM_A_max_fail: /* just failed to match A in a maximal match */
             REGCP_UNWIND(ST.lastcp);
              regcppop(rex, &maxopenparen); /* Restore some previous $<digit>s? */
-            DEBUG_EXECUTE_r(Perl_re_exec_indentf( aTHX_  "whilem: failed, trying continuation...\n",
+            DEBUG_EXECUTE_r(Perl_re_exec_indentf( aTHX_  "WHILEM: failed, trying continuation...\n",
                  depth)
             );
           do_whilem_B_max:
@@ -8076,10 +8034,13 @@ NULL
                 CACHEsayNO;
             }
  
-            DEBUG_EXECUTE_r(Perl_re_exec_indentf( aTHX_  "trying longer...\n", depth)
+            DEBUG_EXECUTE_r(Perl_re_exec_indentf( aTHX_  "WHILEM: B min fail: trying longer...\n", depth)
             );
             /* Try grabbing another A and see if it helps. */
             cur_curlyx->u.curlyx.lastloc = locinput;
+            ST.cp = regcppush(rex, cur_curlyx->u.curlyx.parenfloor,
+                            maxopenparen);
+           REGCP_SET(ST.lastcp);
             PUSH_STATE_GOTO(WHILEM_A_min,
                 /*A*/ NEXTOPER(ST.save_curlyx->u.curlyx.me) + EXTRA_STEP_2ARGS,
                  locinput);
@@ -8244,15 +8205,6 @@ NULL
                     regnode *text_node = ST.B;
                     if (! HAS_TEXT(text_node))
                         FIND_NEXT_IMPT(text_node);
-                   /* this used to be 
-                       
-                       (HAS_TEXT(text_node) && PL_regkind[OP(text_node)] == EXACT)
-                       
-                       But the former is redundant in light of the latter.
-                       
-                       if this changes back then the macro for 
-                       IS_TEXT and friends need to change.
-                    */
                     if (PL_regkind[OP(text_node)] == EXACT) {
                          if (! S_setup_EXACTISH_ST_c1_c2(aTHX_
                             text_node, &ST.c1, ST.c1_utf8, &ST.c2, ST.c2_utf8,
@@ -8299,14 +8251,11 @@ NULL
  
             if (ST.me->flags) {
                 /* emulate CLOSE: mark current A as captured */
-               I32 paren = ST.me->flags;
+               U32 paren = (U32)ST.me->flags;
                 if (ST.count) {
-                   rex->offs[paren].start
-                       = HOPc(locinput, -ST.alen) - reginfo->strbeg;
-                   rex->offs[paren].end = locinput - reginfo->strbeg;
-                   if ((U32)paren > rex->lastparen)
-                       rex->lastparen = paren;
-                   rex->lastcloseparen = paren;
+                    CLOSE_CAPTURE(paren,
+                       HOPc(locinput, -ST.alen) - reginfo->strbeg,
+                       locinput - reginfo->strbeg);
                 }
                 else
                     rex->offs[paren].end = -1;
@@ -8345,11 +8294,8 @@ NULL
  #define CURLY_SETPAREN(paren, success) \
      if (paren) { \
         if (success) { \
-           rex->offs[paren].start = HOPc(locinput, -1) - reginfo->strbeg; \
-           rex->offs[paren].end = locinput - reginfo->strbeg; \
-           if (paren > rex->lastparen) \
-               rex->lastparen = paren; \
-           rex->lastcloseparen = paren; \
+            CLOSE_CAPTURE(paren, HOPc(locinput, -1) - reginfo->strbeg, \
+                                locinput - reginfo->strbeg); \
         } \
         else { \
             rex->offs[paren].end = -1; \
@@ -8380,12 +8326,18 @@ NULL
                 maxopenparen = ST.paren;
             ST.min = ARG1(scan);  /* min to match */
             ST.max = ARG2(scan);  /* max to match */
+            scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
+
+            /* handle the single-char capture called as a GOSUB etc */
              if (EVAL_CLOSE_PAREN_IS_TRUE(cur_eval,(U32)ST.paren))
              {
-               ST.min=1;
-               ST.max=1;
+                char *li = locinput;
+                if (!regrepeat(rex, &li, scan, loceol, reginfo, 1))
+                   sayNO;
+                SET_locinput(li);
+                goto fake_end;
             }
-            scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
+
             goto repeat;
  
         case CURLY:             /*  /A{m,n}B/ where A is width 1 char */
@@ -8420,13 +8372,6 @@ NULL
                         ST.c1 = ST.c2 = CHRTEST_VOID;
                     }
                     else {
-                    
-                    /*  Currently we only get here when 
-                        
-                        PL_rekind[OP(text_node)] == EXACT
-                    
-                        if this changes back then the macro for IS_TEXT and 
-                        friends need to change. */
                          if (! S_setup_EXACTISH_ST_c1_c2(aTHX_
                             text_node, &ST.c1, ST.c1_utf8, &ST.c2, ST.c2_utf8,
                             reginfo))
@@ -8443,7 +8388,7 @@ NULL
                  char *li = locinput;
                 minmod = 0;
                 if (ST.min &&
-                        regrepeat(rex, &li, ST.A, reginfo, ST.min)
+                        regrepeat(rex, &li, ST.A, loceol, reginfo, ST.min)
                              < ST.min)
                     sayNO;
                  SET_locinput(li);
@@ -8457,7 +8402,7 @@ NULL
                 /* set ST.maxpos to the furthest point along the
                  * string that could possibly match */
                 if  (ST.max == REG_INFTY) {
-                   ST.maxpos = reginfo->strend - 1;
+                   ST.maxpos = loceol - 1;
                     if (utf8_target)
                         while (UTF8_IS_CONTINUATION(*(U8*)ST.maxpos))
                             ST.maxpos--;
@@ -8465,13 +8410,13 @@ NULL
                 else if (utf8_target) {
                     int m = ST.max - ST.min;
                     for (ST.maxpos = locinput;
-                        m >0 && ST.maxpos < reginfo->strend; m--)
+                        m >0 && ST.maxpos <  loceol; m--)
                         ST.maxpos += UTF8SKIP(ST.maxpos);
                 }
                 else {
                     ST.maxpos = locinput + ST.max - ST.min;
-                   if (ST.maxpos >= reginfo->strend)
-                       ST.maxpos = reginfo->strend - 1;
+                   if (ST.maxpos >=  loceol)
+                       ST.maxpos =  loceol - 1;
                 }
                 goto curly_try_B_min_known;
  
@@ -8480,7 +8425,7 @@ NULL
                  /* avoid taking address of locinput, so it can remain
                   * a register var */
                  char *li = locinput;
-                ST.count = regrepeat(rex, &li, ST.A, reginfo, ST.max);
+                ST.count = regrepeat(rex, &li, ST.A, loceol, reginfo, ST.max);
                 if (ST.count < ST.min)
                     sayNO;
                  SET_locinput(li);
@@ -8501,24 +8446,41 @@ NULL
             }
             NOT_REACHED; /* NOTREACHED */
  
-       case CURLY_B_min_known_fail:
-           /* failed to find B in a non-greedy match where c1,c2 valid */
+       case CURLY_B_min_fail:
+           /* failed to find B in a non-greedy match.
+             * Handles both cases where c1,c2 valid or not */
  
             REGCP_UNWIND(ST.cp);
              if (ST.paren) {
                  UNWIND_PAREN(ST.lastparen, ST.lastcloseparen);
              }
-           /* Couldn't or didn't -- move forward. */
-           ST.oldloc = locinput;
-           if (utf8_target)
-               locinput += UTF8SKIP(locinput);
-           else
-               locinput++;
-           ST.count++;
-         curly_try_B_min_known:
-            /* find the next place where 'B' could work, then call B */
-           {
+
+            if (ST.c1 == CHRTEST_VOID) {
+                /* failed -- move forward one */
+                char *li = locinput;
+                if (!regrepeat(rex, &li, ST.A, loceol, reginfo, 1)) {
+                    sayNO;
+                }
+                locinput = li;
+                ST.count++;
+               if (!(   ST.count <= ST.max
+                        /* count overflow ? */
+                     || (ST.max == REG_INFTY && ST.count > 0))
+                )
+                    sayNO;
+            }
+            else {
                 int n;
+                /* Couldn't or didn't -- move forward. */
+                ST.oldloc = locinput;
+                if (utf8_target)
+                    locinput += UTF8SKIP(locinput);
+                else
+                    locinput++;
+                ST.count++;
+
+              curly_try_B_min_known:
+                /* find the next place where 'B' could work, then call B */
                 if (utf8_target) {
                     n = (ST.oldloc == locinput) ? 0 : 1;
                     if (ST.c1 == ST.c2) {
@@ -8593,53 +8555,22 @@ NULL
                       * locinput matches */
                      char *li = ST.oldloc;
                     ST.count += n;
-                    if (regrepeat(rex, &li, ST.A, reginfo, n) < n)
+                    if (regrepeat(rex, &li, ST.A, loceol, reginfo, n) < n)
                         sayNO;
                      assert(n == REG_INFTY || locinput == li);
                 }
-               CURLY_SETPAREN(ST.paren, ST.count);
-                if (EVAL_CLOSE_PAREN_IS_TRUE(cur_eval,(U32)ST.paren))
-                   goto fake_end;
-               PUSH_STATE_GOTO(CURLY_B_min_known, ST.B, locinput);
             }
-           NOT_REACHED; /* NOTREACHED */
-
-       case CURLY_B_min_fail:
-           /* failed to find B in a non-greedy match where c1,c2 invalid */
  
-           REGCP_UNWIND(ST.cp);
-            if (ST.paren) {
-                UNWIND_PAREN(ST.lastparen, ST.lastcloseparen);
-            }
-           /* failed -- move forward one */
-            {
-                char *li = locinput;
-                if (!regrepeat(rex, &li, ST.A, reginfo, 1)) {
-                    sayNO;
-                }
-                locinput = li;
-            }
-            {
-               ST.count++;
-               if (ST.count <= ST.max || (ST.max == REG_INFTY &&
-                       ST.count > 0)) /* count overflow ? */
-               {
-                 curly_try_B_min:
-                   CURLY_SETPAREN(ST.paren, ST.count);
-                    if (EVAL_CLOSE_PAREN_IS_TRUE(cur_eval,(U32)ST.paren))
-                        goto fake_end;
-                   PUSH_STATE_GOTO(CURLY_B_min, ST.B, locinput);
-               }
-           }
-            sayNO;
+          curly_try_B_min:
+            CURLY_SETPAREN(ST.paren, ST.count);
+            PUSH_STATE_GOTO(CURLY_B_min, ST.B, locinput);
             NOT_REACHED; /* NOTREACHED */
  
+
            curly_try_B_max:
             /* a successful greedy match: now try to match B */
-            if (EVAL_CLOSE_PAREN_IS_TRUE(cur_eval,(U32)ST.paren))
-                goto fake_end;
             {
-               bool could_match = locinput < reginfo->strend;
+               bool could_match = locinput <  loceol;
  
                 /* If it could work, try it. */
                  if (ST.c1 != CHRTEST_VOID && could_match) {
@@ -8707,7 +8638,7 @@ NULL
                 st->u.eval.prev_eval = cur_eval;
                  cur_eval = CUR_EVAL.prev_eval;
                 DEBUG_EXECUTE_r(
-                    Perl_re_exec_indentf( aTHX_  "EVAL trying tail ... (cur_eval=%p)\n",
+                    Perl_re_exec_indentf( aTHX_  "END: EVAL trying tail ... (cur_eval=%p)\n",
                                        depth, cur_eval););
                  if ( nochange_depth )
                     nochange_depth--;
@@ -8720,7 +8651,7 @@ NULL
  
             if (locinput < reginfo->till) {
                  DEBUG_EXECUTE_r(Perl_re_printf( aTHX_
-                                      "%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n",
+                                      "%sEND: Match possible, but length=%ld is smaller than requested=%ld, failing!%s\n",
                                       PL_colors[4],
                                       (long)(locinput - startpos),
                                       (long)(reginfo->till - startpos),
@@ -8732,7 +8663,7 @@ NULL
  
         case SUCCEED: /* successful SUSPEND/UNLESSM/IFMATCH/CURLYM */
             DEBUG_EXECUTE_r(
-            Perl_re_exec_indentf( aTHX_  "%ssubpattern success...%s\n",
+            Perl_re_exec_indentf( aTHX_  "%sSUCCEED: subpattern success...%s\n",
                  depth, PL_colors[4], PL_colors[5]));
             sayYES;                     /* Success! */
  
@@ -8747,11 +8678,11 @@ NULL
             newstart = locinput;
             goto do_ifmatch;    
  
-       case UNLESSM:   /* -ve lookaround: (?!A), or with flags, (?<!A) */
+       case UNLESSM:   /* -ve lookaround: (?!A), or with 'flags', (?<!A) */
             ST.wanted = 0;
             goto ifmatch_trivial_fail_test;
  
-       case IFMATCH:   /* +ve lookaround: (?=A), or with flags, (?<=A) */
+       case IFMATCH:   /* +ve lookaround: (?=A), or with 'flags', (?<=A) */
             ST.wanted = 1;
           ifmatch_trivial_fail_test:
             if (scan->flags) {
@@ -8814,7 +8745,7 @@ NULL
             break;
  
         case COMMIT:  /*  (*COMMIT)  */
-           reginfo->cutpoint = reginfo->strend;
+           reginfo->cutpoint = loceol;
             /* FALLTHROUGH */
  
         case PRUNE:   /*  (*PRUNE)   */
@@ -8867,7 +8798,7 @@ NULL
                  sv_commit = ST.mark_name;
  
                  DEBUG_EXECUTE_r({
-                        Perl_re_exec_indentf( aTHX_  "%ssetting cutpoint to mark:%" SVf "...%s\n",
+                        Perl_re_exec_indentf( aTHX_  "%sMARKPOINT: next fail: setting cutpoint to mark:%" SVf "...%s\n",
                              depth,
                             PL_colors[4], SVfARG(sv_commit), PL_colors[5]);
                 });
@@ -8925,7 +8856,7 @@ NULL
  #undef ST
  
          case LNBREAK: /* \R */
-            if ((n=is_LNBREAK_safe(locinput, reginfo->strend, utf8_target))) {
+            if ((n=is_LNBREAK_safe(locinput, loceol, utf8_target))) {
                  locinput += n;
              } else
                  sayNO;
@@ -8944,7 +8875,7 @@ NULL
                  locinput += PL_utf8skip[nextchr];
                  /* locinput is allowed to go 1 char off the end (signifying
                   * EOS), but not 2+ */
-                if (locinput > reginfo->strend)
+                if (locinput >  loceol)
                      sayNO;
              }
              else
@@ -9069,8 +9000,10 @@ NULL
           * see code related to PL_replgv elsewhere in this file.
           * Yves
           */
-       if (oreplsv != GvSV(PL_replgv))
+       if (oreplsv != GvSV(PL_replgv)) {
             sv_setsv(oreplsv, GvSV(PL_replgv));
+            SvSETMAGIC(oreplsv);
+        }
      }
      result = 1;
      goto final_exit;
@@ -9150,41 +9083,50 @@ NULL
   * What 'simple' means is a node which can be the operand of a quantifier like
   * '+', or {1,3}
   *
- * startposp - pointer a pointer to the start position.  This is updated
+ * startposp - pointer to a pointer to the start position.  This is updated
   *             to point to the byte following the highest successful
   *             match.
   * p         - the regnode to be repeatedly matched against.
- * reginfo   - struct holding match state, such as strend
+ * loceol    - pointer to the end position beyond which we aren't supposed to
+ *             look.
+ * reginfo   - struct holding match state, such as utf8_target
   * max       - maximum number of things to match.
   * depth     - (for debugging) backtracking depth.
   */
  STATIC I32
  S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
-            regmatch_info *const reginfo, I32 max _pDEPTH)
+            char * loceol, regmatch_info *const reginfo, I32 max _pDEPTH)
  {
+    dVAR;
      char *scan;     /* Pointer to current position in target string */
      I32 c;
-    char *loceol = reginfo->strend;   /* local version */
+    char *this_eol = loceol;   /* potentially adjusted version. */
      I32 hardcount = 0;  /* How many matches so far */
      bool utf8_target = reginfo->is_utf8_target;
      unsigned int to_complement = 0;  /* Invert the result? */
-    UV utf8_flags;
+    UV utf8_flags = 0;
      _char_class_number classnum;
  
      PERL_ARGS_ASSERT_REGREPEAT;
  
+    /* This routine is structured so that we switch on the input OP.  Each OP
+     * case: statement contains a loop to repeatedly apply the OP, advancing
+     * the input until it fails, or reaches the end of the input, or until it
+     * reaches the upper limit of matches. */
+
      scan = *startposp;
-    if (max == REG_INFTY)
+    if (max == REG_INFTY)   /* This is a special marker to go to the platform's
+                               max */
         max = I32_MAX;
-    else if (! utf8_target && loceol - scan > max)
-       loceol = scan + max;
+    else if (! utf8_target && this_eol - scan > max)
+       this_eol = scan + max;
  
-    /* Here, for the case of a non-UTF-8 target we have adjusted <loceol> down
+    /* Here, for the case of a non-UTF-8 target we have adjusted <this_eol> down
       * to the maximum of how far we should go in it (leaving it set to the real
       * end, if the maximum permissible would take us beyond that).  This allows
-     * us to make the loop exit condition that we haven't gone past <loceol> to
+     * us to make the loop exit condition that we haven't gone past <this_eol> to
       * also mean that we haven't exceeded the max permissible count, saving a
-     * test each time through the loop.  But it assumes that the OP matches a
+     * test each time through the loops.  But it assumes that the OP matches a
       * single byte, which is true for most of the OPs below when applied to a
       * non-UTF-8 target.  Those relatively few OPs that don't have this
       * characteristic will have to compensate.
@@ -9192,47 +9134,54 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
       * There is no adjustment for UTF-8 targets, as the number of bytes per
       * character varies.  OPs will have to test both that the count is less
       * than the max permissible (using <hardcount> to keep track), and that we
-     * are still within the bounds of the string (using <loceol>.  A few OPs
+     * are still within the bounds of the string (using <this_eol>).  A few OPs
       * match a single byte no matter what the encoding.  They can omit the max
       * test if, for the UTF-8 case, they do the adjustment that was skipped
       * above.
       *
       * Thus, the code above sets things up for the common case; and exceptional
       * cases need extra work; the common case is to make sure <scan> doesn't
-     * go past <loceol>, and for UTF-8 to also use <hardcount> to make sure the
+     * go past <this_eol>, and for UTF-8 to also use <hardcount> to make sure the
       * count doesn't exceed the maximum permissible */
  
      switch (OP(p)) {
      case REG_ANY:
         if (utf8_target) {
-           while (scan < loceol && hardcount < max && *scan != '\n') {
+           while (scan < this_eol && hardcount < max && *scan != '\n') {
                 scan += UTF8SKIP(scan);
                 hardcount++;
             }
         } else {
-            scan = (char *) memchr(scan, '\n', loceol - scan);
+            scan = (char *) memchr(scan, '\n', this_eol - scan);
              if (! scan) {
-                scan = loceol;
+                scan = this_eol;
              }
         }
         break;
      case SANY:
          if (utf8_target) {
-           while (scan < loceol && hardcount < max) {
+           while (scan < this_eol && hardcount < max) {
                 scan += UTF8SKIP(scan);
                 hardcount++;
             }
         }
         else
-           scan = loceol;
+           scan = this_eol;
         break;
      case EXACTL:
          _CHECK_AND_WARN_PROBLEMATIC_LOCALE;
          if (utf8_target && UTF8_IS_ABOVE_LATIN1(*scan)) {
              _CHECK_AND_OUTPUT_WIDE_LOCALE_UTF8_MSG(scan, loceol);
          }
+        goto do_exact;
+
+    case EXACT_ONLY8:
+        if (! utf8_target) {
+            break;
+        }
          /* FALLTHROUGH */
      case EXACT:
+      do_exact:
          assert(STR_LEN(p) == reginfo->is_utf8_pat ? UTF8SKIP(STRING(p)) : 1);
  
         c = (U8)*STRING(p);
@@ -9242,12 +9191,12 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
           * can use UTF8_IS_INVARIANT() even if the pattern isn't UTF-8, as it's
           * true iff it doesn't matter if the argument is in UTF-8 or not */
          if (UTF8_IS_INVARIANT(c) || (! utf8_target && ! reginfo->is_utf8_pat)) {
-            if (utf8_target && loceol - scan > max) {
-                /* We didn't adjust <loceol> because is UTF-8, but ok to do so,
+            if (utf8_target && this_eol - scan > max) {
+                /* We didn't adjust <this_eol> because it is UTF-8, but ok to do so,
                   * since here, to match at all, 1 char == 1 byte */
-                loceol = scan + max;
+                this_eol = scan + max;
              }
-            scan = find_span_end(scan, loceol, (U8) c);
+            scan = (char *) find_span_end((U8 *) scan, (U8 *) this_eol, (U8) c);
         }
         else if (reginfo->is_utf8_pat) {
              if (utf8_target) {
@@ -9256,7 +9205,7 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
                  /* When both target and pattern are UTF-8, we have to do
                   * string EQ */
                  while (hardcount < max
-                       && scan < loceol
+                       && scan < this_eol
                         && (scan_char_len = UTF8SKIP(scan)) <= STR_LEN(p)
                         && memEQ(scan, STRING(p), scan_char_len))
                  {
@@ -9269,7 +9218,7 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
                  /* Target isn't utf8; convert the character in the UTF-8
                   * pattern to non-UTF8, and do a simple find */
                  c = EIGHT_BIT_UTF8_TO_NATIVE(c, *(STRING(p) + 1));
-                scan = find_span_end(scan, loceol, (U8) c);
+                scan = (char *) find_span_end((U8 *) scan, (U8 *) this_eol, (U8) c);
              } /* else pattern char is above Latin1, can't possibly match the
                   non-UTF-8 target */
          }
@@ -9283,7 +9232,7 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
             U8 low = UTF8_TWO_BYTE_LO(c);
  
             while (hardcount < max
-                   && scan + 1 < loceol
+                   && scan + 1 < this_eol
                     && UCHARAT(scan) == high
                     && UCHARAT(scan + 1) == low)
             {
@@ -9293,12 +9242,19 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
         }
         break;
  
-    case EXACTFA_NO_TRIE:   /* This node only generated for non-utf8 patterns */
+    case EXACTFAA_NO_TRIE: /* This node only generated for non-utf8 patterns */
          assert(! reginfo->is_utf8_pat);
          /* FALLTHROUGH */
-    case EXACTFA:
+    case EXACTFAA:
          utf8_flags = FOLDEQ_UTF8_NOMIX_ASCII;
-       goto do_exactf;
+        if (reginfo->is_utf8_pat || ! utf8_target) {
+
+            /* The possible presence of a MICRO SIGN in the pattern forbids us
+             * to view a non-UTF-8 pattern as folded when there is a UTF-8
+             * target.  */
+            utf8_flags |= FOLDEQ_S2_ALREADY_FOLDED|FOLDEQ_S2_FOLDS_SANE;
+        }
+        goto do_exactf;
  
      case EXACTFL:
          _CHECK_AND_WARN_PROBLEMATIC_LOCALE;
@@ -9307,7 +9263,6 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
  
      case EXACTF:   /* This node only generated for non-utf8 patterns */
          assert(! reginfo->is_utf8_pat);
-        utf8_flags = 0;
          goto do_exactf;
  
      case EXACTFLU8:
@@ -9318,9 +9273,19 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
                                      | FOLDEQ_S2_FOLDS_SANE;
          goto do_exactf;
  
-    case EXACTFU_SS:
+    case EXACTFU_ONLY8:
+        if (! utf8_target) {
+            break;
+        }
+       assert(reginfo->is_utf8_pat);
+       utf8_flags = FOLDEQ_S2_ALREADY_FOLDED;
+        goto do_exactf;
+
      case EXACTFU:
-       utf8_flags = reginfo->is_utf8_pat ? FOLDEQ_S2_ALREADY_FOLDED : 0;
+        utf8_flags = FOLDEQ_S2_ALREADY_FOLDED;
+        /* FALLTHROUGH */
+
+    case EXACTFUP:
  
        do_exactf: {
          int c1, c2;
@@ -9333,7 +9298,7 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
          {
              if (c1 == CHRTEST_VOID) {
                  /* Use full Unicode fold matching */
-                char *tmpeol = reginfo->strend;
+                char *tmpeol = loceol;
                  STRLEN pat_len = reginfo->is_utf8_pat ? UTF8SKIP(STRING(p)) : 1;
                  while (hardcount < max
                          && foldEQ_utf8_flags(scan, &tmpeol, 0, utf8_target,
@@ -9341,13 +9306,13 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
                                               reginfo->is_utf8_pat, utf8_flags))
                  {
                      scan = tmpeol;
-                    tmpeol = reginfo->strend;
+                    tmpeol = loceol;
                      hardcount++;
                  }
              }
              else if (utf8_target) {
                  if (c1 == c2) {
-                    while (scan < loceol
+                    while (scan < this_eol
                             && hardcount < max
                             && memEQ(scan, c1_utf8, UTF8SKIP(scan)))
                      {
@@ -9356,7 +9321,7 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
                      }
                  }
                  else {
-                    while (scan < loceol
+                    while (scan < this_eol
                             && hardcount < max
                             && (memEQ(scan, c1_utf8, UTF8SKIP(scan))
                                 || memEQ(scan, c2_utf8, UTF8SKIP(scan))))
@@ -9367,7 +9332,7 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
                  }
              }
              else if (c1 == c2) {
-                scan = find_span_end(scan, loceol, c1);
+                scan = (char *) find_span_end((U8 *) scan, (U8 *) this_eol, (U8) c1);
              }
              else {
                  /* See comments in regmatch() CURLY_B_min_known_fail.  We avoid
@@ -9379,12 +9344,12 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
                      U8 c1_c2_mask = ~ c1_c2_bits_differing;
  
                      scan = (char *) find_span_end_mask((U8 *) scan,
-                                                       (U8 *) loceol,
+                                                       (U8 *) this_eol,
                                                         c1 & c1_c2_mask,
                                                         c1_c2_mask);
                  }
                  else {
-                    while (    scan < loceol
+                    while (    scan < this_eol
                             && (UCHARAT(scan) == c1 || UCHARAT(scan) == c2))
                      {
                          scan++;
@@ -9394,6 +9359,7 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
         }
         break;
      }
+    case ANYOFPOSIXL:
      case ANYOFL:
          _CHECK_AND_WARN_PROBLEMATIC_LOCALE;
  
@@ -9405,58 +9371,61 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
      case ANYOF:
         if (utf8_target) {
             while (hardcount < max
-                   && scan < loceol
-                  && reginclass(prog, p, (U8*)scan, (U8*) loceol, utf8_target))
+                   && scan < this_eol
+                  && reginclass(prog, p, (U8*)scan, (U8*) this_eol, utf8_target))
             {
                 scan += UTF8SKIP(scan);
                 hardcount++;
             }
         }
-        else if (ANYOF_FLAGS(p)) {
-           while (scan < loceol
+        else if (ANYOF_FLAGS(p) & ~ ANYOF_MATCHES_ALL_ABOVE_BITMAP) {
+           while (scan < this_eol
                      && reginclass(prog, p, (U8*)scan, (U8*)scan+1, 0))
                 scan++;
          }
          else {
-           while (scan < loceol && ANYOF_BITMAP_TEST(p, *((U8*)scan)))
+           while (scan < this_eol && ANYOF_BITMAP_TEST(p, *((U8*)scan)))
                 scan++;
         }
         break;
  
      case ANYOFM:
-        if (utf8_target && loceol - scan > max) {
+        if (utf8_target && this_eol - scan > max) {
  
-            /* We didn't adjust <loceol> at the beginning of this routine
+            /* We didn't adjust <this_eol> at the beginning of this routine
               * because is UTF-8, but it is actually ok to do so, since here, to
               * match, 1 char == 1 byte. */
-            loceol = scan + max;
+            this_eol = scan + max;
          }
  
-        scan = (char *) find_span_end_mask((U8 *) scan, (U8 *) loceol, (U8) ARG(p), FLAGS(p));
+        scan = (char *) find_span_end_mask((U8 *) scan, (U8 *) this_eol, (U8) ARG(p), FLAGS(p));
          break;
  
-    case ASCII:
-        if (utf8_target && loceol - scan > max) {
-            loceol = scan + max;
-        }
-
-        scan = find_next_non_ascii(scan, loceol, utf8_target);
-       break;
-
-    case NASCII:
+    case NANYOFM:
         if (utf8_target) {
             while (     hardcount < max
-                   &&   scan < loceol
-                  && ! isASCII_utf8_safe(scan, loceol))
+                   &&   scan < this_eol
+                  &&  (*scan & FLAGS(p)) != ARG(p))
             {
                 scan += UTF8SKIP(scan);
                 hardcount++;
             }
         }
          else {
-            scan = find_next_ascii(scan, loceol, utf8_target);
+            scan = (char *) find_next_masked((U8 *) scan, (U8 *) this_eol, (U8) ARG(p), FLAGS(p));
         }
-       break;
+        break;
+
+    case ANYOFH:
+        if (utf8_target) while (   hardcount < max
+                                && scan < this_eol
+                                && reginclass(prog, p, (U8*)scan, (U8*) this_eol,
+                                                                  TRUE))
+        {
+            scan += UTF8SKIP(scan);
+            hardcount++;
+        }
+        break;
  
      /* The argument (FLAGS) to all the POSIX node types is the class number */
  
@@ -9467,15 +9436,16 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
      case POSIXL:
          _CHECK_AND_WARN_PROBLEMATIC_LOCALE;
         if (! utf8_target) {
-           while (scan < loceol && to_complement ^ cBOOL(isFOO_lc(FLAGS(p),
+           while (scan < this_eol && to_complement ^ cBOOL(isFOO_lc(FLAGS(p),
                                                                     *scan)))
              {
                 scan++;
              }
         } else {
-           while (hardcount < max && scan < loceol
+           while (hardcount < max && scan < this_eol
                     && to_complement ^ cBOOL(isFOO_utf8_lc(FLAGS(p),
-                                                                  (U8 *) scan)))
+                                                                  (U8 *) scan,
+                                                                  (U8 *) this_eol)))
              {
                  scan += UTF8SKIP(scan);
                 hardcount++;
@@ -9490,14 +9460,14 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
          /* FALLTHROUGH */
  
      case POSIXA:
-        if (utf8_target && loceol - scan > max) {
+        if (utf8_target && this_eol - scan > max) {
  
-            /* We didn't adjust <loceol> at the beginning of this routine
+            /* We didn't adjust <this_eol> at the beginning of this routine
               * because is UTF-8, but it is actually ok to do so, since here, to
               * match, 1 char == 1 byte. */
-            loceol = scan + max;
+            this_eol = scan + max;
          }
-        while (scan < loceol && _generic_isCC_A((U8) *scan, FLAGS(p))) {
+        while (scan < this_eol && _generic_isCC_A((U8) *scan, FLAGS(p))) {
             scan++;
         }
         break;
@@ -9511,7 +9481,7 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
  
      case NPOSIXA:
          if (! utf8_target) {
-            while (scan < loceol && ! _generic_isCC_A((U8) *scan, FLAGS(p))) {
+            while (scan < this_eol && ! _generic_isCC_A((U8) *scan, FLAGS(p))) {
                  scan++;
              }
          }
@@ -9519,8 +9489,8 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
  
              /* The complement of something that matches only ASCII matches all
               * non-ASCII, plus everything in ASCII that isn't in the class. */
-           while (hardcount < max && scan < loceol
-                   && (   ! isASCII_utf8_safe(scan, reginfo->strend)
+           while (hardcount < max && scan < this_eol
+                   && (   ! isASCII_utf8_safe(scan, loceol)
                         || ! _generic_isCC_A((U8) *scan, FLAGS(p))))
              {
                  scan += UTF8SKIP(scan);
@@ -9535,7 +9505,7 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
  
      case POSIXU:
         if (! utf8_target) {
-            while (scan < loceol && to_complement
+            while (scan < this_eol && to_complement
                                  ^ cBOOL(_generic_isCC((U8) *scan, FLAGS(p))))
              {
                  scan++;
@@ -9544,142 +9514,91 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
         else {
            utf8_posix:
              classnum = (_char_class_number) FLAGS(p);
-            if (classnum < _FIRST_NON_SWASH_CC) {
-
-                /* Here, a swash is needed for above-Latin1 code points.
-                 * Process as many Latin1 code points using the built-in rules.
-                 * Go to another loop to finish processing upon encountering
-                 * the first Latin1 code point.  We could do that in this loop
-                 * as well, but the other way saves having to test if the swash
-                 * has been loaded every time through the loop: extra space to
-                 * save a test. */
-                while (hardcount < max && scan < loceol) {
-                    if (UTF8_IS_INVARIANT(*scan)) {
-                        if (! (to_complement ^ cBOOL(_generic_isCC((U8) *scan,
-                                                                   classnum))))
-                        {
-                            break;
-                        }
-                        scan++;
+            switch (classnum) {
+                default:
+                    while (   hardcount < max && scan < this_eol
+                           && to_complement ^ cBOOL(_invlist_contains_cp(
+                                              PL_XPosix_ptrs[classnum],
+                                              utf8_to_uvchr_buf((U8 *) scan,
+                                                                (U8 *) this_eol,
+                                                                NULL))))
+                    {
+                        scan += UTF8SKIP(scan);
+                        hardcount++;
                      }
-                    else if (UTF8_IS_DOWNGRADEABLE_START(*scan)) {
-                        if (! (to_complement
-                              ^ cBOOL(_generic_isCC(EIGHT_BIT_UTF8_TO_NATIVE(*scan,
-                                                                     *(scan + 1)),
-                                                    classnum))))
-                        {
-                            break;
-                        }
-                        scan += 2;
+                    break;
+
+                    /* For the classes below, the knowledge of how to handle
+                     * every code point is compiled in to Perl via a macro.
+                     * This code is written for making the loops as tight as
+                     * possible.  It could be refactored to save space instead.
+                     * */
+
+                case _CC_ENUM_SPACE:
+                    while (hardcount < max
+                           && scan < this_eol
+                           && (to_complement
+                               ^ cBOOL(isSPACE_utf8_safe(scan, this_eol))))
+                    {
+                        scan += UTF8SKIP(scan);
+                        hardcount++;
                      }
-                    else {
-                        goto found_above_latin1;
+                    break;
+                case _CC_ENUM_BLANK:
+                    while (hardcount < max
+                           && scan < this_eol
+                           && (to_complement
+                                ^ cBOOL(isBLANK_utf8_safe(scan, this_eol))))
+                    {
+                        scan += UTF8SKIP(scan);
+                        hardcount++;
                      }
-
-                    hardcount++;
-                }
-            }
-            else {
-                /* For these character classes, the knowledge of how to handle
-                 * every code point is compiled in to Perl via a macro.  This
-                 * code is written for making the loops as tight as possible.
-                 * It could be refactored to save space instead */
-                switch (classnum) {
-                    case _CC_ENUM_SPACE:
-                        while (hardcount < max
-                               && scan < loceol
-                               && (to_complement
-                                   ^ cBOOL(isSPACE_utf8_safe(scan, loceol))))
-                        {
-                            scan += UTF8SKIP(scan);
-                            hardcount++;
-                        }
-                        break;
-                    case _CC_ENUM_BLANK:
-                        while (hardcount < max
-                               && scan < loceol
-                               && (to_complement
-                                    ^ cBOOL(isBLANK_utf8_safe(scan, loceol))))
-                        {
-                            scan += UTF8SKIP(scan);
-                            hardcount++;
-                        }
-                        break;
-                    case _CC_ENUM_XDIGIT:
-                        while (hardcount < max
-                               && scan < loceol
-                               && (to_complement
-                                   ^ cBOOL(isXDIGIT_utf8_safe(scan, loceol))))
-                        {
-                            scan += UTF8SKIP(scan);
-                            hardcount++;
-                        }
-                        break;
-                    case _CC_ENUM_VERTSPACE:
-                        while (hardcount < max
-                               && scan < loceol
-                               && (to_complement
-                                   ^ cBOOL(isVERTWS_utf8_safe(scan, loceol))))
-                        {
-                            scan += UTF8SKIP(scan);
-                            hardcount++;
-                        }
-                        break;
-                    case _CC_ENUM_CNTRL:
-                        while (hardcount < max
-                               && scan < loceol
-                               && (to_complement
-                                   ^ cBOOL(isCNTRL_utf8_safe(scan, loceol))))
-                        {
-                            scan += UTF8SKIP(scan);
-                            hardcount++;
-                        }
-                        break;
-                    default:
-                        Perl_croak(aTHX_ "panic: regrepeat() node %d='%s' has an unexpected character class '%d'", OP(p), PL_reg_name[OP(p)], classnum);
-                }
+                    break;
+                case _CC_ENUM_XDIGIT:
+                    while (hardcount < max
+                           && scan < this_eol
+                           && (to_complement
+                               ^ cBOOL(isXDIGIT_utf8_safe(scan, this_eol))))
+                    {
+                        scan += UTF8SKIP(scan);
+                        hardcount++;
+                    }
+                    break;
+                case _CC_ENUM_VERTSPACE:
+                    while (hardcount < max
+                           && scan < this_eol
+                           && (to_complement
+                               ^ cBOOL(isVERTWS_utf8_safe(scan, this_eol))))
+                    {
+                        scan += UTF8SKIP(scan);
+                        hardcount++;
+                    }
+                    break;
+                case _CC_ENUM_CNTRL:
+                    while (hardcount < max
+                           && scan < this_eol
+                           && (to_complement
+                               ^ cBOOL(isCNTRL_utf8_safe(scan, this_eol))))
+                    {
+                        scan += UTF8SKIP(scan);
+                        hardcount++;
+                    }
+                    break;
              }
         }
          break;
  
-      found_above_latin1:   /* Continuation of POSIXU and NPOSIXU */
-
-        /* Load the swash if not already present */
-        if (! PL_utf8_swash_ptrs[classnum]) {
-            U8 flags = _CORE_SWASH_INIT_ACCEPT_INVLIST;
-            PL_utf8_swash_ptrs[classnum] = _core_swash_init(
-                                        "utf8",
-                                        "",
-                                        &PL_sv_undef, 1, 0,
-                                        PL_XPosix_ptrs[classnum], &flags);
-        }
-
-        while (hardcount < max && scan < loceol
-               && to_complement ^ cBOOL(_generic_utf8_safe(
-                                       classnum,
-                                       scan,
-                                       loceol,
-                                       swash_fetch(PL_utf8_swash_ptrs[classnum],
-                                                   (U8 *) scan,
-                                                   TRUE))))
-        {
-            scan += UTF8SKIP(scan);
-            hardcount++;
-        }
-        break;
-
      case LNBREAK:
          if (utf8_target) {
-           while (hardcount < max && scan < loceol &&
-                    (c=is_LNBREAK_utf8_safe(scan, loceol))) {
+           while (hardcount < max && scan < this_eol &&
+                    (c=is_LNBREAK_utf8_safe(scan, this_eol))) {
                 scan += c;
                 hardcount++;
             }
         } else {
              /* LNBREAK can match one or two latin chars, which is ok, but we
               * have to use hardcount in this situation, and throw away the
-             * adjustment to <loceol> done before the switch statement */
-            loceol = reginfo->strend;
+             * adjustment to <this_eol> done before the switch statement */
             while (scan < loceol && (c=is_LNBREAK_latin1_safe(scan, loceol))) {
                 scan+=c;
                 hardcount++;
@@ -9731,27 +9650,6 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
      return(c);
  }
  
-
-#if !defined(PERL_IN_XSUB_RE) || defined(PLUGGABLE_RE_EXTENSION)
-/*
-- regclass_swash - prepare the utf8 swash.  Wraps the shared core version to
-create a copy so that changes the caller makes won't change the shared one.
-If <altsvp> is non-null, will return NULL in it, for back-compat.
- */
-SV *
-Perl_regclass_swash(pTHX_ const regexp *prog, const regnode* node, bool doinit, SV** listsvp, SV **altsvp)
-{
-    PERL_ARGS_ASSERT_REGCLASS_SWASH;
-
-    if (altsvp) {
-        *altsvp = NULL;
-    }
-
-    return newSVsv(_get_regclass_nonbitmap_data(prog, node, doinit, listsvp, NULL, NULL));
-}
-
-#endif /* !defined(PERL_IN_XSUB_RE) || defined(PLUGGABLE_RE_EXTENSION) */
-
  /*
   - reginclass - determine if a character falls into a character class
   
@@ -9790,13 +9688,16 @@ S_reginclass(pTHX_ regexp * const prog, const regnode * const n, const U8* const
                                                1 /* 1 means die */ );
              NOT_REACHED; /* NOTREACHED */
          }
-        if (c > 255 && OP(n) == ANYOFL && ! ANYOFL_UTF8_LOCALE_REQD(flags)) {
+        if (     c > 255
+            &&  (OP(n) == ANYOFL || OP(n) == ANYOFPOSIXL)
+            && ! ANYOFL_UTF8_LOCALE_REQD(flags))
+        {
              _CHECK_AND_OUTPUT_WIDE_LOCALE_CP_MSG(c);
          }
      }
  
      /* If this character is potentially in the bitmap, check it */
-    if (c < NUM_ANYOF_CODE_POINTS) {
+    if (c < NUM_ANYOF_CODE_POINTS && OP(n) != ANYOFH) {
         if (ANYOF_BITMAP_TEST(n, c))
             match = TRUE;
         else if ((flags
@@ -9808,14 +9709,14 @@ S_reginclass(pTHX_ regexp * const prog, const regnode * const n, const U8* const
             match = TRUE;
         }
         else if (flags & ANYOF_LOCALE_FLAGS) {
-           if ((flags & ANYOFL_FOLD)
-                && c < 256
+           if (  (flags & ANYOFL_FOLD)
+                && c < sizeof(PL_fold_locale)
                 && ANYOF_BITMAP_TEST(n, PL_fold_locale[c]))
              {
                  match = TRUE;
              }
-            else if (ANYOF_POSIXL_TEST_ANY_SET(n)
-                     && c < 256
+            else if (   ANYOF_POSIXL_TEST_ANY_SET(n)
+                     && c <= U8_MAX  /* param to isFOO_lc() */
              ) {
  
                  /* The data structure is arranged so bits 0, 2, 4, ... are set
@@ -9897,9 +9798,9 @@ S_reginclass(pTHX_ regexp * const prog, const regnode * const n, const U8* const
                           && IN_UTF8_CTYPE_LOCALE)))
          {
              SV* only_utf8_locale = NULL;
-           SV * const sw = _get_regclass_nonbitmap_data(prog, n, TRUE, 0,
-                                                       &only_utf8_locale, NULL);
-           if (sw) {
+           SV * const definition = _get_regclass_nonbitmap_data(prog, n, TRUE,
+                                                   0, &only_utf8_locale, NULL);
+           if (definition) {
                  U8 utf8_buffer[2];
                 U8 * utf8_p;
                 if (utf8_target) {
@@ -9910,7 +9811,27 @@ S_reginclass(pTHX_ regexp * const prog, const regnode * const n, const U8* const
                     utf8_p = utf8_buffer;
                 }
  
-               if (swash_fetch(sw, utf8_p, TRUE)) {
+                /* Turkish locales have these hard-coded rules overriding
+                 * normal ones */
+                if (   UNLIKELY(PL_in_utf8_turkic_locale)
+                    && isALPHA_FOLD_EQ(*p, 'i'))
+                {
+                    if (*p == 'i') {
+                        if (_invlist_contains_cp(definition,
+                                       LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE))
+                        {
+                            match = TRUE;
+                        }
+                    }
+                    else if (*p == 'I') {
+                        if (_invlist_contains_cp(definition,
+                                                LATIN_SMALL_LETTER_DOTLESS_I))
+                        {
+                            match = TRUE;
+                        }
+                    }
+                }
+                else if (_invlist_contains_cp(definition, c)) {
                     match = TRUE;
                  }
             }
@@ -9919,6 +9840,25 @@ S_reginclass(pTHX_ regexp * const prog, const regnode * const n, const U8* const
              }
         }
  
+        /* In a Turkic locale under folding, hard-code the I i case pair
+         * matches */
+        if (     UNLIKELY(PL_in_utf8_turkic_locale)
+            && ! match
+            &&   (flags & ANYOFL_FOLD)
+            &&   utf8_target)
+        {
+            if (c == LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE) {
+               if (ANYOF_BITMAP_TEST(n, 'i')) {
+                    match = TRUE;
+                }
+            }
+            else if (c == LATIN_SMALL_LETTER_DOTLESS_I) {
+               if (ANYOF_BITMAP_TEST(n, 'I')) {
+                    match = TRUE;
+                }
+            }
+        }
+
          if (UNICODE_IS_SUPER(c)
              && (flags
                 & ANYOF_SHARED_d_MATCHES_ALL_NON_UTF8_NON_ASCII_non_d_WARN_SUPER)
@@ -10261,12 +10201,21 @@ Perl__is_grapheme(pTHX_ const U8 * strbeg, const U8 * s, const U8 * strend, cons
       * so code using it would then break), and there has to be a GCB break
       * before and after the character. */
  
+    dVAR;
+
      GCB_enum cp_gcb_val, prev_cp_gcb_val, next_cp_gcb_val;
      const U8 * prev_cp_start;
  
      PERL_ARGS_ASSERT__IS_GRAPHEME;
  
-    /* Unassigned code points are forbidden */
+    if (   UNLIKELY(UNICODE_IS_SUPER(cp))
+        || UNLIKELY(UNICODE_IS_NONCHAR(cp)))
+    {
+        /* These are considered graphemes */
+        return TRUE;
+    }
+
+    /* Otherwise, unassigned code points are forbidden */
      if (UNLIKELY(! ELEMENT_RANGE_MATCHES_INVLIST(
                                      _invlist_search(PL_Assigned_invlist, cp))))
      {
@@ -10304,15 +10253,57 @@ Perl__is_grapheme(pTHX_ const U8 * strbeg, const U8 * s, const U8 * strend, cons
      return isGCB(cp_gcb_val, next_cp_gcb_val, strbeg, s, TRUE);
  }
  
+/*
+=head1 Unicode Support
+
+=for apidoc isSCRIPT_RUN
+
+Returns a bool as to whether or not the sequence of bytes from C<s> up to but
+not including C<send> form a "script run".  C<utf8_target> is TRUE iff the
+sequence starting at C<s> is to be treated as UTF-8.  To be precise, except for
+two degenerate cases given below, this function returns TRUE iff all code
+points in it come from any combination of three "scripts" given by the Unicode
+"Script Extensions" property: Common, Inherited, and possibly one other.
+Additionally all decimal digits must come from the same consecutive sequence of
+10.
+
+For example, if all the characters in the sequence are Greek, or Common, or
+Inherited, this function will return TRUE, provided any decimal digits in it
+are from the same block of digits in Common.  (These are the ASCII digits
+"0".."9" and additionally a block for full width forms of these, and several
+others used in mathematical notation.)   For scripts (unlike Greek) that have
+their own digits defined this will accept either digits from that set or from
+one of the Common digit sets, but not a combination of the two.  Some scripts,
+such as Arabic, have more than one set of digits.  All digits must come from
+the same set for this function to return TRUE.
+
+C<*ret_script>, if C<ret_script> is not NULL, will on return of TRUE
+contain the script found, using the C<SCX_enum> typedef.  Its value will be
+C<SCX_INVALID> if the function returns FALSE.
+
+If the sequence is empty, TRUE is returned, but C<*ret_script> (if asked for)
+will be C<SCX_INVALID>.
+
+If the sequence contains a single code point which is unassigned to a character
+in the version of Unicode being used, the function will return TRUE, and the
+script will be C<SCX_Unknown>.  Any other combination of unassigned code points
+in the input sequence will result in the function treating the input as not
+being a script run.
+
+The returned script will be C<SCX_Inherited> iff all the code points in it are
+from the Inherited script.
+
+Otherwise, the returned script will be C<SCX_Common> iff all the code points in
+it are from the Inherited or Common scripts.
+
+=cut
+
+*/
+
  bool
  Perl_isSCRIPT_RUN(pTHX_ const U8 * s, const U8 * send, const bool utf8_target)
  {
-    /* Checks that every character in the sequence from 's' to 'send' is one of
-     * three scripts: Common, Inherited, and possibly one other.  Additionally
-     * all decimal digits must come from the same consecutive sequence of 10.
-     * 'utf8_target' is TRUE iff the sequence is encoded in UTF-8.
-     *
-     * Basically, it looks at each character in the sequence to see if the
+    /* Basically, it looks at each character in the sequence to see if the
       * above conditions are met; if not it fails.  It uses an inversion map to
       * find the enum corresponding to the script of each character.  But this
       * is complicated by the fact that a few code points can be in any of
@@ -10325,17 +10316,6 @@ Perl_isSCRIPT_RUN(pTHX_ const U8 * s, const U8 * send, const bool utf8_target)
       * These are all defined in charclass_invlists.h */
  
      /* XXX Here are the additional things UTS 39 says could be done:
-     * Mark Chinese strings as “mixed script” if they contain both simplified
-     * (S) and traditional (T) Chinese characters, using the Unihan data in the
-     * Unicode Character Database [UCD].  The criterion can only be applied if
-     * the language of the string is known to be Chinese. So, for example, the
-     * string “写真だけの結婚式 ” is Japanese, and should not be marked as
-     * mixed script because of a mixture of S and T characters.  Testing for
-     * whether a character is S or T needs to be based not on whether the
-     * character has a S or T variant , but whether the character is an S or T
-     * variant. khw notes that the sample contains a Hiragana character, and it
-     * is unclear if absence of any foreign script marks the script as
-     * "Chinese"
       *
       * Forbid sequences of the same nonspacing mark
       *
@@ -10343,13 +10323,16 @@ Perl_isSCRIPT_RUN(pTHX_ const U8 * s, const U8 * send, const bool utf8_target)
       * characters for at least one language in the Unicode Common Locale Data
       * Repository [CLDR]. */
  
+    dVAR;
  
      /* Things that match /\d/u */
      SV * decimals_invlist = PL_XPosix_ptrs[_CC_DIGIT];
      UV * decimals_array = invlist_array(decimals_invlist);
  
-    /* What code point is the digit '0' of the script run? */
+    /* What code point is the digit '0' of the script run? (0 meaning FALSE if
+     * not currently known) */
      UV zero_of_run = 0;
+
      SCX_enum script_of_run  = SCX_INVALID;   /* Illegal value */
      SCX_enum script_of_char = SCX_INVALID;
  
@@ -10359,22 +10342,51 @@ Perl_isSCRIPT_RUN(pTHX_ const U8 * s, const U8 * send, const bool utf8_target)
      PERL_UINT_FAST8_T intersection_len = 0;
  
      bool retval = TRUE;
+    SCX_enum * ret_script = NULL;
  
-    assert(send > s);
+    assert(send >= s);
  
      PERL_ARGS_ASSERT_ISSCRIPT_RUN;
  
+    /* All code points in 0..255 are either Common or Latin, so must be a
+     * script run.  We can return immediately unless we need to know which
+     * script it is. */
+    if (! utf8_target && LIKELY(send > s)) {
+        if (ret_script == NULL) {
+            return TRUE;
+        }
+
+        /* If any character is Latin, the run is Latin */
+        while (s < send) {
+            if (isALPHA_L1(*s) && LIKELY(*s != MICRO_SIGN_NATIVE)) {
+                *ret_script = SCX_Latin;
+                return TRUE;
+            }
+        }
+
+        /* Here, all are Common */
+        *ret_script = SCX_Common;
+        return TRUE;
+    }
+
      /* Look at each character in the sequence */
      while (s < send) {
+        /* If the current character being examined is a digit, this is the code
+         * point of the zero for its sequence of 10 */
+        UV zero_of_char;
+
          UV cp;
  
          /* The code allows all scripts to use the ASCII digits.  This is
-         * because they are used in commerce even in scripts that have their
-         * own set.  Hence any ASCII ones found are ok, unless a digit from
-         * another set has already been encountered.  (The other digit ranges
-         * in Common are not similarly blessed) */
+         * because they are in the Common script.  Hence any ASCII ones found
+         * are ok, unless and until a digit from another set has already been
+         * encountered.  (Other digit ranges in Common are not similarly blessed) */
          if (UNLIKELY(isDIGIT(*s))) {
-            if (zero_of_run > 0) {
+            if (UNLIKELY(script_of_run == SCX_Unknown)) {
+                retval = FALSE;
+                break;
+            }
+            if (zero_of_run) {
                  if (zero_of_run != '0') {
                      retval = FALSE;
                      break;
@@ -10388,7 +10400,7 @@ Perl_isSCRIPT_RUN(pTHX_ const U8 * s, const U8 * send, const bool utf8_target)
          }
  
          /* Here, isn't an ASCII digit.  Find the code point of the character */
-        if (utf8_target && ! UTF8_IS_INVARIANT(*s)) {
+        if (! UTF8_IS_INVARIANT(*s)) {
              Size_t len;
              cp = valid_utf8_to_uvchr((U8 *) s, &len);
              s += len;
@@ -10400,7 +10412,7 @@ Perl_isSCRIPT_RUN(pTHX_ const U8 * s, const U8 * send, const bool utf8_target)
          /* If is within the range [+0 .. +9] of the script's zero, it also is a
           * digit in that script.  We can skip the rest of this code for this
           * character. */
-        if (UNLIKELY(   zero_of_run > 0
+        if (UNLIKELY(   zero_of_run
                       && cp >= zero_of_run
                       && cp - zero_of_run <= 9))
          {
@@ -10436,9 +10448,20 @@ Perl_isSCRIPT_RUN(pTHX_ const U8 * s, const U8 * send, const bool utf8_target)
              break;
          }
  
+        /* For the first character, or if the run so far is Inherited, the
+         * run's script is set to the char's */
+        if (   UNLIKELY(script_of_run == SCX_INVALID)
+            || UNLIKELY(script_of_run == SCX_Inherited))
+        {
+            script_of_run = script_of_char;
+        }
+
+        /* For the character's script to be Unknown, it must be the first
+         * character in the sequence (for otherwise a test above would have
+         * prevented us from reaching here), and we have set the run's script
+         * to it.  Nothing further to be done for this character */
          if (UNLIKELY(script_of_char == SCX_Unknown)) {
-                script_of_run = SCX_Unknown;
-                continue;
+            continue;
          }
  
          /* We accept 'inherited' script characters currently even at the
@@ -10448,43 +10471,19 @@ Perl_isSCRIPT_RUN(pTHX_ const U8 * s, const U8 * send, const bool utf8_target)
              continue;
          }
  
-        /* If unknown, the run's script is set to the char's */
-        if (UNLIKELY(script_of_run == SCX_INVALID)) {
-            script_of_run = script_of_char;
-        }
-
          /* If the run so far is Common, and the new character isn't, change the
           * run's script to that of this character */
          if (script_of_run == SCX_Common && script_of_char != SCX_Common) {
-
-            /* But Common contains several sets of digits.  Only the '0' set
-             * can be part of another script. */
-            if (zero_of_run > 0 && zero_of_run != '0') {
-                retval = FALSE;
-                break;
-            }
-
              script_of_run = script_of_char;
          }
  
-        /* All decimal digits must be from the same sequence of 10.  Above, we
-         * handled any ASCII digits without descending to here.  We also
-         * handled the case where we already knew what digit sequence is the
-         * one to use, and the character is in that sequence.  Now that we know
-         * the script, we can use script_zeros[] to directly find which
-         * sequence the script uses, except in a few cases it returns 0 */
-        if (UNLIKELY(zero_of_run == 0) && script_of_char >= 0) {
-            zero_of_run = script_zeros[script_of_char];
-        }
-
-        /* Now we can see if the script of the character is the same as that of
-         * the run */
+        /* Now we can see if the script of the new character is the same as
+         * that of the run */
          if (LIKELY(script_of_char == script_of_run)) {
              /* By far the most common case */
              goto scripts_match;
          }
  
-
          /* Here, the script of the run isn't Common.  But characters in Common
           * match any script */
          if (script_of_char == SCX_Common) {
@@ -10495,6 +10494,7 @@ Perl_isSCRIPT_RUN(pTHX_ const U8 * s, const U8 * send, const bool utf8_target)
  
          /* Too early a Unicode version to have a code point belonging to more
           * than one script, so, if the scripts don't exactly match, fail */
+        PERL_UNUSED_VAR(intersection_len);
          retval = FALSE;
          break;
  
@@ -10615,9 +10615,11 @@ Perl_isSCRIPT_RUN(pTHX_ const U8 * s, const U8 * send, const bool utf8_target)
              /* If there is only a single script in common, set to that.
               * Otherwise, use the intersection going forward */
              Safefree(intersection);
+            intersection = NULL;
              if (intersection_len == 1) {
                  script_of_run = script_of_char = new_overlap[0];
                  Safefree(new_overlap);
+                new_overlap = NULL;
              }
              else {
                  intersection = new_overlap;
@@ -10629,44 +10631,69 @@ Perl_isSCRIPT_RUN(pTHX_ const U8 * s, const U8 * send, const bool utf8_target)
    scripts_match:
  
          /* Here, the script of the character is compatible with that of the
-         * run.  Either they match exactly, or one or both can be any of
-         * several scripts, and the intersection is not empty.  If the
-         * character is not a decimal digit, we are done with it.  Otherwise,
-         * it could still fail if it is from a different set of 10 than seen
-         * already (or we may not have seen any, and we need to set the
-         * sequence).  If we have determined a single script and that script
-         * only has one set of digits (almost all scripts are like that), then
-         * this isn't a problem, as any digit must come from the same sequence.
-         * The only scripts that have multiple sequences have been constructed
-         * to be 0 in 'script_zeros[]'.
-         *
-         * Here we check if it is a digit. */
-        if (    cp >= FIRST_NON_ASCII_DECIMAL_DIGIT
-            && (   (          zero_of_run == 0
-                    || (  (   script_of_char >= 0
-                           && script_zeros[script_of_char] == 0)
-                        ||    intersection))))
+         * run.  That means that in most cases, it continues the script run.
+         * Either it and the run match exactly, or one or both can be in any of
+         * several scripts, and the intersection is not empty.  However, if the
+         * character is a decimal digit, it could still mean failure if it is
+         * from the wrong sequence of 10.  So, we need to check whether it's
+         * a digit.  We've already handled the 10 ASCII digits, and the next
+         * lowest one is this one: */
+        if (cp < FIRST_NON_ASCII_DECIMAL_DIGIT) {
+            continue;   /* Not a digit; this character is part of the run */
+        }
+
+        /* If we have a definitive '0' for the script of this character, we
+         * know that for this to be a digit, it must be in the range of +0..+9
+         * of that zero. */
+        if (   script_of_char >= 0
+            && (zero_of_char = script_zeros[script_of_char]))
          {
-            SSize_t range_zero_index;
-            range_zero_index = _invlist_search(decimals_invlist, cp);
-            if (   LIKELY(range_zero_index >= 0)
-                && ELEMENT_RANGE_MATCHES_INVLIST(range_zero_index))
+            if (   cp < zero_of_char
+                || cp > zero_of_char + 9)
              {
-                UV range_zero = decimals_array[range_zero_index];
-                if (zero_of_run) {
-                    if (zero_of_run != range_zero) {
-                        retval = FALSE;
-                        break;
-                    }
-                }
-                else {
-                    zero_of_run = range_zero;
-                }
+                continue;   /* Not a digit; this character is part of the run
+                             */
+            }
+
+        }
+        else {  /* Need to look up if this character is a digit or not */
+            SSize_t index_of_zero_of_char;
+            index_of_zero_of_char = _invlist_search(decimals_invlist, cp);
+            if (     UNLIKELY(index_of_zero_of_char < 0)
+                || ! ELEMENT_RANGE_MATCHES_INVLIST(index_of_zero_of_char))
+            {
+                continue;   /* Not a digit; this character is part of the run.
+                             */
+            }
+
+            zero_of_char = decimals_array[index_of_zero_of_char];
+        }
+
+        /* Here, the character is a decimal digit, and the zero of its sequence
+         * of 10 is in 'zero_of_char'.  If we already have a zero for this run,
+         * they better be the same. */
+        if (zero_of_run) {
+            if (zero_of_run != zero_of_char) {
+                retval = FALSE;
+                break;
              }
          }
+        else {  /* Otherwise we now have a zero for this run */
+            zero_of_run = zero_of_char;
+        }
      } /* end of looping through CLOSESR text */
  
      Safefree(intersection);
+
+    if (ret_script != NULL) {
+        if (retval) {
+            *ret_script = script_of_run;
+        }
+        else {
+            *ret_script = SCX_INVALID;
+        }
+    }
+
      return retval;
  }