[perl #115440] Fix various leaks with fatal FETCH

[perl5.git] / regexec.c
diff --git a/regexec.c b/regexec.c

index bad40dc..4029f1e 100644 (file)
--- a/regexec.c
+++ b/regexec.c
@@ -37,6 +37,11 @@
  #include "re_top.h"
  #endif
  
  #include "re_top.h"
  #endif
  
+/* At least one required character in the target string is expressible only in
+ * UTF-8. */
+const char* const non_utf8_target_but_utf8_required
+                = "Can't match, because target string needs to be in UTF-8\n";
+
  /*
   * pregcomp and pregexec -- regsub and regerror are not used in perl
   *
  /*
   * pregcomp and pregexec -- regsub and regerror are not used in perl
   *
@@ -81,6 +86,7 @@
  #endif
  
  #include "inline_invlist.c"
  #endif
  
  #include "inline_invlist.c"
+#include "unicode_constants.h"
  
  #define RF_tainted     1       /* tainted information used? e.g. locale */
  #define RF_warned      2               /* warned about big count? */
  
  #define RF_tainted     1       /* tainted information used? e.g. locale */
  #define RF_warned      2               /* warned about big count? */
@@ -89,14 +95,16 @@
  
  #define UTF_PATTERN ((PL_reg_flags & RF_utf8) != 0)
  
  
  #define UTF_PATTERN ((PL_reg_flags & RF_utf8) != 0)
  
+#define HAS_NONLATIN1_FOLD_CLOSURE(i) _HAS_NONLATIN1_FOLD_CLOSURE_ONLY_FOR_USE_BY_REGCOMP_DOT_C_AND_REGEXEC_DOT_C(i)
+
  #ifndef STATIC
  #define        STATIC  static
  #endif
  
  #ifndef STATIC
  #define        STATIC  static
  #endif
  
-/* Valid for non-utf8 strings, non-ANYOFV nodes only: avoids the reginclass
+/* Valid for non-utf8 strings: avoids the reginclass
   * call if there are no complications: i.e., if everything matchable is
   * straight forward in the bitmap */
   * call if there are no complications: i.e., if everything matchable is
   * straight forward in the bitmap */
-#define REGINCLASS(prog,p,c)  (ANYOF_FLAGS(p) ? reginclass(prog,p,c,0,0)   \
+#define REGINCLASS(prog,p,c)  (ANYOF_FLAGS(p) ? reginclass(prog,p,c,0)   \
                                               : ANYOF_BITMAP_TEST(p,*(c)))
  
  /*
                                               : ANYOF_BITMAP_TEST(p,*(c)))
  
  /*
@@ -120,21 +128,26 @@
  #define HOP3(pos,off,lim) (PL_reg_match_utf8 ? reghop3((U8*)(pos), off, (U8*)(lim)) : (U8*)(pos + off))
  #define HOP3c(pos,off,lim) ((char*)HOP3(pos,off,lim))
  
  #define HOP3(pos,off,lim) (PL_reg_match_utf8 ? reghop3((U8*)(pos), off, (U8*)(lim)) : (U8*)(pos + off))
  #define HOP3c(pos,off,lim) ((char*)HOP3(pos,off,lim))
  
+
+#define NEXTCHR_EOS -10 /* nextchr has fallen off the end */
+#define NEXTCHR_IS_EOS (nextchr < 0)
+
+#define SET_nextchr \
+    nextchr = ((locinput < PL_regeol) ? UCHARAT(locinput) : NEXTCHR_EOS)
+
+#define SET_locinput(p) \
+    locinput = (p);  \
+    SET_nextchr
+
+
  /* these are unrolled below in the CCC_TRY_XXX defined */
  /* these are unrolled below in the CCC_TRY_XXX defined */
-#ifdef EBCDIC
-    /* Often 'str' is a hard-coded utf8 string instead of utfebcdic. so just
-     * skip the check on EBCDIC platforms */
-#   define LOAD_UTF8_CHARCLASS(class,str) LOAD_UTF8_CHARCLASS_NO_CHECK(class)
-#else
-#   define LOAD_UTF8_CHARCLASS(class,str) STMT_START { \
+#define LOAD_UTF8_CHARCLASS(class,str) STMT_START { \
      if (!CAT2(PL_utf8_,class)) { \
         bool ok; \
         ENTER; save_re_context(); \
         ok=CAT2(is_utf8_,class)((const U8*)str); \
          PERL_UNUSED_VAR(ok); \
         assert(ok); assert(CAT2(PL_utf8_,class)); LEAVE; } } STMT_END
      if (!CAT2(PL_utf8_,class)) { \
         bool ok; \
         ENTER; save_re_context(); \
         ok=CAT2(is_utf8_,class)((const U8*)str); \
          PERL_UNUSED_VAR(ok); \
         assert(ok); assert(CAT2(PL_utf8_,class)); LEAVE; } } STMT_END
-#endif
-
  /* Doesn't do an assert to verify that is correct */
  #define LOAD_UTF8_CHARCLASS_NO_CHECK(class) STMT_START { \
      if (!CAT2(PL_utf8_,class)) { \
  /* Doesn't do an assert to verify that is correct */
  #define LOAD_UTF8_CHARCLASS_NO_CHECK(class) STMT_START { \
      if (!CAT2(PL_utf8_,class)) { \
@@ -148,21 +161,10 @@
  #define LOAD_UTF8_CHARCLASS_SPACE() LOAD_UTF8_CHARCLASS(space," ")
  
  #define LOAD_UTF8_CHARCLASS_GCB()  /* Grapheme cluster boundaries */        \
  #define LOAD_UTF8_CHARCLASS_SPACE() LOAD_UTF8_CHARCLASS(space," ")
  
  #define LOAD_UTF8_CHARCLASS_GCB()  /* Grapheme cluster boundaries */        \
-       LOAD_UTF8_CHARCLASS(X_begin, " ");                                  \
-       LOAD_UTF8_CHARCLASS(X_non_hangul, "A");                             \
-       /* These are utf8 constants, and not utf-ebcdic constants, so the   \
-           * assert should likely and hopefully fail on an EBCDIC machine */ \
-       LOAD_UTF8_CHARCLASS(X_extend, "\xcc\x80"); /* U+0300 */             \
-                                                                           \
-       /* No asserts are done for these, in case called on an early        \
-           * Unicode version in which they map to nothing */               \
-       LOAD_UTF8_CHARCLASS_NO_CHECK(X_prepend);/* U+0E40 "\xe0\xb9\x80" */ \
-       LOAD_UTF8_CHARCLASS_NO_CHECK(X_L);          /* U+1100 "\xe1\x84\x80" */ \
-       LOAD_UTF8_CHARCLASS_NO_CHECK(X_LV);     /* U+AC00 "\xea\xb0\x80" */ \
-       LOAD_UTF8_CHARCLASS_NO_CHECK(X_LVT);    /* U+AC01 "\xea\xb0\x81" */ \
-       LOAD_UTF8_CHARCLASS_NO_CHECK(X_LV_LVT_V);/* U+AC01 "\xea\xb0\x81" */\
-       LOAD_UTF8_CHARCLASS_NO_CHECK(X_T);      /* U+11A8 "\xe1\x86\xa8" */ \
-       LOAD_UTF8_CHARCLASS_NO_CHECK(X_V)       /* U+1160 "\xe1\x85\xa0" */  
+        /* No asserts are done for some of these, in case called on a   */  \
+        /* Unicode version in which they map to nothing */                  \
+       LOAD_UTF8_CHARCLASS(X_regular_begin, HYPHEN_UTF8);                          \
+       LOAD_UTF8_CHARCLASS(X_extend, COMBINING_GRAVE_ACCENT_UTF8);         \
  
  #define PLACEHOLDER    /* Something for the preprocessor to grab onto */
  
  
  #define PLACEHOLDER    /* Something for the preprocessor to grab onto */
  
@@ -182,7 +184,7 @@
   * fails, or advance to the next character */
  
  #define _CCC_TRY_CODE(POS_OR_NEG, FUNC, UTF8_TEST, CLASS, STR)                \
   * fails, or advance to the next character */
  
  #define _CCC_TRY_CODE(POS_OR_NEG, FUNC, UTF8_TEST, CLASS, STR)                \
-    if (locinput >= PL_regeol) {                                              \
+    if (NEXTCHR_IS_EOS) {                                              \
         sayNO;                                                                \
      }                                                                         \
      if (utf8_target && UTF8_IS_CONTINUED(nextchr)) {                          \
         sayNO;                                                                \
      }                                                                         \
      if (utf8_target && UTF8_IS_CONTINUED(nextchr)) {                          \
@@ -190,15 +192,11 @@
         if (POS_OR_NEG (UTF8_TEST)) {                                         \
             sayNO;                                                            \
         }                                                                     \
         if (POS_OR_NEG (UTF8_TEST)) {                                         \
             sayNO;                                                            \
         }                                                                     \
-       locinput += PL_utf8skip[nextchr];                                     \
-       nextchr = UCHARAT(locinput);                                          \
-       break;                                                                \
      }                                                                         \
      }                                                                         \
-    if (POS_OR_NEG (FUNC(nextchr))) {                                         \
-       sayNO;                                                                \
+    else if (POS_OR_NEG (FUNC(nextchr))) {                                    \
+            sayNO;                                                            \
      }                                                                         \
      }                                                                         \
-    nextchr = UCHARAT(++locinput);                                            \
-    break;
+    goto increment_locinput;
  
  /* Handle the non-locale cases for a character class and its complement.  It
   * calls _CCC_TRY_CODE with a ! to complement the test for the character class.
  
  /* Handle the non-locale cases for a character class and its complement.  It
   * calls _CCC_TRY_CODE with a ! to complement the test for the character class.
@@ -240,24 +238,17 @@
         _CCC_TRY_CODE( PLACEHOLDER, LCFUNC, LCFUNC_utf8((U8*)locinput),       \
                        CLASS, STR)                                            \
      case NAMEA:                                                               \
         _CCC_TRY_CODE( PLACEHOLDER, LCFUNC, LCFUNC_utf8((U8*)locinput),       \
                        CLASS, STR)                                            \
      case NAMEA:                                                               \
-       if (locinput >= PL_regeol || ! FUNCA(nextchr)) {                      \
+       if (NEXTCHR_IS_EOS || ! FUNCA(nextchr)) {                      \
             sayNO;                                                            \
         }                                                                     \
         /* Matched a utf8-invariant, so don't have to worry about utf8 */     \
             sayNO;                                                            \
         }                                                                     \
         /* Matched a utf8-invariant, so don't have to worry about utf8 */     \
-       nextchr = UCHARAT(++locinput);                                        \
+       locinput++;                                        \
         break;                                                                \
      case NNAMEA:                                                              \
         break;                                                                \
      case NNAMEA:                                                              \
-       if (locinput >= PL_regeol || FUNCA(nextchr)) {                        \
+       if (NEXTCHR_IS_EOS || FUNCA(nextchr)) {                        \
             sayNO;                                                            \
         }                                                                     \
             sayNO;                                                            \
         }                                                                     \
-       if (utf8_target) {                                                    \
-           locinput += PL_utf8skip[nextchr];                                 \
-           nextchr = UCHARAT(locinput);                                      \
-       }                                                                     \
-       else {                                                                \
-           nextchr = UCHARAT(++locinput);                                    \
-       }                                                                     \
-       break;                                                                \
+        goto increment_locinput;                                              \
      /* Generate the non-locale cases */                                       \
      _CCC_TRY_NONLOCALE(NAME, NNAME, FUNC, CLASS, STR)
  
      /* Generate the non-locale cases */                                       \
      _CCC_TRY_NONLOCALE(NAME, NNAME, FUNC, CLASS, STR)
  
@@ -512,10 +503,13 @@ S_regcp_restore(pTHX_ regexp *rex, I32 ix)
  I32
  Perl_pregexec(pTHX_ REGEXP * const prog, char* stringarg, register char *strend,
          char *strbeg, I32 minend, SV *screamer, U32 nosave)
  I32
  Perl_pregexec(pTHX_ REGEXP * const prog, char* stringarg, register char *strend,
          char *strbeg, I32 minend, SV *screamer, U32 nosave)
-/* strend: pointer to null at end of string */
-/* strbeg: real beginning of string */
-/* minend: end of match must be >=minend after stringarg. */
-/* nosave: For optimizations. */
+/* stringarg: the point in the string at which to begin matching */
+/* strend:    pointer to null at end of string */
+/* strbeg:    real beginning of string */
+/* minend:    end of match must be >= minend bytes after stringarg. */
+/* screamer:  SV being matched: only used for utf8 flag, pos() etc; string
+ *            itself is accessed via the pointers above */
+/* nosave:    For optimizations. */
  {
      PERL_ARGS_ASSERT_PREGEXEC;
  
  {
      PERL_ARGS_ASSERT_PREGEXEC;
  
@@ -622,22 +616,36 @@ Perl_re_intuit_start(pTHX_ REGEXP * const rx, SV *sv, char *strpos,
         goto fail;
      }
                  
         goto fail;
      }
                  
-    strbeg = (sv && SvPOK(sv)) ? strend - SvCUR(sv) : strpos;
+    /* XXX we need to pass strbeg as a separate arg: the following is
+     * guesswork and can be wrong... */
+    if (sv && SvPOK(sv)) {
+        char * p   = SvPVX(sv);
+        STRLEN cur = SvCUR(sv); 
+        if (p <= strpos && strpos < p + cur) {
+            strbeg = p;
+            assert(p <= strend && strend <= p + cur);
+        }
+        else
+            strbeg = strend - cur;
+    }
+    else 
+        strbeg = strpos;
+
      PL_regeol = strend;
      if (utf8_target) {
         if (!prog->check_utf8 && prog->check_substr)
             to_utf8_substr(prog);
         check = prog->check_utf8;
      } else {
      PL_regeol = strend;
      if (utf8_target) {
         if (!prog->check_utf8 && prog->check_substr)
             to_utf8_substr(prog);
         check = prog->check_utf8;
      } else {
-       if (!prog->check_substr && prog->check_utf8)
-           to_byte_substr(prog);
+       if (!prog->check_substr && prog->check_utf8) {
+           if (! to_byte_substr(prog)) {
+                DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
+                                        non_utf8_target_but_utf8_required));
+                goto fail;
+            }
+        }
         check = prog->check_substr;
      }
         check = prog->check_substr;
      }
-    if (check == &PL_sv_undef) {
-       DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
-               "Non-utf8 string cannot match utf8 check string\n"));
-       goto fail;
-    }
      if (prog->extflags & RXf_ANCH) {   /* Match at beg-of-str or after \n */
         ml_anch = !( (prog->extflags & RXf_ANCH_SINGLE)
                      || ( (prog->extflags & RXf_ANCH_BOL)
      if (prog->extflags & RXf_ANCH) {   /* Match at beg-of-str or after \n */
         ml_anch = !( (prog->extflags & RXf_ANCH_SINGLE)
                      || ( (prog->extflags & RXf_ANCH_BOL)
@@ -652,8 +660,12 @@ Perl_re_intuit_start(pTHX_ REGEXP * const rx, SV *sv, char *strpos,
               DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
               goto fail;
           }
               DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
               goto fail;
           }
-         if (prog->check_offset_min == prog->check_offset_max &&
-             !(prog->extflags & RXf_CANY_SEEN)) {
+         if (prog->check_offset_min == prog->check_offset_max
+              && !(prog->extflags & RXf_CANY_SEEN)
+              && ! multiline)   /* /m can cause \n's to match that aren't
+                                   accounted for in the string max length.
+                                   See [perl #115242] */
+          {
             /* Substring at constant offset from beg-of-str... */
             I32 slen;
  
             /* Substring at constant offset from beg-of-str... */
             I32 slen;
  
@@ -1263,7 +1275,7 @@ STMT_START {                                              \
  
  #define REXEC_FBC_UTF8_SCAN(CoDe)                     \
  STMT_START {                                          \
  
  #define REXEC_FBC_UTF8_SCAN(CoDe)                     \
  STMT_START {                                          \
-    while (s + (uskip = UTF8SKIP(s)) <= strend) {     \
+    while (s < strend && s + (uskip = UTF8SKIP(s)) <= strend) {     \
         CoDe                                          \
         s += uskip;                                   \
      }                                                 \
         CoDe                                          \
         s += uskip;                                   \
      }                                                 \
@@ -1444,12 +1456,10 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
          
         /* We know what class it must start with. */
         switch (OP(c)) {
          
         /* We know what class it must start with. */
         switch (OP(c)) {
-       case ANYOFV:
         case ANYOF:
         case ANYOF:
-           if (utf8_target || OP(c) == ANYOFV) {
-               STRLEN inclasslen = strend - s;
+           if (utf8_target) {
                 REXEC_FBC_UTF8_CLASS_SCAN(
                 REXEC_FBC_UTF8_CLASS_SCAN(
-                          reginclass(prog, c, (U8*)s, &inclasslen, utf8_target));
+                          reginclass(prog, c, (U8*)s, utf8_target));
             }
             else {
                 REXEC_FBC_CLASS_SCAN(REGINCLASS(prog, c, (U8*)s));
             }
             else {
                 REXEC_FBC_CLASS_SCAN(REGINCLASS(prog, c, (U8*)s));
@@ -1788,32 +1798,32 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
             break;
         case LNBREAK:
             REXEC_FBC_CSCAN(
             break;
         case LNBREAK:
             REXEC_FBC_CSCAN(
-               is_LNBREAK_utf8(s),
-               is_LNBREAK_latin1(s)
+               is_LNBREAK_utf8_safe(s, strend),
+               is_LNBREAK_latin1_safe(s, strend)
             );
             break;
         case VERTWS:
             REXEC_FBC_CSCAN(
             );
             break;
         case VERTWS:
             REXEC_FBC_CSCAN(
-               is_VERTWS_utf8(s),
-               is_VERTWS_latin1(s)
+               is_VERTWS_utf8_safe(s, strend),
+               is_VERTWS_latin1_safe(s, strend)
             );
             break;
         case NVERTWS:
             REXEC_FBC_CSCAN(
             );
             break;
         case NVERTWS:
             REXEC_FBC_CSCAN(
-               !is_VERTWS_utf8(s),
-               !is_VERTWS_latin1(s)
+               !is_VERTWS_utf8_safe(s, strend),
+               !is_VERTWS_latin1_safe(s, strend)
             );
             break;
         case HORIZWS:
             REXEC_FBC_CSCAN(
             );
             break;
         case HORIZWS:
             REXEC_FBC_CSCAN(
-               is_HORIZWS_utf8(s),
-               is_HORIZWS_latin1(s)
+               is_HORIZWS_utf8_safe(s, strend),
+               is_HORIZWS_latin1_safe(s, strend)
             );
             break;
         case NHORIZWS:
             REXEC_FBC_CSCAN(
             );
             break;
         case NHORIZWS:
             REXEC_FBC_CSCAN(
-               !is_HORIZWS_utf8(s),
-               !is_HORIZWS_latin1(s)
+               !is_HORIZWS_utf8_safe(s, strend),
+               !is_HORIZWS_latin1_safe(s, strend)
             );      
             break;
         case POSIXA:
             );      
             break;
         case POSIXA:
@@ -1948,16 +1958,24 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
                              
                          }
                          points[pointpos++ % maxlen]= uc;
                              
                          }
                          points[pointpos++ % maxlen]= uc;
-                       REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc,
+                        if (foldlen || uc < (U8*)strend) {
+                            REXEC_TRIE_READ_CHAR(trie_type, trie,
+                                             widecharmap, uc,
                                              uscan, len, uvc, charid, foldlen,
                                              foldbuf, uniflags);
                                              uscan, len, uvc, charid, foldlen,
                                              foldbuf, uniflags);
-                        DEBUG_TRIE_EXECUTE_r({
-                            dump_exec_pos( (char *)uc, c, strend, real_start, 
-                                s,   utf8_target );
-                            PerlIO_printf(Perl_debug_log,
-                                " Charid:%3u CP:%4"UVxf" ",
-                                 charid, uvc);
-                        });
+                            DEBUG_TRIE_EXECUTE_r({
+                                dump_exec_pos( (char *)uc, c, strend,
+                                            real_start, s, utf8_target);
+                                PerlIO_printf(Perl_debug_log,
+                                    " Charid:%3u CP:%4"UVxf" ",
+                                     charid, uvc);
+                            });
+                        }
+                        else {
+                            len = 0;
+                            charid = 0;
+                        }
+
  
                          do {
  #ifdef DEBUGGING
  
                          do {
  #ifdef DEBUGGING
@@ -2061,13 +2079,17 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
  I32
  Perl_regexec_flags(pTHX_ REGEXP * const rx, char *stringarg, register char *strend,
               char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
  I32
  Perl_regexec_flags(pTHX_ REGEXP * const rx, char *stringarg, register char *strend,
               char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
-/* strend: pointer to null at end of string */
-/* strbeg: real beginning of string */
-/* minend: end of match must be >=minend after stringarg. */
-/* data: May be used for some additional optimizations. 
-         Currently its only used, with a U32 cast, for transmitting 
-         the ganch offset when doing a /g match. This will change */
-/* nosave: For optimizations. */
+/* stringarg: the point in the string at which to begin matching */
+/* strend:    pointer to null at end of string */
+/* strbeg:    real beginning of string */
+/* minend:    end of match must be >= minend bytes after stringarg. */
+/* sv:        SV being matched: only used for utf8 flag, pos() etc; string
+ *            itself is accessed via the pointers above */
+/* data:      May be used for some additional optimizations.
+              Currently its only used, with a U32 cast, for transmitting
+              the ganch offset when doing a /g match. This will change */
+/* nosave:    For optimizations. */
+
  {
      dVAR;
      struct regexp *const prog = (struct regexp *)SvANY(rx);
  {
      dVAR;
      struct regexp *const prog = (struct regexp *)SvANY(rx);
@@ -2304,11 +2326,11 @@ Perl_regexec_flags(pTHX_ REGEXP * const rx, char *stringarg, register char *stre
  #ifdef DEBUGGING
         int did_match = 0;
  #endif
  #ifdef DEBUGGING
         int did_match = 0;
  #endif
-       if (!(utf8_target ? prog->anchored_utf8 : prog->anchored_substr))
-           utf8_target ? to_utf8_substr(prog) : to_byte_substr(prog);
-       ch = SvPVX_const(utf8_target ? prog->anchored_utf8 : prog->anchored_substr)[0];
-
         if (utf8_target) {
         if (utf8_target) {
+            if (! prog->anchored_utf8) {
+                to_utf8_substr(prog);
+            }
+            ch = SvPVX_const(prog->anchored_utf8)[0];
             REXEC_FBC_SCAN(
                 if (*s == ch) {
                     DEBUG_EXECUTE_r( did_match = 1 );
             REXEC_FBC_SCAN(
                 if (*s == ch) {
                     DEBUG_EXECUTE_r( did_match = 1 );
@@ -2318,8 +2340,17 @@ Perl_regexec_flags(pTHX_ REGEXP * const rx, char *stringarg, register char *stre
                         s += UTF8SKIP(s);
                 }
             );
                         s += UTF8SKIP(s);
                 }
             );
+
         }
         else {
         }
         else {
+            if (! prog->anchored_substr) {
+                if (! to_byte_substr(prog)) {
+                    DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
+                                            non_utf8_target_but_utf8_required));
+                    goto phooey;
+                }
+            }
+            ch = SvPVX_const(prog->anchored_substr)[0];
             REXEC_FBC_SCAN(
                 if (*s == ch) {
                     DEBUG_EXECUTE_r( did_match = 1 );
             REXEC_FBC_SCAN(
                 if (*s == ch) {
                     DEBUG_EXECUTE_r( did_match = 1 );
@@ -2348,23 +2379,44 @@ Perl_regexec_flags(pTHX_ REGEXP * const rx, char *stringarg, register char *stre
         int did_match = 0;
  #endif
         if (prog->anchored_substr || prog->anchored_utf8) {
         int did_match = 0;
  #endif
         if (prog->anchored_substr || prog->anchored_utf8) {
-           if (!(utf8_target ? prog->anchored_utf8 : prog->anchored_substr))
-               utf8_target ? to_utf8_substr(prog) : to_byte_substr(prog);
-           must = utf8_target ? prog->anchored_utf8 : prog->anchored_substr;
+           if (utf8_target) {
+                if (! prog->anchored_utf8) {
+                    to_utf8_substr(prog);
+                }
+                must = prog->anchored_utf8;
+            }
+            else {
+                if (! prog->anchored_substr) {
+                    if (! to_byte_substr(prog)) {
+                        DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
+                                            non_utf8_target_but_utf8_required));
+                        goto phooey;
+                    }
+                }
+                must = prog->anchored_substr;
+            }
             back_max = back_min = prog->anchored_offset;
         } else {
             back_max = back_min = prog->anchored_offset;
         } else {
-           if (!(utf8_target ? prog->float_utf8 : prog->float_substr))
-               utf8_target ? to_utf8_substr(prog) : to_byte_substr(prog);
-           must = utf8_target ? prog->float_utf8 : prog->float_substr;
+           if (utf8_target) {
+                if (! prog->float_utf8) {
+                    to_utf8_substr(prog);
+                }
+                must = prog->float_utf8;
+            }
+            else {
+                if (! prog->float_substr) {
+                    if (! to_byte_substr(prog)) {
+                        DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
+                                            non_utf8_target_but_utf8_required));
+                        goto phooey;
+                    }
+                }
+                must = prog->float_substr;
+            }
             back_max = prog->float_max_offset;
             back_min = prog->float_min_offset;
         }
             back_max = prog->float_max_offset;
             back_min = prog->float_min_offset;
         }
-       
             
             
-       if (must == &PL_sv_undef)
-           /* could not downgrade utf8 check substring, so must fail */
-           goto phooey;
-
          if (back_min<0) {
             last = strend;
         } else {
          if (back_min<0) {
             last = strend;
         } else {
@@ -2401,7 +2453,11 @@ Perl_regexec_flags(pTHX_ REGEXP * const rx, char *stringarg, register char *stre
                 while (s <= last1) {
                     if (regtry(&reginfo, &s))
                         goto got_it;
                 while (s <= last1) {
                     if (regtry(&reginfo, &s))
                         goto got_it;
-                   s += UTF8SKIP(s);
+                    if (s >= last1) {
+                        s++; /* to break out of outer loop */
+                        break;
+                    }
+                    s += UTF8SKIP(s);
                 }
             }
             else {
                 }
             }
             else {
@@ -2454,44 +2510,62 @@ Perl_regexec_flags(pTHX_ REGEXP * const rx, char *stringarg, register char *stre
             STRLEN len;
             const char *little;
  
             STRLEN len;
             const char *little;
  
-           if (!(utf8_target ? prog->float_utf8 : prog->float_substr))
-               utf8_target ? to_utf8_substr(prog) : to_byte_substr(prog);
-           float_real = utf8_target ? prog->float_utf8 : prog->float_substr;
+           if (utf8_target) {
+                if (! prog->float_utf8) {
+                    to_utf8_substr(prog);
+                }
+                float_real = prog->float_utf8;
+            }
+            else {
+                if (! prog->float_substr) {
+                    if (! to_byte_substr(prog)) {
+                        DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
+                                            non_utf8_target_but_utf8_required));
+                        goto phooey;
+                    }
+                }
+                float_real = prog->float_substr;
+            }
  
              little = SvPV_const(float_real, len);
             if (SvTAIL(float_real)) {
  
              little = SvPV_const(float_real, len);
             if (SvTAIL(float_real)) {
-                   /* This means that float_real contains an artificial \n on the end
-                    * due to the presence of something like this: /foo$/
-                    * where we can match both "foo" and "foo\n" at the end of the string.
-                    * So we have to compare the end of the string first against the float_real
-                    * without the \n and then against the full float_real with the string.
-                    * We have to watch out for cases where the string might be smaller
-                    * than the float_real or the float_real without the \n.
-                    */
+                    /* This means that float_real contains an artificial \n on
+                     * the end due to the presence of something like this:
+                     * /foo$/ where we can match both "foo" and "foo\n" at the
+                     * end of the string.  So we have to compare the end of the
+                     * string first against the float_real without the \n and
+                     * then against the full float_real with the string.  We
+                     * have to watch out for cases where the string might be
+                     * smaller than the float_real or the float_real without
+                     * the \n. */
                     char *checkpos= strend - len;
                     DEBUG_OPTIMISE_r(
                         PerlIO_printf(Perl_debug_log,
                             "%sChecking for float_real.%s\n",
                             PL_colors[4], PL_colors[5]));
                     if (checkpos + 1 < strbeg) {
                     char *checkpos= strend - len;
                     DEBUG_OPTIMISE_r(
                         PerlIO_printf(Perl_debug_log,
                             "%sChecking for float_real.%s\n",
                             PL_colors[4], PL_colors[5]));
                     if (checkpos + 1 < strbeg) {
-                       /* can't match, even if we remove the trailing \n string is too short to match */
+                        /* can't match, even if we remove the trailing \n
+                         * string is too short to match */
                         DEBUG_EXECUTE_r(
                             PerlIO_printf(Perl_debug_log,
                                 "%sString shorter than required trailing substring, cannot match.%s\n",
                                 PL_colors[4], PL_colors[5]));
                         goto phooey;
                     } else if (memEQ(checkpos + 1, little, len - 1)) {
                         DEBUG_EXECUTE_r(
                             PerlIO_printf(Perl_debug_log,
                                 "%sString shorter than required trailing substring, cannot match.%s\n",
                                 PL_colors[4], PL_colors[5]));
                         goto phooey;
                     } else if (memEQ(checkpos + 1, little, len - 1)) {
-                       /* can match, the end of the string matches without the "\n" */
+                        /* can match, the end of the string matches without the
+                         * "\n" */
                         last = checkpos + 1;
                     } else if (checkpos < strbeg) {
                         last = checkpos + 1;
                     } else if (checkpos < strbeg) {
-                       /* cant match, string is too short when the "\n" is included */
+                        /* cant match, string is too short when the "\n" is
+                         * included */
                         DEBUG_EXECUTE_r(
                             PerlIO_printf(Perl_debug_log,
                                 "%sString does not contain required trailing substring, cannot match.%s\n",
                                 PL_colors[4], PL_colors[5]));
                         goto phooey;
                     } else if (!multiline) {
                         DEBUG_EXECUTE_r(
                             PerlIO_printf(Perl_debug_log,
                                 "%sString does not contain required trailing substring, cannot match.%s\n",
                                 PL_colors[4], PL_colors[5]));
                         goto phooey;
                     } else if (!multiline) {
-                       /* non multiline match, so compare with the "\n" at the end of the string */
+                        /* non multiline match, so compare with the "\n" at the
+                         * end of the string */
                         if (memEQ(checkpos, little, len)) {
                             last= checkpos;
                         } else {
                         if (memEQ(checkpos, little, len)) {
                             last= checkpos;
                         } else {
@@ -2502,7 +2576,8 @@ Perl_regexec_flags(pTHX_ REGEXP * const rx, char *stringarg, register char *stre
                             goto phooey;
                         }
                     } else {
                             goto phooey;
                         }
                     } else {
-                       /* multiline match, so we have to search for a place where the full string is located */
+                        /* multiline match, so we have to search for a place
+                         * where the full string is located */
                         goto find_last;
                     }
             } else {
                         goto find_last;
                     }
             } else {
@@ -2513,11 +2588,11 @@ Perl_regexec_flags(pTHX_ REGEXP * const rx, char *stringarg, register char *stre
                         last = strend;  /* matching "$" */
             }
             if (!last) {
                         last = strend;  /* matching "$" */
             }
             if (!last) {
-               /* at one point this block contained a comment which was probably
-                * incorrect, which said that this was a "should not happen" case.
-                * Even if it was true when it was written I am pretty sure it is
-                * not anymore, so I have removed the comment and replaced it with
-                * this one. Yves */
+                /* at one point this block contained a comment which was
+                 * probably incorrect, which said that this was a "should not
+                 * happen" case.  Even if it was true when it was written I am
+                 * pretty sure it is not anymore, so I have removed the comment
+                 * and replaced it with this one. Yves */
                 DEBUG_EXECUTE_r(
                     PerlIO_printf(Perl_debug_log,
                         "String does not contain required substring, cannot match.\n"
                 DEBUG_EXECUTE_r(
                     PerlIO_printf(Perl_debug_log,
                         "String does not contain required substring, cannot match.\n"
@@ -2569,9 +2644,7 @@ got_it:
  
      /* make sure $`, $&, $', and $digit will work later */
      if ( !(flags & REXEC_NOT_FIRST) ) {
  
      /* make sure $`, $&, $', and $digit will work later */
      if ( !(flags & REXEC_NOT_FIRST) ) {
-       RX_MATCH_COPY_FREE(rx);
         if (flags & REXEC_COPY_STR) {
         if (flags & REXEC_COPY_STR) {
-           const I32 i = PL_regeol - startpos + (stringarg - strbeg);
  #ifdef PERL_OLD_COPY_ON_WRITE
             if ((SvIsCOW(sv)
                  || (SvFLAGS(sv) & CAN_COW_MASK) == CAN_COW_FLAGS)) {
  #ifdef PERL_OLD_COPY_ON_WRITE
             if ((SvIsCOW(sv)
                  || (SvFLAGS(sv) & CAN_COW_MASK) == CAN_COW_FLAGS)) {
@@ -2580,20 +2653,109 @@ got_it:
                                   "Copy on write: regexp capture, type %d\n",
                                   (int) SvTYPE(sv));
                 }
                                   "Copy on write: regexp capture, type %d\n",
                                   (int) SvTYPE(sv));
                 }
+                RX_MATCH_COPY_FREE(rx);
                 prog->saved_copy = sv_setsv_cow(prog->saved_copy, sv);
                 prog->subbeg = (char *)SvPVX_const(prog->saved_copy);
                 assert (SvPOKp(prog->saved_copy));
                 prog->saved_copy = sv_setsv_cow(prog->saved_copy, sv);
                 prog->subbeg = (char *)SvPVX_const(prog->saved_copy);
                 assert (SvPOKp(prog->saved_copy));
+                prog->sublen  = PL_regeol - strbeg;
+                prog->suboffset = 0;
+                prog->subcoffset = 0;
             } else
  #endif
             {
             } else
  #endif
             {
-               RX_MATCH_COPIED_on(rx);
-               s = savepvn(strbeg, i);
-               prog->subbeg = s;
-           }
-           prog->sublen = i;
+                I32 min = 0;
+                I32 max = PL_regeol - strbeg;
+                I32 sublen;
+
+                if (    (flags & REXEC_COPY_SKIP_POST)
+                    && !(RX_EXTFLAGS(rx) & RXf_PMf_KEEPCOPY) /* //p */
+                    && !(PL_sawampersand & SAWAMPERSAND_RIGHT)
+                ) { /* don't copy $' part of string */
+                    U32 n = 0;
+                    max = -1;
+                    /* calculate the right-most part of the string covered
+                     * by a capture. Due to look-ahead, this may be to
+                     * the right of $&, so we have to scan all captures */
+                    while (n <= prog->lastparen) {
+                        if (prog->offs[n].end > max)
+                            max = prog->offs[n].end;
+                        n++;
+                    }
+                    if (max == -1)
+                        max = (PL_sawampersand & SAWAMPERSAND_LEFT)
+                                ? prog->offs[0].start
+                                : 0;
+                    assert(max >= 0 && max <= PL_regeol - strbeg);
+                }
+
+                if (    (flags & REXEC_COPY_SKIP_PRE)
+                    && !(RX_EXTFLAGS(rx) & RXf_PMf_KEEPCOPY) /* //p */
+                    && !(PL_sawampersand & SAWAMPERSAND_LEFT)
+                ) { /* don't copy $` part of string */
+                    U32 n = 0;
+                    min = max;
+                    /* calculate the left-most part of the string covered
+                     * by a capture. Due to look-behind, this may be to
+                     * the left of $&, so we have to scan all captures */
+                    while (min && n <= prog->lastparen) {
+                        if (   prog->offs[n].start != -1
+                            && prog->offs[n].start < min)
+                        {
+                            min = prog->offs[n].start;
+                        }
+                        n++;
+                    }
+                    if ((PL_sawampersand & SAWAMPERSAND_RIGHT)
+                        && min >  prog->offs[0].end
+                    )
+                        min = prog->offs[0].end;
+
+                }
+
+                assert(min >= 0 && min <= max && min <= PL_regeol - strbeg);
+                sublen = max - min;
+
+                if (RX_MATCH_COPIED(rx)) {
+                    if (sublen > prog->sublen)
+                        prog->subbeg =
+                                (char*)saferealloc(prog->subbeg, sublen+1);
+                }
+                else
+                    prog->subbeg = (char*)safemalloc(sublen+1);
+                Copy(strbeg + min, prog->subbeg, sublen, char);
+                prog->subbeg[sublen] = '\0';
+                prog->suboffset = min;
+                prog->sublen = sublen;
+                RX_MATCH_COPIED_on(rx);
+           }
+            prog->subcoffset = prog->suboffset;
+            if (prog->suboffset && utf8_target) {
+                /* Convert byte offset to chars.
+                 * XXX ideally should only compute this if @-/@+
+                 * has been seen, a la PL_sawampersand ??? */
+
+                /* If there's a direct correspondence between the
+                 * string which we're matching and the original SV,
+                 * then we can use the utf8 len cache associated with
+                 * the SV. In particular, it means that under //g,
+                 * sv_pos_b2u() will use the previously cached
+                 * position to speed up working out the new length of
+                 * subcoffset, rather than counting from the start of
+                 * the string each time. This stops
+                 *   $x = "\x{100}" x 1E6; 1 while $x =~ /(.)/g;
+                 * from going quadratic */
+                if (SvPOKp(sv) && SvPVX(sv) == strbeg)
+                    sv_pos_b2u(sv, &(prog->subcoffset));
+                else
+                    prog->subcoffset = utf8_length((U8*)strbeg,
+                                        (U8*)(strbeg+prog->suboffset));
+            }
         }
         else {
         }
         else {
+            RX_MATCH_COPY_FREE(rx);
             prog->subbeg = strbeg;
             prog->subbeg = strbeg;
+           prog->suboffset = 0;
+           prog->subcoffset = 0;
             prog->sublen = PL_regeol - strbeg;  /* strend may have been modified */
         }
      }
             prog->sublen = PL_regeol - strbeg;  /* strend may have been modified */
         }
      }
@@ -2616,7 +2778,6 @@ phooey:
          Safefree(prog->offs);
          prog->offs = swap;
      }
          Safefree(prog->offs);
          prog->offs = swap;
      }
-
      return 0;
  }
  
      return 0;
  }
  
@@ -2635,12 +2796,13 @@ phooey:
   - regtry - try match at specific point
   */
  STATIC I32                     /* 0 failure, 1 success */
   - regtry - try match at specific point
   */
  STATIC I32                     /* 0 failure, 1 success */
-S_regtry(pTHX_ regmatch_info *reginfo, char **startpos)
+S_regtry(pTHX_ regmatch_info *reginfo, char **startposp)
  {
      dVAR;
      CHECKPOINT lastcp;
      REGEXP *const rx = reginfo->prog;
      regexp *const prog = (struct regexp *)SvANY(rx);
  {
      dVAR;
      CHECKPOINT lastcp;
      REGEXP *const rx = reginfo->prog;
      regexp *const prog = (struct regexp *)SvANY(rx);
+    I32 result;
      RXi_GET_DECL(prog,progi);
      GET_RE_DEBUG_FLAGS_DECL;
  
      RXi_GET_DECL(prog,progi);
      GET_RE_DEBUG_FLAGS_DECL;
  
@@ -2698,6 +2860,8 @@ S_regtry(pTHX_ regmatch_info *reginfo, char **startpos)
                 $` inside (?{}) could fail... */
             PL_reg_oldsaved = prog->subbeg;
             PL_reg_oldsavedlen = prog->sublen;
                 $` inside (?{}) could fail... */
             PL_reg_oldsaved = prog->subbeg;
             PL_reg_oldsavedlen = prog->sublen;
+           PL_reg_oldsavedoffset = prog->suboffset;
+           PL_reg_oldsavedcoffset = prog->suboffset;
  #ifdef PERL_OLD_COPY_ON_WRITE
             PL_nrs = prog->saved_copy;
  #endif
  #ifdef PERL_OLD_COPY_ON_WRITE
             PL_nrs = prog->saved_copy;
  #endif
@@ -2706,13 +2870,14 @@ S_regtry(pTHX_ regmatch_info *reginfo, char **startpos)
         else
             PL_reg_oldsaved = NULL;
         prog->subbeg = PL_bostr;
         else
             PL_reg_oldsaved = NULL;
         prog->subbeg = PL_bostr;
+       prog->suboffset = 0;
+       prog->subcoffset = 0;
         prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
      }
  #ifdef DEBUGGING
         prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
      }
  #ifdef DEBUGGING
-    PL_reg_starttry = *startpos;
+    PL_reg_starttry = *startposp;
  #endif
  #endif
-    prog->offs[0].start = *startpos - PL_bostr;
-    PL_reginput = *startpos;
+    prog->offs[0].start = *startposp - PL_bostr;
      prog->lastparen = 0;
      prog->lastcloseparen = 0;
      PL_regsize = 0;
      prog->lastparen = 0;
      prog->lastcloseparen = 0;
      PL_regsize = 0;
@@ -2742,12 +2907,13 @@ S_regtry(pTHX_ regmatch_info *reginfo, char **startpos)
      }
  #endif
      REGCP_SET(lastcp);
      }
  #endif
      REGCP_SET(lastcp);
-    if (regmatch(reginfo, progi->program + 1)) {
-       prog->offs[0].end = PL_reginput - PL_bostr;
+    result = regmatch(reginfo, *startposp, progi->program + 1);
+    if (result != -1) {
+       prog->offs[0].end = result;
         return 1;
      }
      if (reginfo->cutpoint)
         return 1;
      }
      if (reginfo->cutpoint)
-        *startpos= reginfo->cutpoint;
+        *startposp= reginfo->cutpoint;
      REGCP_UNWIND(lastcp);
      return 0;
  }
      REGCP_UNWIND(lastcp);
      return 0;
  }
@@ -2773,6 +2939,8 @@ S_regtry(pTHX_ regmatch_info *reginfo, char **startpos)
  
  #define CHRTEST_UNINIT -1001 /* c1/c2 haven't been calculated yet */
  #define CHRTEST_VOID   -1000 /* the c1/c2 "next char" test should be skipped */
  
  #define CHRTEST_UNINIT -1001 /* c1/c2 haven't been calculated yet */
  #define CHRTEST_VOID   -1000 /* the c1/c2 "next char" test should be skipped */
+#define CHRTEST_NOT_A_CP_1 -999
+#define CHRTEST_NOT_A_CP_2 -998
  
  #define SLAB_FIRST(s) (&(s)->states[0])
  #define SLAB_LAST(s)  (&(s)->states[PERL_REGMATCH_SLAB_SLOTS-1])
  
  #define SLAB_FIRST(s) (&(s)->states[0])
  #define SLAB_LAST(s)  (&(s)->states[PERL_REGMATCH_SLAB_SLOTS-1])
@@ -2799,20 +2967,23 @@ S_push_slab(pTHX)
  
  /* push a new state then goto it */
  
  
  /* push a new state then goto it */
  
-#define PUSH_STATE_GOTO(state, node) \
+#define PUSH_STATE_GOTO(state, node, input) \
+    pushinput = input; \
      scan = node; \
      st->resume_state = state; \
      goto push_state;
  
  /* push a new state with success backtracking, then goto it */
  
      scan = node; \
      st->resume_state = state; \
      goto push_state;
  
  /* push a new state with success backtracking, then goto it */
  
-#define PUSH_YES_STATE_GOTO(state, node) \
+#define PUSH_YES_STATE_GOTO(state, node, input) \
+    pushinput = input; \
      scan = node; \
      st->resume_state = state; \
      goto push_yes_state;
  
  
  
      scan = node; \
      st->resume_state = state; \
      goto push_yes_state;
  
  
  
+
  /*
  
  regmatch() - main matching routine
  /*
  
  regmatch() - main matching routine
@@ -2878,7 +3049,7 @@ implementation:
         // push a yes backtrack state with a resume value of
         // IFMATCH_A/IFMATCH_A_fail, then continue execution at the
         // first node of A:
         // push a yes backtrack state with a resume value of
         // IFMATCH_A/IFMATCH_A_fail, then continue execution at the
         // first node of A:
-       PUSH_YES_STATE_GOTO(IFMATCH_A, A);
+       PUSH_YES_STATE_GOTO(IFMATCH_A, A, newinput);
         // NOTREACHED
  
      case IFMATCH_A: // we have successfully executed A; now continue with B
         // NOTREACHED
  
      case IFMATCH_A: // we have successfully executed A; now continue with B
@@ -2915,8 +3086,8 @@ The topmost backtrack state, pointed to by st, is usually free. If you
  want to claim it, populate any ST.foo fields in it with values you wish to
  save, then do one of
  
  want to claim it, populate any ST.foo fields in it with values you wish to
  save, then do one of
  
-       PUSH_STATE_GOTO(resume_state, node);
-       PUSH_YES_STATE_GOTO(resume_state, node);
+       PUSH_STATE_GOTO(resume_state, node, newinput);
+       PUSH_YES_STATE_GOTO(resume_state, node, newinput);
  
  which sets that backtrack state's resume value to 'resume_state', pushes a
  new free entry to the top of the backtrack stack, then goes to 'node'.
  
  which sets that backtrack state's resume value to 'resume_state', pushes a
  new free entry to the top of the backtrack stack, then goes to 'node'.
@@ -3106,10 +3277,264 @@ S_clear_backtrack_stack(pTHX_ void *p)
         Safefree(osl);
      }
  }
         Safefree(osl);
      }
  }
+static bool
+S_setup_EXACTISH_ST_c1_c2(pTHX_ const regnode * const text_node, int *c1p, U8* c1_utf8, int *c2p, U8* c2_utf8)
+{
+    /* This function determines if there are one or two characters that match
+     * the first character of the passed-in EXACTish node <text_node>, and if
+     * so, returns them in the passed-in pointers.
+     *
+     * If it determines that no possible character in the target string can
+     * match, it returns FALSE; otherwise TRUE.  (The FALSE situation occurs if
+     * the first character in <text_node> requires UTF-8 to represent, and the
+     * target string isn't in UTF-8.)
+     *
+     * If there are more than two characters that could match the beginning of
+     * <text_node>, or if more context is required to determine a match or not,
+     * it sets both *<c1p> and *<c2p> to CHRTEST_VOID.
+     *
+     * The motiviation behind this function is to allow the caller to set up
+     * tight loops for matching.  If <text_node> is of type EXACT, there is
+     * only one possible character that can match its first character, and so
+     * the situation is quite simple.  But things get much more complicated if
+     * folding is involved.  It may be that the first character of an EXACTFish
+     * node doesn't participate in any possible fold, e.g., punctuation, so it
+     * can be matched only by itself.  The vast majority of characters that are
+     * in folds match just two things, their lower and upper-case equivalents.
+     * But not all are like that; some have multiple possible matches, or match
+     * sequences of more than one character.  This function sorts all that out.
+     *
+     * Consider the patterns A*B or A*?B where A and B are arbitrary.  In a
+     * loop of trying to match A*, we know we can't exit where the thing
+     * following it isn't a B.  And something can't be a B unless it is the
+     * beginning of B.  By putting a quick test for that beginning in a tight
+     * loop, we can rule out things that can't possibly be B without having to
+     * break out of the loop, thus avoiding work.  Similarly, if A is a single
+     * character, we can make a tight loop matching A*, using the outputs of
+     * this function.
+     *
+     * If the target string to match isn't in UTF-8, and there aren't
+     * complications which require CHRTEST_VOID, *<c1p> and *<c2p> are set to
+     * the one or two possible octets (which are characters in this situation)
+     * that can match.  In all cases, if there is only one character that can
+     * match, *<c1p> and *<c2p> will be identical.
+     *
+     * If the target string is in UTF-8, the buffers pointed to by <c1_utf8>
+     * and <c2_utf8> will contain the one or two UTF-8 sequences of bytes that
+     * can match the beginning of <text_node>.  They should be declared with at
+     * least length UTF8_MAXBYTES+1.  (If the target string isn't in UTF-8, it is
+     * undefined what these contain.)  If one or both of the buffers are
+     * invariant under UTF-8, *<c1p>, and *<c2p> will also be set to the
+     * corresponding invariant.  If variant, the corresponding *<c1p> and/or
+     * *<c2p> will be set to a negative number(s) that shouldn't match any code
+     * point (unless inappropriately coerced to unsigned).   *<c1p> will equal
+     * *<c2p> if and only if <c1_utf8> and <c2_utf8> are the same. */
  
  
+    const bool utf8_target = PL_reg_match_utf8;
  
  
-STATIC I32                     /* 0 failure, 1 success */
-S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
+    UV c1 = CHRTEST_NOT_A_CP_1;
+    UV c2 = CHRTEST_NOT_A_CP_2;
+    bool use_chrtest_void = FALSE;
+
+    /* Used when we have both utf8 input and utf8 output, to avoid converting
+     * to/from code points */
+    bool utf8_has_been_setup = FALSE;
+
+    dVAR;
+
+    U8 *pat = (U8*)STRING(text_node);
+
+    if (OP(text_node) == EXACT) {
+
+        /* In an exact node, only one thing can be matched, that first
+         * character.  If both the pat and the target are UTF-8, we can just
+         * copy the input to the output, avoiding finding the code point of
+         * that character */
+        if (! UTF_PATTERN) {
+            c2 = c1 = *pat;
+        }
+        else if (utf8_target) {
+            Copy(pat, c1_utf8, UTF8SKIP(pat), U8);
+            Copy(pat, c2_utf8, UTF8SKIP(pat), U8);
+            utf8_has_been_setup = TRUE;
+        }
+        else {
+            c2 = c1 = valid_utf8_to_uvchr(pat, NULL);
+        }
+    }
+    else /* an EXACTFish node */
+         if ((UTF_PATTERN
+                    && is_MULTI_CHAR_FOLD_utf8_safe(pat,
+                                                    pat + STR_LEN(text_node)))
+             || (! UTF_PATTERN
+                    && is_MULTI_CHAR_FOLD_latin1_safe(pat,
+                                                    pat + STR_LEN(text_node))))
+    {
+        /* Multi-character folds require more context to sort out.  Also
+         * PL_utf8_foldclosures used below doesn't handle them, so have to be
+         * handled outside this routine */
+        use_chrtest_void = TRUE;
+    }
+    else { /* an EXACTFish node which doesn't begin with a multi-char fold */
+        c1 = (UTF_PATTERN) ? valid_utf8_to_uvchr(pat, NULL) : *pat;
+        if (c1 > 256) {
+            /* Load the folds hash, if not already done */
+            SV** listp;
+            if (! PL_utf8_foldclosures) {
+                if (! PL_utf8_tofold) {
+                    U8 dummy[UTF8_MAXBYTES+1];
+
+                    /* Force loading this by folding an above-Latin1 char */
+                    to_utf8_fold((U8*) HYPHEN_UTF8, dummy, NULL);
+                    assert(PL_utf8_tofold); /* Verify that worked */
+                }
+                PL_utf8_foldclosures = _swash_inversion_hash(PL_utf8_tofold);
+            }
+
+            /* The fold closures data structure is a hash with the keys being
+             * the UTF-8 of every character that is folded to, like 'k', and
+             * the values each an array of all code points that fold to its
+             * key.  e.g. [ 'k', 'K', KELVIN_SIGN ].  Multi-character folds are
+             * not included */
+            if ((! (listp = hv_fetch(PL_utf8_foldclosures,
+                                     (char *) pat,
+                                     UTF8SKIP(pat),
+                                     FALSE))))
+            {
+                /* Not found in the hash, therefore there are no folds
+                 * containing it, so there is only a single character that
+                 * could match */
+                c2 = c1;
+            }
+            else {  /* Does participate in folds */
+                AV* list = (AV*) *listp;
+                if (av_len(list) != 1) {
+
+                    /* If there aren't exactly two folds to this, it is outside
+                     * the scope of this function */
+                    use_chrtest_void = TRUE;
+                }
+                else {  /* There are two.  Get them */
+                    SV** c_p = av_fetch(list, 0, FALSE);
+                    if (c_p == NULL) {
+                        Perl_croak(aTHX_ "panic: invalid PL_utf8_foldclosures structure");
+                    }
+                    c1 = SvUV(*c_p);
+
+                    c_p = av_fetch(list, 1, FALSE);
+                    if (c_p == NULL) {
+                        Perl_croak(aTHX_ "panic: invalid PL_utf8_foldclosures structure");
+                    }
+                    c2 = SvUV(*c_p);
+
+                    /* Folds that cross the 255/256 boundary are forbidden if
+                     * EXACTFL, or EXACTFA and one is ASCIII.  Since the
+                     * pattern character is above 256, and its only other match
+                     * is below 256, the only legal match will be to itself.
+                     * We have thrown away the original, so have to compute
+                     * which is the one above 255 */
+                    if ((c1 < 256) != (c2 < 256)) {
+                        if (OP(text_node) == EXACTFL
+                            || (OP(text_node) == EXACTFA
+                                && (isASCII(c1) || isASCII(c2))))
+                        {
+                            if (c1 < 256) {
+                                c1 = c2;
+                            }
+                            else {
+                                c2 = c1;
+                            }
+                        }
+                    }
+                }
+            }
+        }
+        else /* Here, c1 is < 255 */
+             if (utf8_target
+                 && HAS_NONLATIN1_FOLD_CLOSURE(c1)
+                 && OP(text_node) != EXACTFL
+                 && (OP(text_node) != EXACTFA || ! isASCII(c1)))
+        {
+            /* Here, there could be something above Latin1 in the target which
+             * folds to this character in the pattern.  All such cases except
+             * LATIN SMALL LETTER Y WITH DIAERESIS have more than two characters
+             * involved in their folds, so are outside the scope of this
+             * function */
+            if (UNLIKELY(c1 == LATIN_SMALL_LETTER_Y_WITH_DIAERESIS)) {
+                c2 = LATIN_CAPITAL_LETTER_Y_WITH_DIAERESIS;
+            }
+            else {
+                use_chrtest_void = TRUE;
+            }
+        }
+        else { /* Here nothing above Latin1 can fold to the pattern character */
+            switch (OP(text_node)) {
+
+                case EXACTFL:   /* /l rules */
+                    c2 = PL_fold_locale[c1];
+                    break;
+
+                case EXACTF:
+                    if (! utf8_target) {    /* /d rules */
+                        c2 = PL_fold[c1];
+                        break;
+                    }
+                    /* FALLTHROUGH */
+                    /* /u rules for all these.  This happens to work for
+                     * EXACTFA as nothing in Latin1 folds to ASCII */
+                case EXACTFA:
+                case EXACTFU_TRICKYFOLD:
+                case EXACTFU_SS:
+                case EXACTFU:
+                    c2 = PL_fold_latin1[c1];
+                    break;
+
+               default:
+                    Perl_croak(aTHX_ "panic: Unexpected op %u", OP(text_node));
+                    assert(0); /* NOTREACHED */
+            }
+        }
+    }
+
+    /* Here have figured things out.  Set up the returns */
+    if (use_chrtest_void) {
+        *c2p = *c1p = CHRTEST_VOID;
+    }
+    else if (utf8_target) {
+        if (! utf8_has_been_setup) {    /* Don't have the utf8; must get it */
+            uvchr_to_utf8(c1_utf8, c1);
+            uvchr_to_utf8(c2_utf8, c2);
+        }
+
+        /* Invariants are stored in both the utf8 and byte outputs; Use
+         * negative numbers otherwise for the byte ones.  Make sure that the
+         * byte ones are the same iff the utf8 ones are the same */
+        *c1p = (UTF8_IS_INVARIANT(*c1_utf8)) ? *c1_utf8 : CHRTEST_NOT_A_CP_1;
+        *c2p = (UTF8_IS_INVARIANT(*c2_utf8))
+                ? *c2_utf8
+                : (c1 == c2)
+                  ? CHRTEST_NOT_A_CP_1
+                  : CHRTEST_NOT_A_CP_2;
+    }
+    else if (c1 > 255) {
+       if (c2 > 255) {  /* both possibilities are above what a non-utf8 string
+                           can represent */
+           return FALSE;
+       }
+
+       *c1p = *c2p = c2;    /* c2 is the only representable value */
+    }
+    else {  /* c1 is representable; see about c2 */
+       *c1p = c1;
+       *c2p = (c2 < 256) ? c2 : c1;
+    }
+
+    return TRUE;
+}
+
+/* returns -1 on failure, $+[0] on success */
+STATIC I32
+S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
  {
  #if PERL_VERSION < 9 && !defined(PERL_CORE)
      dMY_CXT;
  {
  #if PERL_VERSION < 9 && !defined(PERL_CORE)
      dMY_CXT;
@@ -3128,7 +3553,8 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
      regnode *next;
      U32 n = 0; /* general value; init to avoid compiler warning */
      I32 ln = 0; /* len or last;  init to avoid compiler warning */
      regnode *next;
      U32 n = 0; /* general value; init to avoid compiler warning */
      I32 ln = 0; /* len or last;  init to avoid compiler warning */
-    char *locinput = PL_reginput;
+    char *locinput = startpos;
+    char *pushinput; /* where to continue after a PUSH */
      I32 nextchr;   /* is always set to UCHARAT(locinput) */
  
      bool result = 0;       /* return value of S_regmatch */
      I32 nextchr;   /* is always set to UCHARAT(locinput) */
  
      bool result = 0;       /* return value of S_regmatch */
@@ -3147,7 +3573,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
      U32 state_num;
      bool no_final = 0;      /* prevent failure from backtracking? */
      bool do_cutgroup = 0;   /* no_final only until next branch/trie entry */
      U32 state_num;
      bool no_final = 0;      /* prevent failure from backtracking? */
      bool do_cutgroup = 0;   /* no_final only until next branch/trie entry */
-    char *startpoint = PL_reginput;
+    char *startpoint = locinput;
      SV *popmark = NULL;     /* are we looking for a mark? */
      SV *sv_commit = NULL;   /* last mark name seen in failure */
      SV *sv_yes_mark = NULL; /* last mark name we have seen 
      SV *popmark = NULL;     /* are we looking for a mark? */
      SV *sv_commit = NULL;   /* last mark name seen in failure */
      SV *sv_yes_mark = NULL; /* last mark name we have seen 
@@ -3212,7 +3638,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
         st = PL_regmatch_state = S_push_slab(aTHX);
  
      /* Note that nextchr is a byte even in UTF */
         st = PL_regmatch_state = S_push_slab(aTHX);
  
      /* Note that nextchr is a byte even in UTF */
-    nextchr = UCHARAT(locinput);
+    SET_nextchr;
      scan = prog;
      while (scan != NULL) {
  
      scan = prog;
      while (scan != NULL) {
  
@@ -3237,96 +3663,93 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
  
        reenter_switch:
  
  
        reenter_switch:
  
+        SET_nextchr;
+        assert(nextchr < 256 && (nextchr >= 0 || nextchr == NEXTCHR_EOS));
+
         switch (state_num) {
         switch (state_num) {
-       case BOL:
+       case BOL: /*  /^../  */
             if (locinput == PL_bostr)
             {
                 /* reginfo->till = reginfo->bol; */
                 break;
             }
             sayNO;
             if (locinput == PL_bostr)
             {
                 /* reginfo->till = reginfo->bol; */
                 break;
             }
             sayNO;
-       case MBOL:
+
+       case MBOL: /*  /^../m  */
             if (locinput == PL_bostr ||
             if (locinput == PL_bostr ||
-               ((nextchr || locinput < PL_regeol) && locinput[-1] == '\n'))
+               (!NEXTCHR_IS_EOS && locinput[-1] == '\n'))
             {
                 break;
             }
             sayNO;
             {
                 break;
             }
             sayNO;
-       case SBOL:
+
+       case SBOL: /*  /^../s  */
             if (locinput == PL_bostr)
                 break;
             sayNO;
             if (locinput == PL_bostr)
                 break;
             sayNO;
-       case GPOS:
+
+       case GPOS: /*  \G  */
             if (locinput == reginfo->ganch)
                 break;
             sayNO;
  
             if (locinput == reginfo->ganch)
                 break;
             sayNO;
  
-       case KEEPS:
+       case KEEPS: /*   \K  */
             /* update the startpoint */
             st->u.keeper.val = rex->offs[0].start;
             /* update the startpoint */
             st->u.keeper.val = rex->offs[0].start;
-           PL_reginput = locinput;
             rex->offs[0].start = locinput - PL_bostr;
             rex->offs[0].start = locinput - PL_bostr;
-           PUSH_STATE_GOTO(KEEPS_next, next);
-           /*NOT-REACHED*/
+           PUSH_STATE_GOTO(KEEPS_next, next, locinput);
+           assert(0); /*NOTREACHED*/
         case KEEPS_next_fail:
             /* rollback the start point change */
             rex->offs[0].start = st->u.keeper.val;
             sayNO_SILENT;
         case KEEPS_next_fail:
             /* rollback the start point change */
             rex->offs[0].start = st->u.keeper.val;
             sayNO_SILENT;
-           /*NOT-REACHED*/
-       case EOL:
+           assert(0); /*NOTREACHED*/
+
+       case EOL: /* /..$/  */
                 goto seol;
                 goto seol;
-       case MEOL:
-           if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
+
+       case MEOL: /* /..$/m  */
+           if (!NEXTCHR_IS_EOS && nextchr != '\n')
                 sayNO;
             break;
                 sayNO;
             break;
-       case SEOL:
+
+       case SEOL: /* /..$/s  */
           seol:
           seol:
-           if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
+           if (!NEXTCHR_IS_EOS && nextchr != '\n')
                 sayNO;
             if (PL_regeol - locinput > 1)
                 sayNO;
             break;
                 sayNO;
             if (PL_regeol - locinput > 1)
                 sayNO;
             break;
-       case EOS:
-           if (PL_regeol != locinput)
+
+       case EOS: /*  \z  */
+           if (!NEXTCHR_IS_EOS)
                 sayNO;
             break;
                 sayNO;
             break;
-       case SANY:
-           if (!nextchr && locinput >= PL_regeol)
+
+       case SANY: /*  /./s  */
+           if (NEXTCHR_IS_EOS)
                 sayNO;
                 sayNO;
-           if (utf8_target) {
-               locinput += PL_utf8skip[nextchr];
-               if (locinput > PL_regeol)
-                   sayNO;
-               nextchr = UCHARAT(locinput);
-           }
-           else
-               nextchr = UCHARAT(++locinput);
-           break;
-       case CANY:
-           if (!nextchr && locinput >= PL_regeol)
+            goto increment_locinput;
+
+       case CANY: /*  \C  */
+           if (NEXTCHR_IS_EOS)
                 sayNO;
                 sayNO;
-           nextchr = UCHARAT(++locinput);
+           locinput++;
             break;
             break;
-       case REG_ANY:
-           if ((!nextchr && locinput >= PL_regeol) || nextchr == '\n')
+
+       case REG_ANY: /*  /./  */
+           if ((NEXTCHR_IS_EOS) || nextchr == '\n')
                 sayNO;
                 sayNO;
-           if (utf8_target) {
-               locinput += PL_utf8skip[nextchr];
-               if (locinput > PL_regeol)
-                   sayNO;
-               nextchr = UCHARAT(locinput);
-           }
-           else
-               nextchr = UCHARAT(++locinput);
-           break;
+            goto increment_locinput;
+
  
  #undef  ST
  #define ST st->u.trie
  
  #undef  ST
  #define ST st->u.trie
-        case TRIEC:
+        case TRIEC: /* (ab|cd) with known charclass */
              /* In this case the charclass data is available inline so
                 we can fail fast without a lot of extra overhead. 
               */
              /* In this case the charclass data is available inline so
                 we can fail fast without a lot of extra overhead. 
               */
-            if(!ANYOF_BITMAP_TEST(scan, *locinput)) {
+            if(!NEXTCHR_IS_EOS && !ANYOF_BITMAP_TEST(scan, nextchr)) {
                  DEBUG_EXECUTE_r(
                      PerlIO_printf(Perl_debug_log,
                                "%*s  %sfailed to match trie start class...%s\n",
                  DEBUG_EXECUTE_r(
                      PerlIO_printf(Perl_debug_log,
                                "%*s  %sfailed to match trie start class...%s\n",
@@ -3336,7 +3759,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
                  assert(0); /* NOTREACHED */
              }
              /* FALL THROUGH */
                  assert(0); /* NOTREACHED */
              }
              /* FALL THROUGH */
-       case TRIE:
+       case TRIE:  /* (ab|cd)  */
             /* the basic plan of execution of the trie is:
              * At the beginning, run though all the states, and
              * find the longest-matching word. Also remember the position
             /* the basic plan of execution of the trie is:
              * At the beginning, run though all the states, and
              * find the longest-matching word. Also remember the position
@@ -3345,7 +3768,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
              *    ab|a|x|abcd|abc
              * when matched against the string "abcde", will generate
              * accept states for all words except 3, with the longest
              *    ab|a|x|abcd|abc
              * when matched against the string "abcde", will generate
              * accept states for all words except 3, with the longest
-            * matching word being 4, and the shortest being 1 (with
+            * matching word being 4, and the shortest being 2 (with
              * the position being after char 1 of the string).
              *
              * Then for each matching word, in word order (i.e. 1,2,4,5),
              * the position being after char 1 of the string).
              *
              * Then for each matching word, in word order (i.e. 1,2,4,5),
@@ -3391,7 +3814,9 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
                 HV * widecharmap = MUTABLE_HV(rexi->data->data[ ARG( scan ) + 1 ]);
                  U32 state = trie->startstate;
  
                 HV * widecharmap = MUTABLE_HV(rexi->data->data[ ARG( scan ) + 1 ]);
                  U32 state = trie->startstate;
  
-                if (trie->bitmap && !TRIE_BITMAP_TEST(trie,*locinput) ) {
+                if (   trie->bitmap
+                    && (NEXTCHR_IS_EOS || !TRIE_BITMAP_TEST(trie, nextchr)))
+                {
                     if (trie->states[ state ].wordnum) {
                          DEBUG_EXECUTE_r(
                              PerlIO_printf(Perl_debug_log,
                     if (trie->states[ state ].wordnum) {
                          DEBUG_EXECUTE_r(
                              PerlIO_printf(Perl_debug_log,
@@ -3464,7 +3889,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
                     });
  
                     /* read a char and goto next state */
                     });
  
                     /* read a char and goto next state */
-                   if ( base ) {
+                   if ( base && (foldlen || uc < (U8*)PL_regeol)) {
                         I32 offset;
                         REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc,
                                              uscan, len, uvc, charid, foldlen,
                         I32 offset;
                         REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc,
                                              uscan, len, uvc, charid, foldlen,
@@ -3521,6 +3946,8 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
             assert(0); /* NOTREACHED */
  
         case TRIE_next_fail: /* we failed - try next alternative */
             assert(0); /* NOTREACHED */
  
         case TRIE_next_fail: /* we failed - try next alternative */
+        {
+            U8 *uc;
              if ( ST.jump) {
                  REGCP_UNWIND(ST.cp);
                  UNWIND_PAREN(ST.lastparen, ST.lastcloseparen);
              if ( ST.jump) {
                  REGCP_UNWIND(ST.cp);
                  UNWIND_PAREN(ST.lastparen, ST.lastcloseparen);
@@ -3565,7 +3992,6 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
             /* find start char of end of current word */
             {
                 U32 chars; /* how many chars to skip */
             /* find start char of end of current word */
             {
                 U32 chars; /* how many chars to skip */
-               U8 *uc = ST.firstpos;
                 reg_trie_data * const trie
                     = (reg_trie_data*)rexi->data->data[ARG(ST.me)];
  
                 reg_trie_data * const trie
                     = (reg_trie_data*)rexi->data->data[ARG(ST.me)];
  
@@ -3573,6 +3999,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
                             >=  ST.firstchars);
                 chars = (trie->wordinfo[ST.nextword].len - trie->prefixlen)
                             - ST.firstchars;
                             >=  ST.firstchars);
                 chars = (trie->wordinfo[ST.nextword].len - trie->prefixlen)
                             - ST.firstchars;
+               uc = ST.firstpos;
  
                 if (ST.longfold) {
                     /* the hard option - fold each char in turn and find
  
                 if (ST.longfold) {
                     /* the hard option - fold each char in turn and find
@@ -3612,7 +4039,6 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
                     else
                         uc += chars;
                 }
                     else
                         uc += chars;
                 }
-               PL_reginput = (char *)uc;
             }
  
             scan = ST.me + ((ST.jump && ST.jump[ST.nextword])
             }
  
             scan = ST.me + ((ST.jump && ST.jump[ST.nextword])
@@ -3630,7 +4056,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
             });
  
             if (ST.accepted > 1 || has_cutgroup) {
             });
  
             if (ST.accepted > 1 || has_cutgroup) {
-               PUSH_STATE_GOTO(TRIE_next, scan);
+               PUSH_STATE_GOTO(TRIE_next, scan, (char*)uc);
                 assert(0); /* NOTREACHED */
             }
             /* only one choice left - just continue */
                 assert(0); /* NOTREACHED */
             }
             /* only one choice left - just continue */
@@ -3653,13 +4079,13 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
                     PL_colors[5] );
             });
  
                     PL_colors[5] );
             });
  
-           locinput = PL_reginput;
-           nextchr = UCHARAT(locinput);
+           locinput = (char*)uc;
             continue; /* execute rest of RE */
             assert(0); /* NOTREACHED */
             continue; /* execute rest of RE */
             assert(0); /* NOTREACHED */
+        }
  #undef  ST
  
  #undef  ST
  
-       case EXACT: {
+       case EXACT: {            /*  /abc/        */
             char *s = STRING(scan);
             ln = STR_LEN(scan);
             if (utf8_target != UTF_PATTERN) {
             char *s = STRING(scan);
             ln = STR_LEN(scan);
             if (utf8_target != UTF_PATTERN) {
@@ -3668,35 +4094,59 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
                 const char * const e = s + ln;
  
                 if (utf8_target) {
                 const char * const e = s + ln;
  
                 if (utf8_target) {
-                   /* The target is utf8, the pattern is not utf8. */
+                    /* The target is utf8, the pattern is not utf8.
+                     * Above-Latin1 code points can't match the pattern;
+                     * invariants match exactly, and the other Latin1 ones need
+                     * to be downgraded to a single byte in order to do the
+                     * comparison.  (If we could be confident that the target
+                     * is not malformed, this could be refactored to have fewer
+                     * tests by just assuming that if the first bytes match, it
+                     * is an invariant, but there are tests in the test suite
+                     * dealing with (??{...}) which violate this) */
                     while (s < e) {
                     while (s < e) {
-                       STRLEN ulen;
                         if (l >= PL_regeol)
                              sayNO;
                         if (l >= PL_regeol)
                              sayNO;
-                       if (NATIVE_TO_UNI(*(U8*)s) !=
-                           utf8n_to_uvuni((U8*)l, UTF8_MAXBYTES, &ulen,
-                                           uniflags))
-                            sayNO;
-                       l += ulen;
-                       s ++;
+                        if (UTF8_IS_ABOVE_LATIN1(* (U8*) l)) {
+                            sayNO;
+                        }
+                        if (UTF8_IS_INVARIANT(*(U8*)l)) {
+                           if (*l != *s) {
+                                sayNO;
+                            }
+                            l++;
+                        }
+                        else {
+                            if (TWO_BYTE_UTF8_TO_UNI(*l, *(l+1)) != * (U8*) s) {
+                                sayNO;
+                            }
+                            l += 2;
+                        }
+                       s++;
                     }
                 }
                 else {
                     /* The target is not utf8, the pattern is utf8. */
                     while (s < e) {
                     }
                 }
                 else {
                     /* The target is not utf8, the pattern is utf8. */
                     while (s < e) {
-                       STRLEN ulen;
-                       if (l >= PL_regeol)
-                           sayNO;
-                       if (NATIVE_TO_UNI(*((U8*)l)) !=
-                           utf8n_to_uvuni((U8*)s, UTF8_MAXBYTES, &ulen,
-                                          uniflags))
-                           sayNO;
-                       s += ulen;
-                       l ++;
+                        if (l >= PL_regeol || UTF8_IS_ABOVE_LATIN1(* (U8*) s))
+                        {
+                            sayNO;
+                        }
+                        if (UTF8_IS_INVARIANT(*(U8*)s)) {
+                           if (*s != *l) {
+                                sayNO;
+                            }
+                            s++;
+                        }
+                        else {
+                            if (TWO_BYTE_UTF8_TO_UNI(*s, *(s+1)) != * (U8*) l) {
+                                sayNO;
+                            }
+                            s += 2;
+                        }
+                       l++;
                     }
                 }
                 locinput = l;
                     }
                 }
                 locinput = l;
-               nextchr = UCHARAT(locinput);
                 break;
             }
             /* The target and the pattern have the same utf8ness. */
                 break;
             }
             /* The target and the pattern have the same utf8ness. */
@@ -3708,10 +4158,10 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
             if (ln > 1 && memNE(s, locinput, ln))
                 sayNO;
             locinput += ln;
             if (ln > 1 && memNE(s, locinput, ln))
                 sayNO;
             locinput += ln;
-           nextchr = UCHARAT(locinput);
             break;
             }
             break;
             }
-       case EXACTFL: {
+
+       case EXACTFL: {          /*  /abc/il      */
             re_fold_t folder;
             const U8 * fold_array;
             const char * s;
             re_fold_t folder;
             const U8 * fold_array;
             const char * s;
@@ -3723,21 +4173,21 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
             fold_utf8_flags = FOLDEQ_UTF8_LOCALE;
             goto do_exactf;
  
             fold_utf8_flags = FOLDEQ_UTF8_LOCALE;
             goto do_exactf;
  
-       case EXACTFU_SS:
-       case EXACTFU_TRICKYFOLD:
-       case EXACTFU:
+       case EXACTFU_SS:         /*  /\x{df}/iu   */
+       case EXACTFU_TRICKYFOLD: /*  /\x{390}/iu  */
+       case EXACTFU:            /*  /abc/iu      */
             folder = foldEQ_latin1;
             fold_array = PL_fold_latin1;
             fold_utf8_flags = (UTF_PATTERN) ? FOLDEQ_S1_ALREADY_FOLDED : 0;
             goto do_exactf;
  
             folder = foldEQ_latin1;
             fold_array = PL_fold_latin1;
             fold_utf8_flags = (UTF_PATTERN) ? FOLDEQ_S1_ALREADY_FOLDED : 0;
             goto do_exactf;
  
-       case EXACTFA:
+       case EXACTFA:            /*  /abc/iaa     */
             folder = foldEQ_latin1;
             fold_array = PL_fold_latin1;
             fold_utf8_flags = FOLDEQ_UTF8_NOMIX_ASCII;
             goto do_exactf;
  
             folder = foldEQ_latin1;
             fold_array = PL_fold_latin1;
             fold_utf8_flags = FOLDEQ_UTF8_NOMIX_ASCII;
             goto do_exactf;
  
-       case EXACTF:
+       case EXACTF:             /*  /abc/i       */
             folder = foldEQ;
             fold_array = PL_fold;
             fold_utf8_flags = 0;
             folder = foldEQ;
             fold_array = PL_fold;
             fold_utf8_flags = 0;
@@ -3758,13 +4208,13 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
                     sayNO;
                 }
                 locinput = e;
                     sayNO;
                 }
                 locinput = e;
-               nextchr = UCHARAT(locinput);
                 break;
             }
  
             /* Neither the target nor the pattern are utf8 */
                 break;
             }
  
             /* Neither the target nor the pattern are utf8 */
-           if (UCHARAT(s) != nextchr &&
-               UCHARAT(s) != fold_array[nextchr])
+           if (UCHARAT(s) != nextchr
+                && !NEXTCHR_IS_EOS
+               && UCHARAT(s) != fold_array[nextchr])
             {
                 sayNO;
             }
             {
                 sayNO;
             }
@@ -3773,23 +4223,22 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
             if (ln > 1 && ! folder(s, locinput, ln))
                 sayNO;
             locinput += ln;
             if (ln > 1 && ! folder(s, locinput, ln))
                 sayNO;
             locinput += ln;
-           nextchr = UCHARAT(locinput);
             break;
         }
  
         /* XXX Could improve efficiency by separating these all out using a
          * macro or in-line function.  At that point regcomp.c would no longer
          * have to set the FLAGS fields of these */
             break;
         }
  
         /* XXX Could improve efficiency by separating these all out using a
          * macro or in-line function.  At that point regcomp.c would no longer
          * have to set the FLAGS fields of these */
-       case BOUNDL:
-       case NBOUNDL:
+       case BOUNDL:  /*  /\b/l  */
+       case NBOUNDL: /*  /\B/l  */
             PL_reg_flags |= RF_tainted;
             /* FALL THROUGH */
             PL_reg_flags |= RF_tainted;
             /* FALL THROUGH */
-       case BOUND:
-       case BOUNDU:
-       case BOUNDA:
-       case NBOUND:
-       case NBOUNDU:
-       case NBOUNDA:
+       case BOUND:   /*  /\b/   */
+       case BOUNDU:  /*  /\b/u  */
+       case BOUNDA:  /*  /\b/a  */
+       case NBOUND:  /*  /\B/   */
+       case NBOUNDU: /*  /\B/u  */
+       case NBOUNDA: /*  /\B/a  */
             /* was last char in word? */
             if (utf8_target
                 && FLAGS(scan) != REGEX_ASCII_RESTRICTED_CHARSET
             /* was last char in word? */
             if (utf8_target
                 && FLAGS(scan) != REGEX_ASCII_RESTRICTED_CHARSET
@@ -3804,12 +4253,17 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
                 }
                 if (FLAGS(scan) != REGEX_LOCALE_CHARSET) {
                     ln = isALNUM_uni(ln);
                 }
                 if (FLAGS(scan) != REGEX_LOCALE_CHARSET) {
                     ln = isALNUM_uni(ln);
-                   LOAD_UTF8_CHARCLASS_ALNUM();
-                   n = swash_fetch(PL_utf8_alnum, (U8*)locinput, utf8_target);
+                    if (NEXTCHR_IS_EOS)
+                        n = 0;
+                    else {
+                        LOAD_UTF8_CHARCLASS_ALNUM();
+                        n = swash_fetch(PL_utf8_alnum, (U8*)locinput,
+                                                                utf8_target);
+                    }
                 }
                 else {
                     ln = isALNUM_LC_uvchr(UNI_TO_NATIVE(ln));
                 }
                 else {
                     ln = isALNUM_LC_uvchr(UNI_TO_NATIVE(ln));
-                   n = isALNUM_LC_utf8((U8*)locinput);
+                   n = NEXTCHR_IS_EOS ? 0 : isALNUM_LC_utf8((U8*)locinput);
                 }
             }
             else {
                 }
             }
             else {
@@ -3830,20 +4284,20 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
                 switch (FLAGS(scan)) {
                     case REGEX_UNICODE_CHARSET:
                         ln = isWORDCHAR_L1(ln);
                 switch (FLAGS(scan)) {
                     case REGEX_UNICODE_CHARSET:
                         ln = isWORDCHAR_L1(ln);
-                       n = isWORDCHAR_L1(nextchr);
+                       n = NEXTCHR_IS_EOS ? 0 : isWORDCHAR_L1(nextchr);
                         break;
                     case REGEX_LOCALE_CHARSET:
                         ln = isALNUM_LC(ln);
                         break;
                     case REGEX_LOCALE_CHARSET:
                         ln = isALNUM_LC(ln);
-                       n = isALNUM_LC(nextchr);
+                       n = NEXTCHR_IS_EOS ? 0 : isALNUM_LC(nextchr);
                         break;
                     case REGEX_DEPENDS_CHARSET:
                         ln = isALNUM(ln);
                         break;
                     case REGEX_DEPENDS_CHARSET:
                         ln = isALNUM(ln);
-                       n = isALNUM(nextchr);
+                       n = NEXTCHR_IS_EOS ? 0 : isALNUM(nextchr);
                         break;
                     case REGEX_ASCII_RESTRICTED_CHARSET:
                     case REGEX_ASCII_MORE_RESTRICTED_CHARSET:
                         ln = isWORDCHAR_A(ln);
                         break;
                     case REGEX_ASCII_RESTRICTED_CHARSET:
                     case REGEX_ASCII_MORE_RESTRICTED_CHARSET:
                         ln = isWORDCHAR_A(ln);
-                       n = isWORDCHAR_A(nextchr);
+                       n = NEXTCHR_IS_EOS ? 0 : isWORDCHAR_A(nextchr);
                         break;
                     default:
                         Perl_croak(aTHX_ "panic: Unexpected FLAGS %u in op %u", FLAGS(scan), OP(scan));
                         break;
                     default:
                         Perl_croak(aTHX_ "panic: Unexpected FLAGS %u in op %u", FLAGS(scan), OP(scan));
@@ -3855,31 +4309,26 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
             if (((!ln) == (!n)) == (OP(scan) < NBOUND))
                     sayNO;
             break;
             if (((!ln) == (!n)) == (OP(scan) < NBOUND))
                     sayNO;
             break;
-       case ANYOFV:
-       case ANYOF:
-           if (utf8_target || state_num == ANYOFV) {
-               STRLEN inclasslen = PL_regeol - locinput;
-               if (locinput >= PL_regeol)
-                   sayNO;
  
  
-               if (!reginclass(rex, scan, (U8*)locinput, &inclasslen, utf8_target))
+       case ANYOF:  /*  /[abc]/       */
+            if (NEXTCHR_IS_EOS)
+                sayNO;
+           if (utf8_target) {
+               if (!reginclass(rex, scan, (U8*)locinput, utf8_target))
                     sayNO;
                     sayNO;
-               locinput += inclasslen;
-               nextchr = UCHARAT(locinput);
+               locinput += UTF8SKIP(locinput);
                 break;
             }
             else {
                 break;
             }
             else {
-               if (nextchr < 0)
-                   nextchr = UCHARAT(locinput);
-               if (!nextchr && locinput >= PL_regeol)
-                   sayNO;
                 if (!REGINCLASS(rex, scan, (U8*)locinput))
                     sayNO;
                 if (!REGINCLASS(rex, scan, (U8*)locinput))
                     sayNO;
-               nextchr = UCHARAT(++locinput);
+               locinput++;
                 break;
             }
             break;
                 break;
             }
             break;
-       /* Special char classes - The defines start on line 129 or so */
+
+       /* Special char classes: \d, \w etc.
+         * The defines start on line 166 or so */
          CCC_TRY_U(ALNUM,  NALNUM,  isWORDCHAR,
                   ALNUML, NALNUML, isALNUM_LC, isALNUM_LC_utf8,
                   ALNUMU, NALNUMU, isWORDCHAR_L1,
          CCC_TRY_U(ALNUM,  NALNUM,  isWORDCHAR,
                   ALNUML, NALNUML, isALNUM_LC, isALNUM_LC_utf8,
                   ALNUMU, NALNUMU, isWORDCHAR_L1,
@@ -3897,25 +4346,19 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
                 DIGITA, NDIGITA, isDIGIT_A,
                 digit, "0");
  
                 DIGITA, NDIGITA, isDIGIT_A,
                 digit, "0");
  
-        case POSIXA:
-            if (locinput >= PL_regeol || ! _generic_isCC_A(nextchr, FLAGS(scan))) {
+        case POSIXA: /* /[[:ascii:]]/ etc */
+            if (NEXTCHR_IS_EOS || ! _generic_isCC_A(nextchr, FLAGS(scan))) {
                  sayNO;
              }
              /* Matched a utf8-invariant, so don't have to worry about utf8 */
                  sayNO;
              }
              /* Matched a utf8-invariant, so don't have to worry about utf8 */
-            nextchr = UCHARAT(++locinput);
+            locinput++;
              break;
              break;
-        case NPOSIXA:
-            if (locinput >= PL_regeol || _generic_isCC_A(nextchr, FLAGS(scan))) {
+
+        case NPOSIXA: /*  /[^[:ascii:]]/  etc */
+            if (NEXTCHR_IS_EOS || _generic_isCC_A(nextchr, FLAGS(scan))) {
                  sayNO;
              }
                  sayNO;
              }
-            if (utf8_target) {
-                locinput += PL_utf8skip[nextchr];
-                nextchr = UCHARAT(locinput);
-            }
-            else {
-                nextchr = UCHARAT(++locinput);
-            }
-            break;
+            goto increment_locinput;
  
         case CLUMP: /* Match \X: logical Unicode character.  This is defined as
                        a Unicode extended Grapheme Cluster */
  
         case CLUMP: /* Match \X: logical Unicode character.  This is defined as
                        a Unicode extended Grapheme Cluster */
@@ -3926,43 +4369,20 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
                | Prepend* Begin Extend*
                | .
  
                | Prepend* Begin Extend*
                | .
  
-              Begin is (Hangul-syllable | ! Control)
-              Extend is (Grapheme_Extend | Spacing_Mark)
-              Control is [ GCB_Control CR LF ]
-
-              The discussion below shows how the code for CLUMP is derived
-              from this regex.  Note that most of these concepts are from
-              property values of the Grapheme Cluster Boundary (GCB) property.
-              No code point can have multiple property values for a given
-              property.  Thus a code point in Prepend can't be in Control, but
-              it must be in !Control.  This is why Control above includes
-              GCB_Control plus CR plus LF.  The latter two are used in the GCB
-              property separately, and so can't be in GCB_Control, even though
-              they logically are controls.  Control is not the same as gc=cc,
-              but includes format and other characters as well.
-
-              The Unicode definition of Hangul-syllable is:
-                  L+
-                  | (L* ( ( V | LV ) V* | LVT ) T*)
-                  | T+ 
-                 )
-              Each of these is a value for the GCB property, and hence must be
-              disjoint, so the order they are tested is immaterial, so the
-              above can safely be changed to
-                  T+
-                  | L+
-                  | (L* ( LVT | ( V | LV ) V*) T*)
-
-              The last two terms can be combined like this:
-                  L* ( L
-                       | (( LVT | ( V | LV ) V*) T*))
-
-              And refactored into this:
-                  L* (L | LVT T* | V  V* T* | LV  V* T*)
-
-              That means that if we have seen any L's at all we can quit
-              there, but if the next character is an LVT, a V, or an LV we
-              should keep going.
+               Begin is:           ( Special_Begin | ! Control )
+               Special_Begin is:   ( Regional-Indicator+ | Hangul-syllable )
+               Extend is:          ( Grapheme_Extend | Spacing_Mark )
+               Control is:         [ GCB_Control  CR  LF ]
+               Hangul-syllable is: ( T+ | ( L* ( L | ( LVT | ( V | LV ) V* ) T* ) ))
+
+               If we create a 'Regular_Begin' = Begin - Special_Begin, then
+               we can rewrite
+
+                   Begin is ( Regular_Begin + Special Begin )
+
+               It turns out that 98.4% of all Unicode code points match
+               Regular_Begin.  Doing it this way eliminates a table match in
+               the previous implementation for almost all Unicode code points.
  
                There is a subtlety with Prepend* which showed up in testing.
                Note that the Begin, and only the Begin is required in:
  
                There is a subtlety with Prepend* which showed up in testing.
                Note that the Begin, and only the Begin is required in:
@@ -3974,7 +4394,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
                Prepend, that one will be a suitable Begin.
             */
  
                Prepend, that one will be a suitable Begin.
             */
  
-           if (locinput >= PL_regeol)
+           if (NEXTCHR_IS_EOS)
                 sayNO;
             if  (! utf8_target) {
  
                 sayNO;
             if  (! utf8_target) {
  
@@ -3990,10 +4410,14 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
  
                 /* Utf8: See if is ( CR LF ); already know that locinput <
                  * PL_regeol, so locinput+1 is in bounds */
  
                 /* Utf8: See if is ( CR LF ); already know that locinput <
                  * PL_regeol, so locinput+1 is in bounds */
-               if (nextchr == '\r' && UCHARAT(locinput + 1) == '\n') {
+               if ( nextchr == '\r' && locinput+1 < PL_regeol
+                        && UCHARAT(locinput + 1) == '\n')
+                {
                     locinput += 2;
                 }
                 else {
                     locinput += 2;
                 }
                 else {
+                    STRLEN len;
+
                     /* In case have to backtrack to beginning, then match '.' */
                     char *starting = locinput;
  
                     /* In case have to backtrack to beginning, then match '.' */
                     char *starting = locinput;
  
@@ -4002,22 +4426,23 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
  
                     LOAD_UTF8_CHARCLASS_GCB();
  
  
                     LOAD_UTF8_CHARCLASS_GCB();
  
-                   /* Match (prepend)* */
-                   while (locinput < PL_regeol
-                          && swash_fetch(PL_utf8_X_prepend,
-                                         (U8*)locinput, utf8_target))
-                   {
-                       previous_prepend = locinput;
-                       locinput += UTF8SKIP(locinput);
-                   }
+                    /* Match (prepend)*   */
+                    while (locinput < PL_regeol
+                           && (len = is_GCB_Prepend_utf8(locinput)))
+                    {
+                        previous_prepend = locinput;
+                        locinput += len;
+                    }
  
                     /* As noted above, if we matched a prepend character, but
                      * the next thing won't match, back off the last prepend we
                      * matched, as it is guaranteed to match the begin */
                     if (previous_prepend
                         && (locinput >=  PL_regeol
  
                     /* As noted above, if we matched a prepend character, but
                      * the next thing won't match, back off the last prepend we
                      * matched, as it is guaranteed to match the begin */
                     if (previous_prepend
                         && (locinput >=  PL_regeol
-                           || ! swash_fetch(PL_utf8_X_begin,
-                                            (U8*)locinput, utf8_target)))
+                           || (! swash_fetch(PL_utf8_X_regular_begin,
+                                            (U8*)locinput, utf8_target)
+                                && ! is_GCB_SPECIAL_BEGIN_utf8(locinput)))
+                        )
                     {
                         locinput = previous_prepend;
                     }
                     {
                         locinput = previous_prepend;
                     }
@@ -4027,111 +4452,102 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
                      * moved locinput forward, we tested the result just above
                      * and it either passed, or we backed off so that it will
                      * now pass */
                      * moved locinput forward, we tested the result just above
                      * and it either passed, or we backed off so that it will
                      * now pass */
-                   if (! swash_fetch(PL_utf8_X_begin, (U8*)locinput, utf8_target)) {
+                   if (swash_fetch(PL_utf8_X_regular_begin,
+                                    (U8*)locinput, utf8_target)) {
+                        locinput += UTF8SKIP(locinput);
+                    }
+                    else if (! is_GCB_SPECIAL_BEGIN_utf8(locinput)) {
  
                         /* Here did not match the required 'Begin' in the
                          * second term.  So just match the very first
                          * character, the '.' of the final term of the regex */
                         locinput = starting + UTF8SKIP(starting);
  
                         /* Here did not match the required 'Begin' in the
                          * second term.  So just match the very first
                          * character, the '.' of the final term of the regex */
                         locinput = starting + UTF8SKIP(starting);
+                        goto exit_utf8;
                     } else {
  
                     } else {
  
-                       /* Here is the beginning of a character that can have
-                        * an extender.  It is either a hangul syllable, or a
-                        * non-control */
-                       if (swash_fetch(PL_utf8_X_non_hangul,
-                                       (U8*)locinput, utf8_target))
-                       {
-
-                           /* Here not a Hangul syllable, must be a
-                            * ('!  * Control') */
-                           locinput += UTF8SKIP(locinput);
-                       } else {
-
-                           /* Here is a Hangul syllable.  It can be composed
-                            * of several individual characters.  One
-                            * possibility is T+ */
-                           if (swash_fetch(PL_utf8_X_T,
-                                           (U8*)locinput, utf8_target))
-                           {
-                               while (locinput < PL_regeol
-                                       && swash_fetch(PL_utf8_X_T,
-                                                       (U8*)locinput, utf8_target))
-                               {
-                                   locinput += UTF8SKIP(locinput);
-                               }
-                           } else {
-
-                               /* Here, not T+, but is a Hangul.  That means
-                                * it is one of the others: L, LV, LVT or V,
-                                * and matches:
-                                * L* (L | LVT T* | V  V* T* | LV  V* T*) */
-
-                               /* Match L*           */
-                               while (locinput < PL_regeol
-                                       && swash_fetch(PL_utf8_X_L,
-                                                       (U8*)locinput, utf8_target))
-                               {
-                                   locinput += UTF8SKIP(locinput);
-                               }
+                        /* Here is a special begin.  It can be composed of
+                         * several individual characters.  One possibility is
+                         * RI+ */
+                        if ((len = is_GCB_RI_utf8(locinput))) {
+                            locinput += len;
+                            while (locinput < PL_regeol
+                                   && (len = is_GCB_RI_utf8(locinput)))
+                            {
+                                locinput += len;
+                            }
+                        } else if ((len = is_GCB_T_utf8(locinput))) {
+                            /* Another possibility is T+ */
+                            locinput += len;
+                            while (locinput < PL_regeol
+                                && (len = is_GCB_T_utf8(locinput)))
+                            {
+                                locinput += len;
+                            }
+                        } else {
+
+                            /* Here, neither RI+ nor T+; must be some other
+                             * Hangul.  That means it is one of the others: L,
+                             * LV, LVT or V, and matches:
+                             * L* (L | LVT T* | V * V* T* | LV  V* T*) */
+
+                            /* Match L*           */
+                            while (locinput < PL_regeol
+                                   && (len = is_GCB_L_utf8(locinput)))
+                            {
+                                locinput += len;
+                            }
  
  
-                               /* Here, have exhausted L*.  If the next
-                                * character is not an LV, LVT nor V, it means
-                                * we had to have at least one L, so matches L+
-                                * in the original equation, we have a complete
-                                * hangul syllable.  Are done. */
-
-                               if (locinput < PL_regeol
-                                   && swash_fetch(PL_utf8_X_LV_LVT_V,
-                                                   (U8*)locinput, utf8_target))
-                               {
-
-                                   /* Otherwise keep going.  Must be LV, LVT
-                                    * or V.  See if LVT */
-                                   if (swash_fetch(PL_utf8_X_LVT,
-                                                   (U8*)locinput, utf8_target))
-                                   {
-                                       locinput += UTF8SKIP(locinput);
-                                   } else {
-
-                                       /* Must be  V or LV.  Take it, then
-                                        * match V*     */
-                                       locinput += UTF8SKIP(locinput);
-                                       while (locinput < PL_regeol
-                                               && swash_fetch(PL_utf8_X_V,
-                                                        (U8*)locinput, utf8_target))
-                                       {
-                                           locinput += UTF8SKIP(locinput);
-                                       }
-                                   }
+                            /* Here, have exhausted L*.  If the next character
+                             * is not an LV, LVT nor V, it means we had to have
+                             * at least one L, so matches L+ in the original
+                             * equation, we have a complete hangul syllable.
+                             * Are done. */
+
+                            if (locinput < PL_regeol
+                                && is_GCB_LV_LVT_V_utf8(locinput))
+                            {
+
+                                /* Otherwise keep going.  Must be LV, LVT or V.
+                                 * See if LVT */
+                                if (is_utf8_X_LVT((U8*)locinput)) {
+                                    locinput += UTF8SKIP(locinput);
+                                } else {
+
+                                    /* Must be  V or LV.  Take it, then match
+                                     * V*     */
+                                    locinput += UTF8SKIP(locinput);
+                                    while (locinput < PL_regeol
+                                           && (len = is_GCB_V_utf8(locinput)))
+                                    {
+                                        locinput += len;
+                                    }
+                                }
  
  
-                                   /* And any of LV, LVT, or V can be followed
-                                    * by T*            */
-                                   while (locinput < PL_regeol
-                                          && swash_fetch(PL_utf8_X_T,
-                                                          (U8*)locinput,
-                                                          utf8_target))
-                                   {
-                                       locinput += UTF8SKIP(locinput);
-                                   }
-                               }
-                           }
-                       }
+                                /* And any of LV, LVT, or V can be followed
+                                 * by T*            */
+                                while (locinput < PL_regeol
+                                       && (len = is_GCB_T_utf8(locinput)))
+                                {
+                                    locinput += len;
+                                }
+                            }
+                        }
+                    }
  
  
-                       /* Match any extender */
-                       while (locinput < PL_regeol
-                               && swash_fetch(PL_utf8_X_extend,
-                                               (U8*)locinput, utf8_target))
-                       {
-                           locinput += UTF8SKIP(locinput);
-                       }
-                   }
+                    /* Match any extender */
+                    while (locinput < PL_regeol
+                            && swash_fetch(PL_utf8_X_extend,
+                                            (U8*)locinput, utf8_target))
+                    {
+                        locinput += UTF8SKIP(locinput);
+                    }
                 }
                 }
+            exit_utf8:
                 if (locinput > PL_regeol) sayNO;
             }
                 if (locinput > PL_regeol) sayNO;
             }
-           nextchr = UCHARAT(locinput);
             break;
              
             break;
              
-       case NREFFL:
+       case NREFFL:  /*  /\g{name}/il  */
         {   /* The capture buffer cases.  The ones beginning with N for the
                named buffers just convert to the equivalent numbered and
                pretend they were called as the corresponding numbered buffer
         {   /* The capture buffer cases.  The ones beginning with N for the
                named buffers just convert to the equivalent numbered and
                pretend they were called as the corresponding numbered buffer
@@ -4151,28 +4567,28 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
             utf8_fold_flags = FOLDEQ_UTF8_LOCALE;
             goto do_nref;
  
             utf8_fold_flags = FOLDEQ_UTF8_LOCALE;
             goto do_nref;
  
-       case NREFFA:
+       case NREFFA:  /*  /\g{name}/iaa  */
             folder = foldEQ_latin1;
             fold_array = PL_fold_latin1;
             type = REFFA;
             utf8_fold_flags = FOLDEQ_UTF8_NOMIX_ASCII;
             goto do_nref;
  
             folder = foldEQ_latin1;
             fold_array = PL_fold_latin1;
             type = REFFA;
             utf8_fold_flags = FOLDEQ_UTF8_NOMIX_ASCII;
             goto do_nref;
  
-       case NREFFU:
+       case NREFFU:  /*  /\g{name}/iu  */
             folder = foldEQ_latin1;
             fold_array = PL_fold_latin1;
             type = REFFU;
             utf8_fold_flags = 0;
             goto do_nref;
  
             folder = foldEQ_latin1;
             fold_array = PL_fold_latin1;
             type = REFFU;
             utf8_fold_flags = 0;
             goto do_nref;
  
-       case NREFF:
+       case NREFF:  /*  /\g{name}/i  */
             folder = foldEQ;
             fold_array = PL_fold;
             type = REFF;
             utf8_fold_flags = 0;
             goto do_nref;
  
             folder = foldEQ;
             fold_array = PL_fold;
             type = REFF;
             utf8_fold_flags = 0;
             goto do_nref;
  
-       case NREF:
+       case NREF:  /*  /\g{name}/   */
             type = REF;
             folder = NULL;
             fold_array = NULL;
             type = REF;
             folder = NULL;
             fold_array = NULL;
@@ -4188,32 +4604,32 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
             }
             goto do_nref_ref_common;
  
             }
             goto do_nref_ref_common;
  
-       case REFFL:
+       case REFFL:  /*  /\1/il  */
             PL_reg_flags |= RF_tainted;
             folder = foldEQ_locale;
             fold_array = PL_fold_locale;
             utf8_fold_flags = FOLDEQ_UTF8_LOCALE;
             goto do_ref;
  
             PL_reg_flags |= RF_tainted;
             folder = foldEQ_locale;
             fold_array = PL_fold_locale;
             utf8_fold_flags = FOLDEQ_UTF8_LOCALE;
             goto do_ref;
  
-       case REFFA:
+       case REFFA:  /*  /\1/iaa  */
             folder = foldEQ_latin1;
             fold_array = PL_fold_latin1;
             utf8_fold_flags = FOLDEQ_UTF8_NOMIX_ASCII;
             goto do_ref;
  
             folder = foldEQ_latin1;
             fold_array = PL_fold_latin1;
             utf8_fold_flags = FOLDEQ_UTF8_NOMIX_ASCII;
             goto do_ref;
  
-       case REFFU:
+       case REFFU:  /*  /\1/iu  */
             folder = foldEQ_latin1;
             fold_array = PL_fold_latin1;
             utf8_fold_flags = 0;
             goto do_ref;
  
             folder = foldEQ_latin1;
             fold_array = PL_fold_latin1;
             utf8_fold_flags = 0;
             goto do_ref;
  
-       case REFF:
+       case REFF:  /*  /\1/i  */
             folder = foldEQ;
             fold_array = PL_fold;
             utf8_fold_flags = 0;
             goto do_ref;
  
             folder = foldEQ;
             fold_array = PL_fold;
             utf8_fold_flags = 0;
             goto do_ref;
  
-        case REF:
+        case REF:  /*  /\1/    */
             folder = NULL;
             fold_array = NULL;
             utf8_fold_flags = 0;
             folder = NULL;
             fold_array = NULL;
             utf8_fold_flags = 0;
@@ -4239,7 +4655,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
                 /* This call case insensitively compares the entire buffer
                     * at s, with the current input starting at locinput, but
                     * not going off the end given by PL_regeol, and returns in
                 /* This call case insensitively compares the entire buffer
                     * at s, with the current input starting at locinput, but
                     * not going off the end given by PL_regeol, and returns in
-                   * limit upon success, how much of the current input was
+                   * <limit> upon success, how much of the current input was
                     * matched */
                 if (! foldEQ_utf8_flags(s, NULL, rex->offs[n].end - ln, utf8_target,
                                     locinput, &limit, 0, utf8_target, utf8_fold_flags))
                     * matched */
                 if (! foldEQ_utf8_flags(s, NULL, rex->offs[n].end - ln, utf8_target,
                                     locinput, &limit, 0, utf8_target, utf8_fold_flags))
@@ -4247,12 +4663,12 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
                     sayNO;
                 }
                 locinput = limit;
                     sayNO;
                 }
                 locinput = limit;
-               nextchr = UCHARAT(locinput);
                 break;
             }
  
             /* Not utf8:  Inline the first character, for speed. */
                 break;
             }
  
             /* Not utf8:  Inline the first character, for speed. */
-           if (UCHARAT(s) != nextchr &&
+           if (!NEXTCHR_IS_EOS &&
+                UCHARAT(s) != nextchr &&
                 (type == REF ||
                  UCHARAT(s) != fold_array[nextchr]))
                 sayNO;
                 (type == REF ||
                  UCHARAT(s) != fold_array[nextchr]))
                 sayNO;
@@ -4264,13 +4680,16 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
                            : ! folder(s, locinput, ln)))
                 sayNO;
             locinput += ln;
                            : ! folder(s, locinput, ln)))
                 sayNO;
             locinput += ln;
-           nextchr = UCHARAT(locinput);
             break;
         }
             break;
         }
-       case NOTHING:
-       case TAIL:
+
+       case NOTHING: /* null op; e.g. the 'nothing' following
+                       * the '*' in m{(a+|b)*}' */
             break;
             break;
-       case BACK:
+       case TAIL: /* placeholder while compiling (A|B|C) */
+           break;
+
+       case BACK: /* ??? doesn't appear to be used ??? */
             break;
  
  #undef  ST
             break;
  
  #undef  ST
@@ -4282,7 +4701,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
              regexp_internal *rei;
              regnode *startpoint;
  
              regexp_internal *rei;
              regnode *startpoint;
  
-       case GOSTART:
+       case GOSTART: /*  (?R)  */
         case GOSUB: /*    /(...(?1))/   /(...(?&foo))/   */
             if (cur_eval && cur_eval->locinput==locinput) {
                  if (cur_eval->u.eval.close_paren == (U32)ARG(scan)) 
         case GOSUB: /*    /(...(?1))/   /(...(?&foo))/   */
             if (cur_eval && cur_eval->locinput==locinput) {
                  if (cur_eval->u.eval.close_paren == (U32)ARG(scan)) 
@@ -4306,6 +4725,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
              }
              goto eval_recurse_doit;
              assert(0); /* NOTREACHED */
              }
              goto eval_recurse_doit;
              assert(0); /* NOTREACHED */
+
          case EVAL:  /*   /(?{A})B/   /(??{A})B/  and /(?(?{A})X|Y)B/   */        
              if (cur_eval && cur_eval->locinput==locinput) {
                 if ( ++nochange_depth > max_nochange_depth )
          case EVAL:  /*   /(?{A})B/   /(??{A})B/  and /(?(?{A})X|Y)B/   */        
              if (cur_eval && cur_eval->locinput==locinput) {
                 if ( ++nochange_depth > max_nochange_depth )
@@ -4552,6 +4972,8 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
                  RXp_MATCH_COPIED_off(re);
                  re->subbeg = rex->subbeg;
                  re->sublen = rex->sublen;
                  RXp_MATCH_COPIED_off(re);
                  re->subbeg = rex->subbeg;
                  re->sublen = rex->sublen;
+                re->suboffset = rex->suboffset;
+                re->subcoffset = rex->subcoffset;
                 rei = RXi_GET(re);
                  DEBUG_EXECUTE_r(
                      debug_start_match(re_sv, utf8_target, locinput, PL_regeol,
                 rei = RXi_GET(re);
                  DEBUG_EXECUTE_r(
                      debug_start_match(re_sv, utf8_target, locinput, PL_regeol,
@@ -4568,7 +4990,6 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
                 re->lastparen = 0;
                 re->lastcloseparen = 0;
  
                 re->lastparen = 0;
                 re->lastcloseparen = 0;
  
-               PL_reginput = locinput;
                 PL_regsize = 0;
  
                 /* XXXX This is too dramatic a measure... */
                 PL_regsize = 0;
  
                 /* XXXX This is too dramatic a measure... */
@@ -4592,7 +5013,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
                 ST.prev_eval = cur_eval;
                 cur_eval = st;
                 /* now continue from first node in postoned RE */
                 ST.prev_eval = cur_eval;
                 cur_eval = st;
                 /* now continue from first node in postoned RE */
-               PUSH_YES_STATE_GOTO(EVAL_AB, startpoint);
+               PUSH_YES_STATE_GOTO(EVAL_AB, startpoint, locinput);
                 assert(0); /* NOTREACHED */
         }
  
                 assert(0); /* NOTREACHED */
         }
  
@@ -4622,7 +5043,6 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
             rex = (struct regexp *)SvANY(rex_sv);
             rexi = RXi_GET(rex); 
  
             rex = (struct regexp *)SvANY(rex_sv);
             rexi = RXi_GET(rex); 
  
-           PL_reginput = locinput;
             REGCP_UNWIND(ST.lastcp);
             regcppop(rex);
             cur_eval = ST.prev_eval;
             REGCP_UNWIND(ST.lastcp);
             regcppop(rex);
             cur_eval = ST.prev_eval;
@@ -4634,7 +5054,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
             sayNO_SILENT;
  #undef ST
  
             sayNO_SILENT;
  #undef ST
  
-       case OPEN:
+       case OPEN: /*  (  */
             n = ARG(scan);  /* which paren pair */
             rex->offs[n].start_tmp = locinput - PL_bostr;
             if (n > PL_regsize)
             n = ARG(scan);  /* which paren pair */
             rex->offs[n].start_tmp = locinput - PL_bostr;
             if (n > PL_regsize)
@@ -4663,7 +5083,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
         (IV)rex->offs[n].end \
      ))
  
         (IV)rex->offs[n].end \
      ))
  
-       case CLOSE:
+       case CLOSE:  /*  )  */
             n = ARG(scan);  /* which paren pair */
             CLOSE_CAPTURE;
             /*if (n > PL_regsize)
             n = ARG(scan);  /* which paren pair */
             CLOSE_CAPTURE;
             /*if (n > PL_regsize)
@@ -4675,7 +5095,8 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
                 goto fake_end;
             }    
             break;
                 goto fake_end;
             }    
             break;
-        case ACCEPT:
+
+        case ACCEPT:  /*  (*ACCEPT)  */
              if (ARG(scan)){
                  regnode *cursor;
                  for (cursor=scan;
              if (ARG(scan)){
                  regnode *cursor;
                  for (cursor=scan;
@@ -4700,22 +5121,27 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
              }
             goto fake_end;
             /*NOTREACHED*/          
              }
             goto fake_end;
             /*NOTREACHED*/          
-       case GROUPP:
+
+       case GROUPP:  /*  (?(1))  */
             n = ARG(scan);  /* which paren pair */
             sw = cBOOL(rex->lastparen >= n && rex->offs[n].end != -1);
             break;
             n = ARG(scan);  /* which paren pair */
             sw = cBOOL(rex->lastparen >= n && rex->offs[n].end != -1);
             break;
-       case NGROUPP:
+
+       case NGROUPP:  /*  (?(<name>))  */
             /* reg_check_named_buff_matched returns 0 for no match */
             sw = cBOOL(0 < reg_check_named_buff_matched(rex,scan));
             break;
             /* reg_check_named_buff_matched returns 0 for no match */
             sw = cBOOL(0 < reg_check_named_buff_matched(rex,scan));
             break;
-        case INSUBP:
+
+        case INSUBP:   /*  (?(R))  */
              n = ARG(scan);
              sw = (cur_eval && (!n || cur_eval->u.eval.close_paren == n));
              break;
              n = ARG(scan);
              sw = (cur_eval && (!n || cur_eval->u.eval.close_paren == n));
              break;
-        case DEFINEP:
+
+        case DEFINEP:  /*  (?(DEFINE))  */
              sw = 0;
              break;
              sw = 0;
              break;
-       case IFTHEN:
+
+       case IFTHEN:   /*  (?(cond)A|B)  */
             PL_reg_leftiter = PL_reg_maxiter;           /* Void cache */
             if (sw)
                 next = NEXTOPER(NEXTOPER(scan));
             PL_reg_leftiter = PL_reg_maxiter;           /* Void cache */
             if (sw)
                 next = NEXTOPER(NEXTOPER(scan));
@@ -4725,7 +5151,8 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
                     next = NEXTOPER(NEXTOPER(next));
             }
             break;
                     next = NEXTOPER(NEXTOPER(next));
             }
             break;
-       case LOGICAL:
+
+       case LOGICAL:  /* modifier for EVAL and IFMATCH */
             logical = scan->flags;
             break;
  
             logical = scan->flags;
             break;
  
@@ -4842,8 +5269,7 @@ NULL
             ST.count = -1;      /* this will be updated by WHILEM */
             ST.lastloc = NULL;  /* this will be updated by WHILEM */
  
             ST.count = -1;      /* this will be updated by WHILEM */
             ST.lastloc = NULL;  /* this will be updated by WHILEM */
  
-           PL_reginput = locinput;
-           PUSH_YES_STATE_GOTO(CURLYX_end, PREVOPER(next));
+           PUSH_YES_STATE_GOTO(CURLYX_end, PREVOPER(next), locinput);
             assert(0); /* NOTREACHED */
         }
  
             assert(0); /* NOTREACHED */
         }
  
@@ -4876,7 +5302,6 @@ NULL
             ST.cache_offset = 0;
             ST.cache_mask = 0;
             
             ST.cache_offset = 0;
             ST.cache_mask = 0;
             
-           PL_reginput = locinput;
  
             DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
                   "%*s  whilem: matched %ld out of %d..%d\n",
  
             DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
                   "%*s  whilem: matched %ld out of %d..%d\n",
@@ -4890,7 +5315,7 @@ NULL
                 cur_curlyx->u.curlyx.lastloc = locinput;
                 REGCP_SET(ST.lastcp);
  
                 cur_curlyx->u.curlyx.lastloc = locinput;
                 REGCP_SET(ST.lastcp);
  
-               PUSH_STATE_GOTO(WHILEM_A_pre, A);
+               PUSH_STATE_GOTO(WHILEM_A_pre, A, locinput);
                 assert(0); /* NOTREACHED */
             }
  
                 assert(0); /* NOTREACHED */
             }
  
@@ -4964,7 +5389,8 @@ NULL
                 cur_curlyx = cur_curlyx->u.curlyx.prev_curlyx;
                 ST.cp = regcppush(rex, ST.save_curlyx->u.curlyx.parenfloor);
                 REGCP_SET(ST.lastcp);
                 cur_curlyx = cur_curlyx->u.curlyx.prev_curlyx;
                 ST.cp = regcppush(rex, ST.save_curlyx->u.curlyx.parenfloor);
                 REGCP_SET(ST.lastcp);
-               PUSH_YES_STATE_GOTO(WHILEM_B_min, ST.save_curlyx->u.curlyx.B);
+               PUSH_YES_STATE_GOTO(WHILEM_B_min, ST.save_curlyx->u.curlyx.B,
+                                    locinput);
                 assert(0); /* NOTREACHED */
             }
  
                 assert(0); /* NOTREACHED */
             }
  
@@ -4974,7 +5400,7 @@ NULL
                 ST.cp = regcppush(rex, cur_curlyx->u.curlyx.parenfloor);
                 cur_curlyx->u.curlyx.lastloc = locinput;
                 REGCP_SET(ST.lastcp);
                 ST.cp = regcppush(rex, cur_curlyx->u.curlyx.parenfloor);
                 cur_curlyx->u.curlyx.lastloc = locinput;
                 REGCP_SET(ST.lastcp);
-               PUSH_STATE_GOTO(WHILEM_A_max, A);
+               PUSH_STATE_GOTO(WHILEM_A_max, A, locinput);
                 assert(0); /* NOTREACHED */
             }
             goto do_whilem_B_max;
                 assert(0); /* NOTREACHED */
             }
             goto do_whilem_B_max;
@@ -5007,7 +5433,6 @@ NULL
         case WHILEM_A_max_fail: /* just failed to match A in a maximal match */
             REGCP_UNWIND(ST.lastcp);
             regcppop(rex);      /* Restore some previous $<digit>s? */
         case WHILEM_A_max_fail: /* just failed to match A in a maximal match */
             REGCP_UNWIND(ST.lastcp);
             regcppop(rex);      /* Restore some previous $<digit>s? */
-           PL_reginput = locinput;
             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
                 "%*s  whilem: failed, trying continuation...\n",
                 REPORT_CODE_OFF+depth*2, "")
             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
                 "%*s  whilem: failed, trying continuation...\n",
                 REPORT_CODE_OFF+depth*2, "")
@@ -5027,7 +5452,8 @@ NULL
             /* now try B */
             ST.save_curlyx = cur_curlyx;
             cur_curlyx = cur_curlyx->u.curlyx.prev_curlyx;
             /* now try B */
             ST.save_curlyx = cur_curlyx;
             cur_curlyx = cur_curlyx->u.curlyx.prev_curlyx;
-           PUSH_YES_STATE_GOTO(WHILEM_B_max, ST.save_curlyx->u.curlyx.B);
+           PUSH_YES_STATE_GOTO(WHILEM_B_max, ST.save_curlyx->u.curlyx.B,
+                                locinput);
             assert(0); /* NOTREACHED */
  
         case WHILEM_B_min_fail: /* just failed to match B in a minimal match */
             assert(0); /* NOTREACHED */
  
         case WHILEM_B_min_fail: /* just failed to match B in a minimal match */
@@ -5055,12 +5481,12 @@ NULL
                 "%*s  trying longer...\n", REPORT_CODE_OFF+depth*2, "")
             );
             /* Try grabbing another A and see if it helps. */
                 "%*s  trying longer...\n", REPORT_CODE_OFF+depth*2, "")
             );
             /* Try grabbing another A and see if it helps. */
-           PL_reginput = locinput;
             cur_curlyx->u.curlyx.lastloc = locinput;
             ST.cp = regcppush(rex, cur_curlyx->u.curlyx.parenfloor);
             REGCP_SET(ST.lastcp);
             PUSH_STATE_GOTO(WHILEM_A_min,
             cur_curlyx->u.curlyx.lastloc = locinput;
             ST.cp = regcppush(rex, cur_curlyx->u.curlyx.parenfloor);
             REGCP_SET(ST.lastcp);
             PUSH_STATE_GOTO(WHILEM_A_min,
-               /*A*/ NEXTOPER(ST.save_curlyx->u.curlyx.me) + EXTRA_STEP_2ARGS);
+               /*A*/ NEXTOPER(ST.save_curlyx->u.curlyx.me) + EXTRA_STEP_2ARGS,
+                locinput);
             assert(0); /* NOTREACHED */
  
  #undef  ST
             assert(0); /* NOTREACHED */
  
  #undef  ST
@@ -5079,21 +5505,21 @@ NULL
             ST.lastcloseparen = rex->lastcloseparen;
             ST.next_branch = next;
             REGCP_SET(ST.cp);
             ST.lastcloseparen = rex->lastcloseparen;
             ST.next_branch = next;
             REGCP_SET(ST.cp);
-           PL_reginput = locinput;
  
             /* Now go into the branch */
             if (has_cutgroup) {
  
             /* Now go into the branch */
             if (has_cutgroup) {
-               PUSH_YES_STATE_GOTO(BRANCH_next, scan);    
+               PUSH_YES_STATE_GOTO(BRANCH_next, scan, locinput);
             } else {
             } else {
-               PUSH_STATE_GOTO(BRANCH_next, scan);
+               PUSH_STATE_GOTO(BRANCH_next, scan, locinput);
             }
             assert(0); /* NOTREACHED */
             }
             assert(0); /* NOTREACHED */
-        case CUTGROUP:
-            PL_reginput = locinput;
+
+        case CUTGROUP:  /*  /(*THEN)/  */
              sv_yes_mark = st->u.mark.mark_name = scan->flags ? NULL :
                  MUTABLE_SV(rexi->data->data[ ARG( scan ) ]);
              sv_yes_mark = st->u.mark.mark_name = scan->flags ? NULL :
                  MUTABLE_SV(rexi->data->data[ ARG( scan ) ]);
-            PUSH_STATE_GOTO(CUTGROUP_next,next);
+            PUSH_STATE_GOTO(CUTGROUP_next, next, locinput);
              assert(0); /* NOTREACHED */
              assert(0); /* NOTREACHED */
+
          case CUTGROUP_next_fail:
              do_cutgroup = 1;
              no_final = 1;
          case CUTGROUP_next_fail:
              do_cutgroup = 1;
              no_final = 1;
@@ -5101,9 +5527,11 @@ NULL
                  sv_commit = st->u.mark.mark_name;
              sayNO;         
              assert(0); /* NOTREACHED */
                  sv_commit = st->u.mark.mark_name;
              sayNO;         
              assert(0); /* NOTREACHED */
+
          case BRANCH_next:
              sayYES;
              assert(0); /* NOTREACHED */
          case BRANCH_next:
              sayYES;
              assert(0); /* NOTREACHED */
+
         case BRANCH_next_fail: /* that branch failed; try the next, if any */
             if (do_cutgroup) {
                 do_cutgroup = 0;
         case BRANCH_next_fail: /* that branch failed; try the next, if any */
             if (do_cutgroup) {
                 do_cutgroup = 0;
@@ -5126,7 +5554,7 @@ NULL
             continue; /* execute next BRANCH[J] op */
             assert(0); /* NOTREACHED */
      
             continue; /* execute next BRANCH[J] op */
             assert(0); /* NOTREACHED */
      
-       case MINMOD:
+       case MINMOD: /* next op will be non-greedy, e.g. A*?  */
             minmod = 1;
             break;
  
             minmod = 1;
             break;
  
@@ -5167,26 +5595,22 @@ NULL
                 goto curlym_do_B;
  
           curlym_do_A: /* execute the A in /A{m,n}B/  */
                 goto curlym_do_B;
  
           curlym_do_A: /* execute the A in /A{m,n}B/  */
-           PL_reginput = locinput;
-           PUSH_YES_STATE_GOTO(CURLYM_A, ST.A); /* match A */
+           PUSH_YES_STATE_GOTO(CURLYM_A, ST.A, locinput); /* match A */
             assert(0); /* NOTREACHED */
  
         case CURLYM_A: /* we've just matched an A */
             assert(0); /* NOTREACHED */
  
         case CURLYM_A: /* we've just matched an A */
-           locinput = st->locinput;
-           nextchr = UCHARAT(locinput);
-
             ST.count++;
             /* after first match, determine A's length: u.curlym.alen */
             if (ST.count == 1) {
                 if (PL_reg_match_utf8) {
             ST.count++;
             /* after first match, determine A's length: u.curlym.alen */
             if (ST.count == 1) {
                 if (PL_reg_match_utf8) {
-                   char *s = locinput;
-                   while (s < PL_reginput) {
+                   char *s = st->locinput;
+                   while (s < locinput) {
                         ST.alen++;
                         s += UTF8SKIP(s);
                     }
                 }
                 else {
                         ST.alen++;
                         s += UTF8SKIP(s);
                     }
                 }
                 else {
-                   ST.alen = PL_reginput - locinput;
+                   ST.alen = locinput - st->locinput;
                 }
                 if (ST.alen == 0)
                     ST.count = ST.minmod ? ARG1(ST.me) : ARG2(ST.me);
                 }
                 if (ST.alen == 0)
                     ST.count = ST.minmod ? ARG1(ST.me) : ARG2(ST.me);
@@ -5198,8 +5622,6 @@ NULL
                           (IV) ST.count, (IV)ST.alen)
             );
  
                           (IV) ST.count, (IV)ST.alen)
             );
  
-           locinput = PL_reginput;
-                       
             if (cur_eval && cur_eval->u.eval.close_paren && 
                 cur_eval->u.eval.close_paren == (U32)ST.me->flags) 
                 goto fake_end;
             if (cur_eval && cur_eval->u.eval.close_paren && 
                 cur_eval->u.eval.close_paren == (U32)ST.me->flags) 
                 goto fake_end;
@@ -5220,7 +5642,6 @@ NULL
                 sayNO;
  
           curlym_do_B: /* execute the B in /A{m,n}B/  */
                 sayNO;
  
           curlym_do_B: /* execute the B in /A{m,n}B/  */
-           PL_reginput = locinput;
             if (ST.c1 == CHRTEST_UNINIT) {
                 /* calculate c1 and c2 for possible match of 1st char
                  * following curly */
             if (ST.c1 == CHRTEST_UNINIT) {
                 /* calculate c1 and c2 for possible match of 1st char
                  * following curly */
@@ -5238,19 +5659,12 @@ NULL
                         if this changes back then the macro for 
                         IS_TEXT and friends need to change.
                      */
                         if this changes back then the macro for 
                         IS_TEXT and friends need to change.
                      */
-                   if (PL_regkind[OP(text_node)] == EXACT)
-                   {
-                       
-                       ST.c1 = (U8)*STRING(text_node);
-                       switch (OP(text_node)) {
-                           case EXACTF: ST.c2 = PL_fold[ST.c1]; break;
-                           case EXACTFA:
-                           case EXACTFU_SS:
-                           case EXACTFU_TRICKYFOLD:
-                           case EXACTFU: ST.c2 = PL_fold_latin1[ST.c1]; break;
-                           case EXACTFL: ST.c2 = PL_fold_locale[ST.c1]; break;
-                           default: ST.c2 = ST.c1;
-                       }
+                   if (PL_regkind[OP(text_node)] == EXACT) {
+                        if (! S_setup_EXACTISH_ST_c1_c2(aTHX_
+                           text_node, &ST.c1, ST.c1_utf8, &ST.c2, ST.c2_utf8))
+                        {
+                            sayNO;
+                        }
                     }
                 }
             }
                     }
                 }
             }
@@ -5261,28 +5675,44 @@ NULL
                     (int)(REPORT_CODE_OFF+(depth*2)),
                     "", (IV)ST.count)
                 );
                     (int)(REPORT_CODE_OFF+(depth*2)),
                     "", (IV)ST.count)
                 );
-           if (ST.c1 != CHRTEST_VOID
-                   && UCHARAT(PL_reginput) != ST.c1
-                   && UCHARAT(PL_reginput) != ST.c2)
-           {
-               /* simulate B failing */
-               DEBUG_OPTIMISE_r(
-                   PerlIO_printf(Perl_debug_log,
-                       "%*s  CURLYM Fast bail c1=%"IVdf" c2=%"IVdf"\n",
-                       (int)(REPORT_CODE_OFF+(depth*2)),"",
-                       (IV)ST.c1,(IV)ST.c2
-               ));
-               state_num = CURLYM_B_fail;
-               goto reenter_switch;
-           }
+           if (! NEXTCHR_IS_EOS && ST.c1 != CHRTEST_VOID) {
+                if (! UTF8_IS_INVARIANT(nextchr) && utf8_target) {
+                    if (memNE(locinput, ST.c1_utf8, UTF8SKIP(locinput))
+                        && memNE(locinput, ST.c2_utf8, UTF8SKIP(locinput)))
+                    {
+                        /* simulate B failing */
+                        DEBUG_OPTIMISE_r(
+                            PerlIO_printf(Perl_debug_log,
+                                "%*s  CURLYM Fast bail next target=U+%"UVXf" c1=U+%"UVXf" c2=U+%"UVXf"\n",
+                                (int)(REPORT_CODE_OFF+(depth*2)),"",
+                                valid_utf8_to_uvchr((U8 *) locinput, NULL),
+                                valid_utf8_to_uvchr(ST.c1_utf8, NULL),
+                                valid_utf8_to_uvchr(ST.c2_utf8, NULL))
+                        );
+                        state_num = CURLYM_B_fail;
+                        goto reenter_switch;
+                    }
+                }
+                else if (nextchr != ST.c1 && nextchr != ST.c2) {
+                    /* simulate B failing */
+                    DEBUG_OPTIMISE_r(
+                        PerlIO_printf(Perl_debug_log,
+                            "%*s  CURLYM Fast bail next target=U+%X c1=U+%X c2=U+%X\n",
+                            (int)(REPORT_CODE_OFF+(depth*2)),"",
+                            (int) nextchr, ST.c1, ST.c2)
+                    );
+                    state_num = CURLYM_B_fail;
+                    goto reenter_switch;
+                }
+            }
  
             if (ST.me->flags) {
                 /* emulate CLOSE: mark current A as captured */
                 I32 paren = ST.me->flags;
                 if (ST.count) {
                     rex->offs[paren].start
  
             if (ST.me->flags) {
                 /* emulate CLOSE: mark current A as captured */
                 I32 paren = ST.me->flags;
                 if (ST.count) {
                     rex->offs[paren].start
-                       = HOPc(PL_reginput, -ST.alen) - PL_bostr;
-                   rex->offs[paren].end = PL_reginput - PL_bostr;
+                       = HOPc(locinput, -ST.alen) - PL_bostr;
+                   rex->offs[paren].end = locinput - PL_bostr;
                     if ((U32)paren > rex->lastparen)
                         rex->lastparen = paren;
                     rex->lastcloseparen = paren;
                     if ((U32)paren > rex->lastparen)
                         rex->lastparen = paren;
                     rex->lastcloseparen = paren;
@@ -5299,7 +5729,7 @@ NULL
                 }
             }
             
                 }
             }
             
-           PUSH_STATE_GOTO(CURLYM_B, ST.B); /* match B */
+           PUSH_STATE_GOTO(CURLYM_B, ST.B, locinput); /* match B */
             assert(0); /* NOTREACHED */
  
         case CURLYM_B_fail: /* just failed to match a B */
             assert(0); /* NOTREACHED */
  
         case CURLYM_B_fail: /* just failed to match a B */
@@ -5315,7 +5745,7 @@ NULL
             if (ST.count == ARG1(ST.me) /* min */)
                 sayNO;
             ST.count--;
             if (ST.count == ARG1(ST.me) /* min */)
                 sayNO;
             ST.count--;
-           locinput = HOPc(locinput, -ST.alen);
+           SET_locinput(HOPc(locinput, -ST.alen));
             goto curlym_do_B; /* try to match B */
  
  #undef ST
             goto curlym_do_B; /* try to match B */
  
  #undef ST
@@ -5337,21 +5767,23 @@ NULL
         } \
      }
  
         } \
      }
  
-       case STAR:              /*  /A*B/ where A is width 1 */
+        case STAR:             /*  /A*B/ where A is width 1 char */
             ST.paren = 0;
             ST.min = 0;
             ST.max = REG_INFTY;
             scan = NEXTOPER(scan);
             goto repeat;
             ST.paren = 0;
             ST.min = 0;
             ST.max = REG_INFTY;
             scan = NEXTOPER(scan);
             goto repeat;
-       case PLUS:              /*  /A+B/ where A is width 1 */
+
+        case PLUS:             /*  /A+B/ where A is width 1 char */
             ST.paren = 0;
             ST.min = 1;
             ST.max = REG_INFTY;
             scan = NEXTOPER(scan);
             goto repeat;
             ST.paren = 0;
             ST.min = 1;
             ST.max = REG_INFTY;
             scan = NEXTOPER(scan);
             goto repeat;
-       case CURLYN:            /*  /(A){m,n}B/ where A is width 1 */
-           ST.paren = scan->flags;     /* Which paren to set */
-           ST.lastparen      = rex->lastparen;
+
+       case CURLYN:            /*  /(A){m,n}B/ where A is width 1 char */
+            ST.paren = scan->flags;    /* Which paren to set */
+            ST.lastparen      = rex->lastparen;
             ST.lastcloseparen = rex->lastcloseparen;
             if (ST.paren > PL_regsize)
                 PL_regsize = ST.paren;
             ST.lastcloseparen = rex->lastcloseparen;
             if (ST.paren > PL_regsize)
                 PL_regsize = ST.paren;
@@ -5364,7 +5796,8 @@ NULL
             }
              scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
             goto repeat;
             }
              scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
             goto repeat;
-       case CURLY:             /*  /A{m,n}B/ where A is width 1 */
+
+       case CURLY:             /*  /A{m,n}B/ where A is width 1 char */
             ST.paren = 0;
             ST.min = ARG1(scan);  /* min to match */
             ST.max = ARG2(scan);  /* max to match */
             ST.paren = 0;
             ST.min = ARG1(scan);  /* min to match */
             ST.max = ARG2(scan);  /* max to match */
@@ -5379,10 +5812,11 @@ NULL
             * of the quantifier and the EXACT-like node.  -- japhy
             */
  
             * of the quantifier and the EXACT-like node.  -- japhy
             */
  
-           if (ST.min > ST.max) /* XXX make this a compile-time check? */
-               sayNO;
-           if (HAS_TEXT(next) || JUMPABLE(next)) {
-               U8 *s;
+           assert(ST.min <= ST.max);
+            if (! HAS_TEXT(next) && ! JUMPABLE(next)) {
+                ST.c1 = ST.c2 = CHRTEST_VOID;
+            }
+            else {
                 regnode *text_node = next;
  
                 if (! HAS_TEXT(text_node)) 
                 regnode *text_node = next;
  
                 if (! HAS_TEXT(text_node)) 
@@ -5393,10 +5827,8 @@ NULL
                 else {
                     if ( PL_regkind[OP(text_node)] != EXACT ) {
                         ST.c1 = ST.c2 = CHRTEST_VOID;
                 else {
                     if ( PL_regkind[OP(text_node)] != EXACT ) {
                         ST.c1 = ST.c2 = CHRTEST_VOID;
-                       goto assume_ok_easy;
                     }
                     }
-                   else
-                       s = (U8*)STRING(text_node);
+                   else {
                      
                      /*  Currently we only get here when 
                          
                      
                      /*  Currently we only get here when 
                          
@@ -5404,47 +5836,24 @@ NULL
                      
                          if this changes back then the macro for IS_TEXT and 
                          friends need to change. */
                      
                          if this changes back then the macro for IS_TEXT and 
                          friends need to change. */
-                   if (!UTF_PATTERN) {
-                       ST.c1 = *s;
-                       switch (OP(text_node)) {
-                           case EXACTF: ST.c2 = PL_fold[ST.c1]; break;
-                           case EXACTFA:
-                           case EXACTFU_SS:
-                           case EXACTFU_TRICKYFOLD:
-                           case EXACTFU: ST.c2 = PL_fold_latin1[ST.c1]; break;
-                           case EXACTFL: ST.c2 = PL_fold_locale[ST.c1]; break;
-                           default: ST.c2 = ST.c1; break;
-                       }
-                   }
-                   else { /* UTF_PATTERN */
-                       if (IS_TEXTFU(text_node) || IS_TEXTF(text_node)) {
-                            STRLEN ulen;
-                            U8 tmpbuf[UTF8_MAXBYTES_CASE+1];
-
-                            to_utf8_fold((U8*)s, tmpbuf, &ulen);
-                            ST.c1 = ST.c2 = utf8n_to_uvchr(tmpbuf, UTF8_MAXLEN, 0,
-                                                   uniflags);
-                       }
-                       else {
-                           ST.c2 = ST.c1 = utf8n_to_uvchr(s, UTF8_MAXBYTES, 0,
-                                                    uniflags);
-                       }
-                   }
+                        if (! S_setup_EXACTISH_ST_c1_c2(aTHX_
+                           text_node, &ST.c1, ST.c1_utf8, &ST.c2, ST.c2_utf8))
+                        {
+                            sayNO;
+                        }
+                    }
                 }
             }
                 }
             }
-           else
-               ST.c1 = ST.c2 = CHRTEST_VOID;
-       assume_ok_easy:
  
             ST.A = scan;
             ST.B = next;
  
             ST.A = scan;
             ST.B = next;
-           PL_reginput = locinput;
             if (minmod) {
             if (minmod) {
+                char *li = locinput;
                 minmod = 0;
                 minmod = 0;
-               if (ST.min && regrepeat(rex, ST.A, ST.min, depth) < ST.min)
+               if (ST.min && regrepeat(rex, &li, ST.A, ST.min, depth) < ST.min)
                     sayNO;
                     sayNO;
+                SET_locinput(li);
                 ST.count = ST.min;
                 ST.count = ST.min;
-               locinput = PL_reginput;
                 REGCP_SET(ST.cp);
                 if (ST.c1 == CHRTEST_VOID)
                     goto curly_try_B_min;
                 REGCP_SET(ST.cp);
                 if (ST.c1 == CHRTEST_VOID)
                     goto curly_try_B_min;
@@ -5474,10 +5883,13 @@ NULL
  
             }
             else {
  
             }
             else {
-               ST.count = regrepeat(rex, ST.A, ST.max, depth);
-               locinput = PL_reginput;
+                /* avoid taking address of locinput, so it can remain
+                 * a register var */
+                char *li = locinput;
+               ST.count = regrepeat(rex, &li, ST.A, ST.max, depth);
                 if (ST.count < ST.min)
                     sayNO;
                 if (ST.count < ST.min)
                     sayNO;
+                SET_locinput(li);
                 if ((ST.count > ST.min)
                     && (PL_regkind[OP(ST.B)] == EOL) && (OP(ST.B) != MEOL))
                 {
                 if ((ST.count > ST.min)
                     && (PL_regkind[OP(ST.B)] == EOL) && (OP(ST.B) != MEOL))
                 {
@@ -5487,7 +5899,7 @@ NULL
                     /* ...except that $ and \Z can match before *and* after
                        newline at the end.  Consider "\n\n" =~ /\n+\Z\n/.
                        We may back off by one in this case. */
                     /* ...except that $ and \Z can match before *and* after
                        newline at the end.  Consider "\n\n" =~ /\n+\Z\n/.
                        We may back off by one in this case. */
-                   if (UCHARAT(PL_reginput - 1) == '\n' && OP(ST.B) != EOS)
+                   if (UCHARAT(locinput - 1) == '\n' && OP(ST.B) != EOS)
                         ST.min--;
                 }
                 REGCP_SET(ST.cp);
                         ST.min--;
                 }
                 REGCP_SET(ST.cp);
@@ -5499,7 +5911,6 @@ NULL
         case CURLY_B_min_known_fail:
             /* failed to find B in a non-greedy match where c1,c2 valid */
  
         case CURLY_B_min_known_fail:
             /* failed to find B in a non-greedy match where c1,c2 valid */
  
-           PL_reginput = locinput;     /* Could be reset... */
             REGCP_UNWIND(ST.cp);
              if (ST.paren) {
                  UNWIND_PAREN(ST.lastparen, ST.lastcloseparen);
             REGCP_UNWIND(ST.cp);
              if (ST.paren) {
                  UNWIND_PAREN(ST.lastparen, ST.lastcloseparen);
@@ -5518,31 +5929,26 @@ NULL
                 if (utf8_target) {
                     n = (ST.oldloc == locinput) ? 0 : 1;
                     if (ST.c1 == ST.c2) {
                 if (utf8_target) {
                     n = (ST.oldloc == locinput) ? 0 : 1;
                     if (ST.c1 == ST.c2) {
-                       STRLEN len;
                         /* set n to utf8_distance(oldloc, locinput) */
                         /* set n to utf8_distance(oldloc, locinput) */
-                       while (locinput <= ST.maxpos &&
-                              utf8n_to_uvchr((U8*)locinput,
-                                             UTF8_MAXBYTES, &len,
-                                             uniflags) != (UV)ST.c1) {
-                           locinput += len;
+                       while (locinput <= ST.maxpos
+                              && memNE(locinput, ST.c1_utf8, UTF8SKIP(locinput)))
+                        {
+                           locinput += UTF8SKIP(locinput);
                             n++;
                         }
                     }
                     else {
                         /* set n to utf8_distance(oldloc, locinput) */
                             n++;
                         }
                     }
                     else {
                         /* set n to utf8_distance(oldloc, locinput) */
-                       while (locinput <= ST.maxpos) {
-                           STRLEN len;
-                           const UV c = utf8n_to_uvchr((U8*)locinput,
-                                                 UTF8_MAXBYTES, &len,
-                                                 uniflags);
-                           if (c == (UV)ST.c1 || c == (UV)ST.c2)
-                               break;
-                           locinput += len;
+                       while (locinput <= ST.maxpos
+                              && memNE(locinput, ST.c1_utf8, UTF8SKIP(locinput))
+                              && memNE(locinput, ST.c2_utf8, UTF8SKIP(locinput)))
+                        {
+                           locinput += UTF8SKIP(locinput);
                             n++;
                         }
                     }
                 }
                             n++;
                         }
                     }
                 }
-               else {
+               else {  /* Not utf8_target */
                     if (ST.c1 == ST.c2) {
                         while (locinput <= ST.maxpos &&
                                UCHARAT(locinput) != ST.c1)
                     if (ST.c1 == ST.c2) {
                         while (locinput <= ST.maxpos &&
                                UCHARAT(locinput) != ST.c1)
@@ -5558,19 +5964,22 @@ NULL
                 }
                 if (locinput > ST.maxpos)
                     sayNO;
                 }
                 if (locinput > ST.maxpos)
                     sayNO;
-               /* PL_reginput == oldloc now */
                 if (n) {
                 if (n) {
+                    /* In /a{m,n}b/, ST.oldloc is at "a" x m, locinput is
+                     * at b; check that everything between oldloc and
+                     * locinput matches */
+                    char *li = ST.oldloc;
                     ST.count += n;
                     ST.count += n;
-                   if (regrepeat(rex, ST.A, n, depth) < n)
+                   if (regrepeat(rex, &li, ST.A, n, depth) < n)
                         sayNO;
                         sayNO;
+                    assert(n == REG_INFTY || locinput == li);
                 }
                 }
-               PL_reginput = locinput;
                 CURLY_SETPAREN(ST.paren, ST.count);
                 if (cur_eval && cur_eval->u.eval.close_paren && 
                     cur_eval->u.eval.close_paren == (U32)ST.paren) {
                     goto fake_end;
                 }
                 CURLY_SETPAREN(ST.paren, ST.count);
                 if (cur_eval && cur_eval->u.eval.close_paren && 
                     cur_eval->u.eval.close_paren == (U32)ST.paren) {
                     goto fake_end;
                 }
-               PUSH_STATE_GOTO(CURLY_B_min_known, ST.B);
+               PUSH_STATE_GOTO(CURLY_B_min_known, ST.B, locinput);
             }
             assert(0); /* NOTREACHED */
  
             }
             assert(0); /* NOTREACHED */
  
@@ -5583,10 +5992,15 @@ NULL
                  UNWIND_PAREN(ST.lastparen, ST.lastcloseparen);
              }
             /* failed -- move forward one */
                  UNWIND_PAREN(ST.lastparen, ST.lastcloseparen);
              }
             /* failed -- move forward one */
-           PL_reginput = locinput;
-           if (regrepeat(rex, ST.A, 1, depth)) {
+            {
+                char *li = locinput;
+                if (!regrepeat(rex, &li, ST.A, 1, depth)) {
+                    sayNO;
+                }
+                locinput = li;
+            }
+            {
                 ST.count++;
                 ST.count++;
-               locinput = PL_reginput;
                 if (ST.count <= ST.max || (ST.max == REG_INFTY &&
                         ST.count > 0)) /* count overflow ? */
                 {
                 if (ST.count <= ST.max || (ST.max == REG_INFTY &&
                         ST.count > 0)) /* count overflow ? */
                 {
@@ -5596,10 +6010,10 @@ NULL
                         cur_eval->u.eval.close_paren == (U32)ST.paren) {
                          goto fake_end;
                      }
                         cur_eval->u.eval.close_paren == (U32)ST.paren) {
                          goto fake_end;
                      }
-                   PUSH_STATE_GOTO(CURLY_B_min, ST.B);
+                   PUSH_STATE_GOTO(CURLY_B_min, ST.B, locinput);
                 }
             }
                 }
             }
-           sayNO;
+            sayNO;
             assert(0); /* NOTREACHED */
  
  
             assert(0); /* NOTREACHED */
  
  
@@ -5610,19 +6024,32 @@ NULL
                  goto fake_end;
              }
             {
                  goto fake_end;
              }
             {
-               UV c = 0;
-               if (ST.c1 != CHRTEST_VOID)
-                   c = utf8_target ? utf8n_to_uvchr((U8*)PL_reginput,
-                                          UTF8_MAXBYTES, 0, uniflags)
-                               : (UV) UCHARAT(PL_reginput);
+               bool could_match = locinput < PL_regeol;
+
                 /* If it could work, try it. */
                 /* If it could work, try it. */
-               if (ST.c1 == CHRTEST_VOID || c == (UV)ST.c1 || c == (UV)ST.c2) {
+                if (ST.c1 != CHRTEST_VOID && could_match) {
+                    if (! UTF8_IS_INVARIANT(UCHARAT(locinput)) && utf8_target)
+                    {
+                        could_match = memEQ(locinput,
+                                            ST.c1_utf8,
+                                            UTF8SKIP(locinput))
+                                    || memEQ(locinput,
+                                             ST.c2_utf8,
+                                             UTF8SKIP(locinput));
+                    }
+                    else {
+                        could_match = UCHARAT(locinput) == ST.c1
+                                      || UCHARAT(locinput) == ST.c2;
+                    }
+                }
+                if (ST.c1 == CHRTEST_VOID || could_match) {
                     CURLY_SETPAREN(ST.paren, ST.count);
                     CURLY_SETPAREN(ST.paren, ST.count);
-                   PUSH_STATE_GOTO(CURLY_B_max, ST.B);
+                   PUSH_STATE_GOTO(CURLY_B_max, ST.B, locinput);
                     assert(0); /* NOTREACHED */
                 }
             }
             /* FALL THROUGH */
                     assert(0); /* NOTREACHED */
                 }
             }
             /* FALL THROUGH */
+
         case CURLY_B_max_fail:
             /* failed to find B in a greedy match */
  
         case CURLY_B_max_fail:
             /* failed to find B in a greedy match */
  
@@ -5633,12 +6060,12 @@ NULL
             /*  back up. */
             if (--ST.count < ST.min)
                 sayNO;
             /*  back up. */
             if (--ST.count < ST.min)
                 sayNO;
-           PL_reginput = locinput = HOPc(locinput, -1);
+           locinput = HOPc(locinput, -1);
             goto curly_try_B_max;
  
  #undef ST
  
             goto curly_try_B_max;
  
  #undef ST
  
-       case END:
+       case END: /*  last op of main pattern  */
             fake_end:
             if (cur_eval) {
                 /* we've just finished A in /(??{A})B/; now continue with B */
             fake_end:
             if (cur_eval) {
                 /* we've just finished A in /(??{A})B/; now continue with B */
@@ -5655,7 +6082,6 @@ NULL
                 cur_curlyx = cur_eval->u.eval.prev_curlyx;
  
                 REGCP_SET(st->u.eval.lastcp);
                 cur_curlyx = cur_eval->u.eval.prev_curlyx;
  
                 REGCP_SET(st->u.eval.lastcp);
-               PL_reginput = locinput;
  
                 /* Restore parens of the outer rex without popping the
                  * savestack */
  
                 /* Restore parens of the outer rex without popping the
                  * savestack */
@@ -5669,8 +6095,8 @@ NULL
                  if ( nochange_depth )
                     nochange_depth--;
  
                  if ( nochange_depth )
                     nochange_depth--;
  
-                PUSH_YES_STATE_GOTO(EVAL_AB,
-                       st->u.eval.prev_eval->u.eval.B); /* match B */
+                PUSH_YES_STATE_GOTO(EVAL_AB, st->u.eval.prev_eval->u.eval.B,
+                                    locinput); /* match B */
             }
  
             if (locinput < reginfo->till) {
             }
  
             if (locinput < reginfo->till) {
@@ -5683,7 +6109,6 @@ NULL
                                                       
                 sayNO_SILENT;           /* Cannot match: too short. */
             }
                                                       
                 sayNO_SILENT;           /* Cannot match: too short. */
             }
-           PL_reginput = locinput;     /* put where regtry can find it */
             sayYES;                     /* Success! */
  
         case SUCCEED: /* successful SUSPEND/UNLESSM/IFMATCH/CURLYM */
             sayYES;                     /* Success! */
  
         case SUCCEED: /* successful SUSPEND/UNLESSM/IFMATCH/CURLYM */
@@ -5691,15 +6116,17 @@ NULL
             PerlIO_printf(Perl_debug_log,
                 "%*s  %ssubpattern success...%s\n",
                 REPORT_CODE_OFF+depth*2, "", PL_colors[4], PL_colors[5]));
             PerlIO_printf(Perl_debug_log,
                 "%*s  %ssubpattern success...%s\n",
                 REPORT_CODE_OFF+depth*2, "", PL_colors[4], PL_colors[5]));
-           PL_reginput = locinput;     /* put where regtry can find it */
             sayYES;                     /* Success! */
  
  #undef  ST
  #define ST st->u.ifmatch
  
             sayYES;                     /* Success! */
  
  #undef  ST
  #define ST st->u.ifmatch
  
+        {
+            char *newstart;
+
         case SUSPEND:   /* (?>A) */
             ST.wanted = 1;
         case SUSPEND:   /* (?>A) */
             ST.wanted = 1;
-           PL_reginput = locinput;
+           newstart = locinput;
             goto do_ifmatch;    
  
         case UNLESSM:   /* -ve lookaround: (?!A), or with flags, (?<!A) */
             goto do_ifmatch;    
  
         case UNLESSM:   /* -ve lookaround: (?!A), or with flags, (?<!A) */
@@ -5724,10 +6151,10 @@ NULL
                         next = NULL;
                     break;
                 }
                         next = NULL;
                     break;
                 }
-               PL_reginput = s;
+               newstart = s;
             }
             else
             }
             else
-               PL_reginput = locinput;
+               newstart = locinput;
  
           do_ifmatch:
             ST.me = scan;
  
           do_ifmatch:
             ST.me = scan;
@@ -5735,8 +6162,9 @@ NULL
             logical = 0; /* XXX: reset state of logical once it has been saved into ST */
             
             /* execute body of (?...A) */
             logical = 0; /* XXX: reset state of logical once it has been saved into ST */
             
             /* execute body of (?...A) */
-           PUSH_YES_STATE_GOTO(IFMATCH_A, NEXTOPER(NEXTOPER(scan)));
+           PUSH_YES_STATE_GOTO(IFMATCH_A, NEXTOPER(NEXTOPER(scan)), newstart);
             assert(0); /* NOTREACHED */
             assert(0); /* NOTREACHED */
+        }
  
         case IFMATCH_A_fail: /* body of (?...A) failed */
             ST.wanted = !ST.wanted;
  
         case IFMATCH_A_fail: /* body of (?...A) failed */
             ST.wanted = !ST.wanted;
@@ -5749,11 +6177,9 @@ NULL
             else if (!ST.wanted)
                 sayNO;
  
             else if (!ST.wanted)
                 sayNO;
  
-           if (OP(ST.me) == SUSPEND)
-               locinput = PL_reginput;
-           else {
-               locinput = PL_reginput = st->locinput;
-               nextchr = UCHARAT(locinput);
+           if (OP(ST.me) != SUSPEND) {
+                /* restore old position except for (?>...) */
+               locinput = st->locinput;
             }
             scan = ST.me + ARG(ST.me);
             if (scan == ST.me)
             }
             scan = ST.me + ARG(ST.me);
             if (scan == ST.me)
@@ -5762,40 +6188,46 @@ NULL
  
  #undef ST
  
  
  #undef ST
  
-       case LONGJMP:
+       case LONGJMP: /*  alternative with many branches compiles to
+                       * (BRANCHJ; EXACT ...; LONGJMP ) x N */
             next = scan + ARG(scan);
             if (next == scan)
                 next = NULL;
             break;
             next = scan + ARG(scan);
             if (next == scan)
                 next = NULL;
             break;
-       case COMMIT:
+
+       case COMMIT:  /*  (*COMMIT)  */
             reginfo->cutpoint = PL_regeol;
             /* FALLTHROUGH */
             reginfo->cutpoint = PL_regeol;
             /* FALLTHROUGH */
-       case PRUNE:
-           PL_reginput = locinput;
+
+       case PRUNE:   /*  (*PRUNE)   */
             if (!scan->flags)
                 sv_yes_mark = sv_commit = MUTABLE_SV(rexi->data->data[ ARG( scan ) ]);
             if (!scan->flags)
                 sv_yes_mark = sv_commit = MUTABLE_SV(rexi->data->data[ ARG( scan ) ]);
-           PUSH_STATE_GOTO(COMMIT_next,next);
+           PUSH_STATE_GOTO(COMMIT_next, next, locinput);
             assert(0); /* NOTREACHED */
             assert(0); /* NOTREACHED */
+
         case COMMIT_next_fail:
             no_final = 1;    
             /* FALLTHROUGH */       
         case COMMIT_next_fail:
             no_final = 1;    
             /* FALLTHROUGH */       
-       case OPFAIL:
+
+       case OPFAIL:   /* (*FAIL)  */
             sayNO;
             assert(0); /* NOTREACHED */
  
  #define ST st->u.mark
             sayNO;
             assert(0); /* NOTREACHED */
  
  #define ST st->u.mark
-        case MARKPOINT:
+        case MARKPOINT: /*  (*MARK:foo)  */
              ST.prev_mark = mark_state;
              ST.mark_name = sv_commit = sv_yes_mark 
                  = MUTABLE_SV(rexi->data->data[ ARG( scan ) ]);
              mark_state = st;
              ST.prev_mark = mark_state;
              ST.mark_name = sv_commit = sv_yes_mark 
                  = MUTABLE_SV(rexi->data->data[ ARG( scan ) ]);
              mark_state = st;
-            ST.mark_loc = PL_reginput = locinput;
-            PUSH_YES_STATE_GOTO(MARKPOINT_next,next);
+            ST.mark_loc = locinput;
+            PUSH_YES_STATE_GOTO(MARKPOINT_next, next, locinput);
              assert(0); /* NOTREACHED */
              assert(0); /* NOTREACHED */
+
          case MARKPOINT_next:
              mark_state = ST.prev_mark;
              sayYES;
              assert(0); /* NOTREACHED */
          case MARKPOINT_next:
              mark_state = ST.prev_mark;
              sayYES;
              assert(0); /* NOTREACHED */
+
          case MARKPOINT_next_fail:
              if (popmark && sv_eq(ST.mark_name,popmark)) 
              {
          case MARKPOINT_next_fail:
              if (popmark && sv_eq(ST.mark_name,popmark)) 
              {
@@ -5816,13 +6248,13 @@ NULL
                  mark_state->u.mark.mark_name : NULL;
              sayNO;
              assert(0); /* NOTREACHED */
                  mark_state->u.mark.mark_name : NULL;
              sayNO;
              assert(0); /* NOTREACHED */
-        case SKIP:
-            PL_reginput = locinput;
+
+        case SKIP:  /*  (*SKIP)  */
              if (scan->flags) {
                  /* (*SKIP) : if we fail we cut here*/
                  ST.mark_name = NULL;
                  ST.mark_loc = locinput;
              if (scan->flags) {
                  /* (*SKIP) : if we fail we cut here*/
                  ST.mark_name = NULL;
                  ST.mark_loc = locinput;
-                PUSH_STATE_GOTO(SKIP_next,next);    
+                PUSH_STATE_GOTO(SKIP_next,next, locinput);
              } else {
                  /* (*SKIP:NAME) : if there is a (*MARK:NAME) fail where it was, 
                     otherwise do nothing.  Meaning we need to scan 
              } else {
                  /* (*SKIP:NAME) : if there is a (*MARK:NAME) fail where it was, 
                     otherwise do nothing.  Meaning we need to scan 
@@ -5835,13 +6267,14 @@ NULL
                                  find ) ) 
                      {
                          ST.mark_name = find;
                                  find ) ) 
                      {
                          ST.mark_name = find;
-                        PUSH_STATE_GOTO( SKIP_next, next );
+                        PUSH_STATE_GOTO( SKIP_next, next, locinput);
                      }
                      cur = cur->u.mark.prev_mark;
                  }
              }    
              /* Didn't find our (*MARK:NAME) so ignore this (*SKIP:NAME) */
              break;    
                      }
                      cur = cur->u.mark.prev_mark;
                  }
              }    
              /* Didn't find our (*MARK:NAME) so ignore this (*SKIP:NAME) */
              break;    
+
         case SKIP_next_fail:
             if (ST.mark_name) {
                 /* (*CUT:NAME) - Set up to search for the name as we 
         case SKIP_next_fail:
             if (ST.mark_name) {
                 /* (*CUT:NAME) - Set up to search for the name as we 
@@ -5861,43 +6294,55 @@ NULL
              sayNO;
              assert(0); /* NOTREACHED */
  #undef ST
              sayNO;
              assert(0); /* NOTREACHED */
  #undef ST
-        case LNBREAK:
-            if ((n=is_LNBREAK(locinput,utf8_target))) {
+
+        case LNBREAK: /* \R */
+            if ((n=is_LNBREAK_safe(locinput, PL_regeol, utf8_target))) {
                  locinput += n;
                  locinput += n;
-                nextchr = UCHARAT(locinput);
              } else
                  sayNO;
              break;
  
  #define CASE_CLASS(nAmE)                              \
          case nAmE:                                    \
              } else
                  sayNO;
              break;
  
  #define CASE_CLASS(nAmE)                              \
          case nAmE:                                    \
-           if (locinput >= PL_regeol)                \
+           if (NEXTCHR_IS_EOS)                       \
                 sayNO;                                \
              if ((n=is_##nAmE(locinput,utf8_target))) {    \
                  locinput += n;                        \
                 sayNO;                                \
              if ((n=is_##nAmE(locinput,utf8_target))) {    \
                  locinput += n;                        \
-                nextchr = UCHARAT(locinput);          \
              } else                                    \
                  sayNO;                                \
              break;                                    \
          case N##nAmE:                                 \
              } else                                    \
                  sayNO;                                \
              break;                                    \
          case N##nAmE:                                 \
-           if (locinput >= PL_regeol)                \
+           if (NEXTCHR_IS_EOS)                       \
                 sayNO;                                \
              if ((n=is_##nAmE(locinput,utf8_target))) {    \
                  sayNO;                                \
              } else {                                  \
                  locinput += UTF8SKIP(locinput);       \
                 sayNO;                                \
              if ((n=is_##nAmE(locinput,utf8_target))) {    \
                  sayNO;                                \
              } else {                                  \
                  locinput += UTF8SKIP(locinput);       \
-                nextchr = UCHARAT(locinput);          \
              }                                         \
              break
  
              }                                         \
              break
  
-        CASE_CLASS(VERTWS);
-        CASE_CLASS(HORIZWS);
+        CASE_CLASS(VERTWS);  /*  \v \V  */
+        CASE_CLASS(HORIZWS); /*  \h \H  */
  #undef CASE_CLASS
  
         default:
             PerlIO_printf(Perl_error_log, "%"UVxf" %d\n",
                           PTR2UV(scan), OP(scan));
             Perl_croak(aTHX_ "regexp memory corruption");
  #undef CASE_CLASS
  
         default:
             PerlIO_printf(Perl_error_log, "%"UVxf" %d\n",
                           PTR2UV(scan), OP(scan));
             Perl_croak(aTHX_ "regexp memory corruption");
+
+        /* this is a point to jump to in order to increment
+         * locinput by one character */
+        increment_locinput:
+            assert(!NEXTCHR_IS_EOS);
+            if (utf8_target) {
+                locinput += PL_utf8skip[nextchr];
+                /* locinput is allowed to go 1 char off the end, but not 2+ */
+                if (locinput > PL_regeol)
+                    sayNO;
+            }
+            else
+                locinput++;
+            break;
             
         } /* end switch */ 
  
             
         } /* end switch */ 
  
@@ -5944,8 +6389,7 @@ NULL
                 newst = S_push_slab(aTHX);
             PL_regmatch_state = newst;
  
                 newst = S_push_slab(aTHX);
             PL_regmatch_state = newst;
  
-           locinput = PL_reginput;
-           nextchr = UCHARAT(locinput);
+           locinput = pushinput;
             st = newst;
             continue;
             assert(0); /* NOTREACHED */
             st = newst;
             continue;
             assert(0); /* NOTREACHED */
@@ -5996,10 +6440,8 @@ yes:
         yes_state = st->u.yes.prev_yes_state;
         PL_regmatch_state = st;
          
         yes_state = st->u.yes.prev_yes_state;
         PL_regmatch_state = st;
          
-        if (no_final) {
+        if (no_final)
              locinput= st->locinput;
              locinput= st->locinput;
-            nextchr = UCHARAT(locinput);
-        }
         state_num = st->resume_state + no_final;
         goto reenter_switch;
      }
         state_num = st->resume_state + no_final;
         goto reenter_switch;
      }
@@ -6044,7 +6486,6 @@ no_silent:
         }
         PL_regmatch_state = st;
         locinput= st->locinput;
         }
         PL_regmatch_state = st;
         locinput= st->locinput;
-       nextchr = UCHARAT(locinput);
  
         DEBUG_STATE_pp("pop");
         depth--;
  
         DEBUG_STATE_pp("pop");
         depth--;
@@ -6083,25 +6524,31 @@ no_silent:
      /* clean up; in particular, free all slabs above current one */
      LEAVE_SCOPE(oldsave);
  
      /* clean up; in particular, free all slabs above current one */
      LEAVE_SCOPE(oldsave);
  
-    return result;
+    assert(!result ||  locinput - PL_bostr >= 0);
+    return result ?  locinput - PL_bostr : -1;
  }
  
  /*
   - regrepeat - repeatedly match something simple, report how many
  }
  
  /*
   - regrepeat - repeatedly match something simple, report how many
- */
-/*
- * [This routine now assumes that it will only match on things of length 1.
- * That was true before, but now we assume scan - reginput is the count,
- * rather than incrementing count on every character.  [Er, except utf8.]]
+ *
+ * What 'simple' means is a node which can be the operand of a quantifier like
+ * '+', or {1,3}
+ *
+ * startposp - pointer a pointer to the start position.  This is updated
+ *             to point to the byte following the highest successful
+ *             match.
+ * p         - the regnode to be repeatedly matched against.
+ * max       - maximum number of things to match.
+ * depth     - (for debugging) backtracking depth.
   */
  STATIC I32
   */
  STATIC I32
-S_regrepeat(pTHX_ const regexp *prog, const regnode *p, I32 max, int depth)
+S_regrepeat(pTHX_ const regexp *prog, char **startposp, const regnode *p, I32 max, int depth)
  {
      dVAR;
  {
      dVAR;
-    char *scan;
+    char *scan;     /* Pointer to current position in target string */
      I32 c;
      I32 c;
-    char *loceol = PL_regeol;
-    I32 hardcount = 0;
+    char *loceol = PL_regeol;   /* local version */
+    I32 hardcount = 0;  /* How many matches so far */
      bool utf8_target = PL_reg_match_utf8;
      UV utf8_flags;
  #ifndef DEBUGGING
      bool utf8_target = PL_reg_match_utf8;
      UV utf8_flags;
  #ifndef DEBUGGING
@@ -6110,15 +6557,38 @@ S_regrepeat(pTHX_ const regexp *prog, const regnode *p, I32 max, int depth)
  
      PERL_ARGS_ASSERT_REGREPEAT;
  
  
      PERL_ARGS_ASSERT_REGREPEAT;
  
-    scan = PL_reginput;
+    scan = *startposp;
      if (max == REG_INFTY)
         max = I32_MAX;
      if (max == REG_INFTY)
         max = I32_MAX;
-    else if (max < loceol - scan)
+    else if (! utf8_target && scan + max < loceol)
         loceol = scan + max;
         loceol = scan + max;
+
+    /* Here, for the case of a non-UTF-8 target we have adjusted <loceol> down
+     * to the maximum of how far we should go in it (leaving it set to the real
+     * end, if the maximum permissible would take us beyond that).  This allows
+     * us to make the loop exit condition that we haven't gone past <loceol> to
+     * also mean that we haven't exceeded the max permissible count, saving a
+     * test each time through the loop.  But it assumes that the OP matches a
+     * single byte, which is true for most of the OPs below when applied to a
+     * non-UTF-8 target.  Those relatively few OPs that don't have this
+     * characteristic will have to compensate.
+     *
+     * There is no adjustment for UTF-8 targets, as the number of bytes per
+     * character varies.  OPs will have to test both that the count is less
+     * than the max permissible (using <hardcount> to keep track), and that we
+     * are still within the bounds of the string (using <loceol>.  A few OPs
+     * match a single byte no matter what the encoding.  They can omit the max
+     * test if, for the UTF-8 case, they do the adjustment that was skipped
+     * above.
+     *
+     * Thus, the code above sets things up for the common case; and exceptional
+     * cases need extra work; the common case is to make sure <scan> doesn't
+     * go past <loceol>, and for UTF-8 to also use <hardcount> to make sure the
+     * count doesn't exceed the maximum permissible */
+
      switch (OP(p)) {
      case REG_ANY:
         if (utf8_target) {
      switch (OP(p)) {
      case REG_ANY:
         if (utf8_target) {
-           loceol = PL_regeol;
             while (scan < loceol && hardcount < max && *scan != '\n') {
                 scan += UTF8SKIP(scan);
                 hardcount++;
             while (scan < loceol && hardcount < max && *scan != '\n') {
                 scan += UTF8SKIP(scan);
                 hardcount++;
@@ -6130,7 +6600,6 @@ S_regrepeat(pTHX_ const regexp *prog, const regnode *p, I32 max, int depth)
         break;
      case SANY:
          if (utf8_target) {
         break;
      case SANY:
          if (utf8_target) {
-           loceol = PL_regeol;
             while (scan < loceol && hardcount < max) {
                 scan += UTF8SKIP(scan);
                 hardcount++;
             while (scan < loceol && hardcount < max) {
                 scan += UTF8SKIP(scan);
                 hardcount++;
@@ -6139,35 +6608,69 @@ S_regrepeat(pTHX_ const regexp *prog, const regnode *p, I32 max, int depth)
         else
             scan = loceol;
         break;
         else
             scan = loceol;
         break;
-    case CANY:
-       scan = loceol;
+    case CANY:  /* Move <scan> forward <max> bytes, unless goes off end */
+        if (utf8_target && scan + max < loceol) {
+
+            /* <loceol> hadn't been adjusted in the UTF-8 case */
+            scan +=  max;
+        }
+        else {
+            scan = loceol;
+        }
         break;
      case EXACT:
         break;
      case EXACT:
-       /* To get here, EXACTish nodes must have *byte* length == 1.  That
-        * means they match only characters in the string that can be expressed
-        * as a single byte.  For non-utf8 strings, that means a simple match.
-        * For utf8 strings, the character matched must be an invariant, or
-        * downgradable to a single byte.  The pattern's utf8ness is
-        * irrelevant, as since it's a single byte, it either isn't utf8, or if
-        * it is, it's an invariant */
+        assert(STR_LEN(p) == (UTF_PATTERN) ? UTF8SKIP(STRING(p)) : 1);
  
         c = (U8)*STRING(p);
  
         c = (U8)*STRING(p);
-       assert(! UTF_PATTERN || UNI_IS_INVARIANT(c));
  
  
-       if (! utf8_target || UNI_IS_INVARIANT(c)) {
+        /* Can use a simple loop if the pattern char to match on is invariant
+         * under UTF-8, or both target and pattern aren't UTF-8.  Note that we
+         * can use UTF8_IS_INVARIANT() even if the pattern isn't UTF-8, as it's
+         * true iff it doesn't matter if the argument is in UTF-8 or not */
+        if (UTF8_IS_INVARIANT(c) || (! utf8_target && ! UTF_PATTERN)) {
+            if (utf8_target && scan + max < loceol) {
+                /* We didn't adjust <loceol> because is UTF-8, but ok to do so,
+                 * since here, to match at all, 1 char == 1 byte */
+                loceol = scan + max;
+            }
             while (scan < loceol && UCHARAT(scan) == c) {
                 scan++;
             }
         }
             while (scan < loceol && UCHARAT(scan) == c) {
                 scan++;
             }
         }
-       else {
+       else if (UTF_PATTERN) {
+            if (utf8_target) {
+                STRLEN scan_char_len;
+
+                /* When both target and pattern are UTF-8, we have to do
+                 * string EQ */
+                while (hardcount < max
+                       && scan + (scan_char_len = UTF8SKIP(scan)) <= loceol
+                       && scan_char_len <= STR_LEN(p)
+                       && memEQ(scan, STRING(p), scan_char_len))
+                {
+                    scan += scan_char_len;
+                    hardcount++;
+                }
+            }
+            else if (! UTF8_IS_ABOVE_LATIN1(c)) {
+
+                /* Target isn't utf8; convert the character in the UTF-8
+                 * pattern to non-UTF8, and do a simple loop */
+                c = TWO_BYTE_UTF8_TO_UNI(c, *(STRING(p) + 1));
+                while (scan < loceol && UCHARAT(scan) == c) {
+                    scan++;
+                }
+            } /* else pattern char is above Latin1, can't possibly match the
+                 non-UTF-8 target */
+        }
+        else {
  
  
-           /* Here, the string is utf8, and the pattern char is different
-            * in utf8 than not, so can't compare them directly.  Outside the
-            * loop, find the two utf8 bytes that represent c, and then
-            * look for those in sequence in the utf8 string */
+            /* Here, the string must be utf8; pattern isn't, and <c> is
+             * different in utf8 than not, so can't compare them directly.
+             * Outside the loop, find the two utf8 bytes that represent c, and
+             * then look for those in sequence in the utf8 string */
             U8 high = UTF8_TWO_BYTE_HI(c);
             U8 low = UTF8_TWO_BYTE_LO(c);
             U8 high = UTF8_TWO_BYTE_HI(c);
             U8 low = UTF8_TWO_BYTE_LO(c);
-           loceol = PL_regeol;
  
             while (hardcount < max
                     && scan + 1 < loceol
  
             while (hardcount < max
                     && scan + 1 < loceol
@@ -6179,6 +6682,7 @@ S_regrepeat(pTHX_ const regexp *prog, const regnode *p, I32 max, int depth)
             }
         }
         break;
             }
         }
         break;
+
      case EXACTFA:
         utf8_flags = FOLDEQ_UTF8_NOMIX_ASCII;
         goto do_exactf;
      case EXACTFA:
         utf8_flags = FOLDEQ_UTF8_NOMIX_ASCII;
         goto do_exactf;
@@ -6197,65 +6701,69 @@ S_regrepeat(pTHX_ const regexp *prog, const regnode *p, I32 max, int depth)
      case EXACTFU:
         utf8_flags = (UTF_PATTERN) ? FOLDEQ_S2_ALREADY_FOLDED : 0;
  
      case EXACTFU:
         utf8_flags = (UTF_PATTERN) ? FOLDEQ_S2_ALREADY_FOLDED : 0;
  
-       /* The comments for the EXACT case above apply as well to these fold
-        * ones */
-
-    do_exactf:
-       c = (U8)*STRING(p);
-       assert(! UTF_PATTERN || UNI_IS_INVARIANT(c));
-
-       if (utf8_target || OP(p) == EXACTFU_SS) { /* Use full Unicode fold matching */
-           char *tmpeol = loceol;
-           while (hardcount < max
-                   && foldEQ_utf8_flags(scan, &tmpeol, 0, utf8_target,
-                                  STRING(p), NULL, 1, cBOOL(UTF_PATTERN), utf8_flags))
-           {
-               scan = tmpeol;
-               tmpeol = loceol;
-               hardcount++;
-           }
-
-           /* XXX Note that the above handles properly the German sharp s in
-            * the pattern matching ss in the string.  But it doesn't handle
-            * properly cases where the string contains say 'LIGATURE ff' and
-            * the pattern is 'f+'.  This would require, say, a new function or
-            * revised interface to foldEQ_utf8(), in which the maximum number
-            * of characters to match could be passed and it would return how
-            * many actually did.  This is just one of many cases where
-            * multi-char folds don't work properly, and so the fix is being
-            * deferred */
-       }
-       else {
-           U8 folded;
-
-           /* Here, the string isn't utf8 and c is a single byte; and either
-            * the pattern isn't utf8 or c is an invariant, so its utf8ness
-            * doesn't affect c.  Can just do simple comparisons for exact or
-            * fold matching. */
-           switch (OP(p)) {
-               case EXACTF: folded = PL_fold[c]; break;
-               case EXACTFA:
-               case EXACTFU_TRICKYFOLD:
-               case EXACTFU: folded = PL_fold_latin1[c]; break;
-               case EXACTFL: folded = PL_fold_locale[c]; break;
-               default: Perl_croak(aTHX_ "panic: Unexpected op %u", OP(p));
-           }
-           while (scan < loceol &&
-                  (UCHARAT(scan) == c || UCHARAT(scan) == folded))
-           {
-               scan++;
-           }
+    do_exactf: {
+        int c1, c2;
+        U8 c1_utf8[UTF8_MAXBYTES+1], c2_utf8[UTF8_MAXBYTES+1];
+
+        assert(STR_LEN(p) == (UTF_PATTERN) ? UTF8SKIP(STRING(p)) : 1);
+
+        if (S_setup_EXACTISH_ST_c1_c2(aTHX_ p, &c1, c1_utf8, &c2, c2_utf8)) {
+            if (c1 == CHRTEST_VOID) {
+                /* Use full Unicode fold matching */
+                char *tmpeol = PL_regeol;
+                STRLEN pat_len = (UTF_PATTERN) ? UTF8SKIP(STRING(p)) : 1;
+                while (hardcount < max
+                        && foldEQ_utf8_flags(scan, &tmpeol, 0, utf8_target,
+                                             STRING(p), NULL, pat_len,
+                                             cBOOL(UTF_PATTERN), utf8_flags))
+                {
+                    scan = tmpeol;
+                    tmpeol = PL_regeol;
+                    hardcount++;
+                }
+            }
+            else if (utf8_target) {
+                if (c1 == c2) {
+                    while (scan < loceol
+                           && hardcount < max
+                           && memEQ(scan, c1_utf8, UTF8SKIP(scan)))
+                    {
+                        scan += UTF8SKIP(scan);
+                        hardcount++;
+                    }
+                }
+                else {
+                    while (scan < loceol
+                           && hardcount < max
+                           && (memEQ(scan, c1_utf8, UTF8SKIP(scan))
+                               || memEQ(scan, c2_utf8, UTF8SKIP(scan))))
+                    {
+                        scan += UTF8SKIP(scan);
+                        hardcount++;
+                    }
+                }
+            }
+            else if (c1 == c2) {
+                while (scan < loceol && UCHARAT(scan) == c1) {
+                    scan++;
+                }
+            }
+            else {
+                while (scan < loceol &&
+                    (UCHARAT(scan) == c1 || UCHARAT(scan) == c2))
+                {
+                    scan++;
+                }
+            }
         }
         break;
         }
         break;
-    case ANYOFV:
+    }
      case ANYOF:
      case ANYOF:
-       if (utf8_target || OP(p) == ANYOFV) {
+       if (utf8_target) {
             STRLEN inclasslen;
             STRLEN inclasslen;
-           loceol = PL_regeol;
-           inclasslen = loceol - scan;
             while (hardcount < max
             while (hardcount < max
-                  && ((inclasslen = loceol - scan) > 0)
-                  && reginclass(prog, p, (U8*)scan, &inclasslen, utf8_target))
+                   && scan + (inclasslen = UTF8SKIP(scan)) <= loceol
+                  && reginclass(prog, p, (U8*)scan, utf8_target))
             {
                 scan += inclasslen;
                 hardcount++;
             {
                 scan += inclasslen;
                 hardcount++;
@@ -6268,7 +6776,6 @@ S_regrepeat(pTHX_ const regexp *prog, const regnode *p, I32 max, int depth)
      case ALNUMU:
         if (utf8_target) {
      utf8_wordchar:
      case ALNUMU:
         if (utf8_target) {
      utf8_wordchar:
-           loceol = PL_regeol;
             LOAD_UTF8_CHARCLASS_ALNUM();
             while (hardcount < max && scan < loceol &&
                     swash_fetch(PL_utf8_alnum, (U8*)scan, utf8_target))
             LOAD_UTF8_CHARCLASS_ALNUM();
             while (hardcount < max && scan < loceol &&
                     swash_fetch(PL_utf8_alnum, (U8*)scan, utf8_target))
@@ -6290,6 +6797,12 @@ S_regrepeat(pTHX_ const regexp *prog, const regnode *p, I32 max, int depth)
         }
         break;
      case ALNUMA:
         }
         break;
      case ALNUMA:
+        if (utf8_target && scan + max < loceol) {
+
+            /* We didn't adjust <loceol> because is UTF-8, but ok to do so,
+             * since here, to match, 1 char == 1 byte */
+            loceol = scan + max;
+        }
         while (scan < loceol && isWORDCHAR_A((U8) *scan)) {
             scan++;
         }
         while (scan < loceol && isWORDCHAR_A((U8) *scan)) {
             scan++;
         }
@@ -6297,7 +6810,6 @@ S_regrepeat(pTHX_ const regexp *prog, const regnode *p, I32 max, int depth)
      case ALNUML:
         PL_reg_flags |= RF_tainted;
         if (utf8_target) {
      case ALNUML:
         PL_reg_flags |= RF_tainted;
         if (utf8_target) {
-           loceol = PL_regeol;
             while (hardcount < max && scan < loceol &&
                    isALNUM_LC_utf8((U8*)scan)) {
                 scan += UTF8SKIP(scan);
             while (hardcount < max && scan < loceol &&
                    isALNUM_LC_utf8((U8*)scan)) {
                 scan += UTF8SKIP(scan);
@@ -6313,7 +6825,6 @@ S_regrepeat(pTHX_ const regexp *prog, const regnode *p, I32 max, int depth)
  
      utf8_Nwordchar:
  
  
      utf8_Nwordchar:
  
-           loceol = PL_regeol;
             LOAD_UTF8_CHARCLASS_ALNUM();
             while (hardcount < max && scan < loceol &&
                     ! swash_fetch(PL_utf8_alnum, (U8*)scan, utf8_target))
             LOAD_UTF8_CHARCLASS_ALNUM();
             while (hardcount < max && scan < loceol &&
                     ! swash_fetch(PL_utf8_alnum, (U8*)scan, utf8_target))
@@ -6336,14 +6847,23 @@ S_regrepeat(pTHX_ const regexp *prog, const regnode *p, I32 max, int depth)
         break;
  
      case POSIXA:
         break;
  
      case POSIXA:
-       while (scan < loceol && _generic_isCC_A((U8) *scan, FLAGS(p))) {
+        if (utf8_target && scan + max < loceol) {
+
+            /* We didn't adjust <loceol> because is UTF-8, but ok to do so,
+             * since here, to match, 1 char == 1 byte */
+            loceol = scan + max;
+        }
+        while (scan < loceol && _generic_isCC_A((U8) *scan, FLAGS(p))) {
             scan++;
         }
         break;
      case NPOSIXA:
         if (utf8_target) {
             scan++;
         }
         break;
      case NPOSIXA:
         if (utf8_target) {
-           while (scan < loceol && ! _generic_isCC_A((U8) *scan, FLAGS(p))) {
+           while (scan < loceol && hardcount < max
+                   && ! _generic_isCC_A((U8) *scan, FLAGS(p)))
+            {
                 scan += UTF8SKIP(scan);
                 scan += UTF8SKIP(scan);
+                hardcount++;
             }
         }
         else {
             }
         }
         else {
@@ -6354,8 +6874,11 @@ S_regrepeat(pTHX_ const regexp *prog, const regnode *p, I32 max, int depth)
         break;
      case NALNUMA:
         if (utf8_target) {
         break;
      case NALNUMA:
         if (utf8_target) {
-           while (scan < loceol && ! isWORDCHAR_A((U8) *scan)) {
+           while (scan < loceol && hardcount < max
+                   && ! isWORDCHAR_A((U8) *scan))
+            {
                 scan += UTF8SKIP(scan);
                 scan += UTF8SKIP(scan);
+                hardcount++;
             }
         }
         else {
             }
         }
         else {
@@ -6367,7 +6890,6 @@ S_regrepeat(pTHX_ const regexp *prog, const regnode *p, I32 max, int depth)
      case NALNUML:
         PL_reg_flags |= RF_tainted;
         if (utf8_target) {
      case NALNUML:
         PL_reg_flags |= RF_tainted;
         if (utf8_target) {
-           loceol = PL_regeol;
             while (hardcount < max && scan < loceol &&
                    !isALNUM_LC_utf8((U8*)scan)) {
                 scan += UTF8SKIP(scan);
             while (hardcount < max && scan < loceol &&
                    !isALNUM_LC_utf8((U8*)scan)) {
                 scan += UTF8SKIP(scan);
@@ -6383,7 +6905,6 @@ S_regrepeat(pTHX_ const regexp *prog, const regnode *p, I32 max, int depth)
  
      utf8_space:
  
  
      utf8_space:
  
-           loceol = PL_regeol;
             LOAD_UTF8_CHARCLASS_SPACE();
             while (hardcount < max && scan < loceol &&
                    (*scan == ' ' ||
             LOAD_UTF8_CHARCLASS_SPACE();
             while (hardcount < max && scan < loceol &&
                    (*scan == ' ' ||
@@ -6409,6 +6930,12 @@ S_regrepeat(pTHX_ const regexp *prog, const regnode *p, I32 max, int depth)
         }
         break;
      case SPACEA:
         }
         break;
      case SPACEA:
+        if (utf8_target && scan + max < loceol) {
+
+            /* We didn't adjust <loceol> because is UTF-8, but ok to do so,
+             * since here, to match, 1 char == 1 byte */
+            loceol = scan + max;
+        }
         while (scan < loceol && isSPACE_A((U8) *scan)) {
             scan++;
         }
         while (scan < loceol && isSPACE_A((U8) *scan)) {
             scan++;
         }
@@ -6416,7 +6943,6 @@ S_regrepeat(pTHX_ const regexp *prog, const regnode *p, I32 max, int depth)
      case SPACEL:
         PL_reg_flags |= RF_tainted;
         if (utf8_target) {
      case SPACEL:
         PL_reg_flags |= RF_tainted;
         if (utf8_target) {
-           loceol = PL_regeol;
             while (hardcount < max && scan < loceol &&
                    isSPACE_LC_utf8((U8*)scan)) {
                 scan += UTF8SKIP(scan);
             while (hardcount < max && scan < loceol &&
                    isSPACE_LC_utf8((U8*)scan)) {
                 scan += UTF8SKIP(scan);
@@ -6432,7 +6958,6 @@ S_regrepeat(pTHX_ const regexp *prog, const regnode *p, I32 max, int depth)
  
      utf8_Nspace:
  
  
      utf8_Nspace:
  
-           loceol = PL_regeol;
             LOAD_UTF8_CHARCLASS_SPACE();
             while (hardcount < max && scan < loceol &&
                    ! (*scan == ' ' ||
             LOAD_UTF8_CHARCLASS_SPACE();
             while (hardcount < max && scan < loceol &&
                    ! (*scan == ' ' ||
@@ -6459,8 +6984,11 @@ S_regrepeat(pTHX_ const regexp *prog, const regnode *p, I32 max, int depth)
         break;
      case NSPACEA:
         if (utf8_target) {
         break;
      case NSPACEA:
         if (utf8_target) {
-           while (scan < loceol && ! isSPACE_A((U8) *scan)) {
+           while (hardcount < max && scan < loceol
+                  && ! isSPACE_A((U8) *scan))
+            {
                 scan += UTF8SKIP(scan);
                 scan += UTF8SKIP(scan);
+               hardcount++;
             }
         }
         else {
             }
         }
         else {
@@ -6472,7 +7000,6 @@ S_regrepeat(pTHX_ const regexp *prog, const regnode *p, I32 max, int depth)
      case NSPACEL:
         PL_reg_flags |= RF_tainted;
         if (utf8_target) {
      case NSPACEL:
         PL_reg_flags |= RF_tainted;
         if (utf8_target) {
-           loceol = PL_regeol;
             while (hardcount < max && scan < loceol &&
                    !isSPACE_LC_utf8((U8*)scan)) {
                 scan += UTF8SKIP(scan);
             while (hardcount < max && scan < loceol &&
                    !isSPACE_LC_utf8((U8*)scan)) {
                 scan += UTF8SKIP(scan);
@@ -6485,7 +7012,6 @@ S_regrepeat(pTHX_ const regexp *prog, const regnode *p, I32 max, int depth)
         break;
      case DIGIT:
         if (utf8_target) {
         break;
      case DIGIT:
         if (utf8_target) {
-           loceol = PL_regeol;
             LOAD_UTF8_CHARCLASS_DIGIT();
             while (hardcount < max && scan < loceol &&
                    swash_fetch(PL_utf8_digit, (U8*)scan, utf8_target)) {
             LOAD_UTF8_CHARCLASS_DIGIT();
             while (hardcount < max && scan < loceol &&
                    swash_fetch(PL_utf8_digit, (U8*)scan, utf8_target)) {
@@ -6498,6 +7024,12 @@ S_regrepeat(pTHX_ const regexp *prog, const regnode *p, I32 max, int depth)
         }
         break;
      case DIGITA:
         }
         break;
      case DIGITA:
+        if (utf8_target && scan + max < loceol) {
+
+            /* We didn't adjust <loceol> because is UTF-8, but ok to do so,
+             * since here, to match, 1 char == 1 byte */
+            loceol = scan + max;
+        }
         while (scan < loceol && isDIGIT_A((U8) *scan)) {
             scan++;
         }
         while (scan < loceol && isDIGIT_A((U8) *scan)) {
             scan++;
         }
@@ -6505,7 +7037,6 @@ S_regrepeat(pTHX_ const regexp *prog, const regnode *p, I32 max, int depth)
      case DIGITL:
         PL_reg_flags |= RF_tainted;
         if (utf8_target) {
      case DIGITL:
         PL_reg_flags |= RF_tainted;
         if (utf8_target) {
-           loceol = PL_regeol;
             while (hardcount < max && scan < loceol &&
                    isDIGIT_LC_utf8((U8*)scan)) {
                 scan += UTF8SKIP(scan);
             while (hardcount < max && scan < loceol &&
                    isDIGIT_LC_utf8((U8*)scan)) {
                 scan += UTF8SKIP(scan);
@@ -6518,7 +7049,6 @@ S_regrepeat(pTHX_ const regexp *prog, const regnode *p, I32 max, int depth)
         break;
      case NDIGIT:
         if (utf8_target) {
         break;
      case NDIGIT:
         if (utf8_target) {
-           loceol = PL_regeol;
             LOAD_UTF8_CHARCLASS_DIGIT();
             while (hardcount < max && scan < loceol &&
                    !swash_fetch(PL_utf8_digit, (U8*)scan, utf8_target)) {
             LOAD_UTF8_CHARCLASS_DIGIT();
             while (hardcount < max && scan < loceol &&
                    !swash_fetch(PL_utf8_digit, (U8*)scan, utf8_target)) {
@@ -6532,8 +7062,10 @@ S_regrepeat(pTHX_ const regexp *prog, const regnode *p, I32 max, int depth)
         break;
      case NDIGITA:
         if (utf8_target) {
         break;
      case NDIGITA:
         if (utf8_target) {
-           while (scan < loceol && ! isDIGIT_A((U8) *scan)) {
+           while (hardcount < max && scan < loceol
+                  && ! isDIGIT_A((U8) *scan)) {
                 scan += UTF8SKIP(scan);
                 scan += UTF8SKIP(scan);
+                hardcount++;
             }
         }
         else {
             }
         }
         else {
@@ -6545,7 +7077,6 @@ S_regrepeat(pTHX_ const regexp *prog, const regnode *p, I32 max, int depth)
      case NDIGITL:
         PL_reg_flags |= RF_tainted;
         if (utf8_target) {
      case NDIGITL:
         PL_reg_flags |= RF_tainted;
         if (utf8_target) {
-           loceol = PL_regeol;
             while (hardcount < max && scan < loceol &&
                    !isDIGIT_LC_utf8((U8*)scan)) {
                 scan += UTF8SKIP(scan);
             while (hardcount < max && scan < loceol &&
                    !isDIGIT_LC_utf8((U8*)scan)) {
                 scan += UTF8SKIP(scan);
@@ -6558,84 +7089,106 @@ S_regrepeat(pTHX_ const regexp *prog, const regnode *p, I32 max, int depth)
         break;
      case LNBREAK:
          if (utf8_target) {
         break;
      case LNBREAK:
          if (utf8_target) {
-           loceol = PL_regeol;
-           while (hardcount < max && scan < loceol && (c=is_LNBREAK_utf8(scan))) {
+           while (hardcount < max && scan < loceol &&
+                    (c=is_LNBREAK_utf8_safe(scan, loceol))) {
                 scan += c;
                 hardcount++;
             }
         } else {
                 scan += c;
                 hardcount++;
             }
         } else {
-           /*
-             LNBREAK can match two latin chars, which is ok,
-             because we have a null terminated string, but we
-             have to use hardcount in this situation
-           */
-           while (scan < loceol && (c=is_LNBREAK_latin1(scan)))  {
+            /* LNBREAK can match one or two latin chars, which is ok, but we
+             * have to use hardcount in this situation, and throw away the
+             * adjustment to <loceol> done before the switch statement */
+            loceol = PL_regeol;
+           while (scan < loceol && (c=is_LNBREAK_latin1_safe(scan, loceol))) {
                 scan+=c;
                 hardcount++;
             }
                 scan+=c;
                 hardcount++;
             }
-       }       
+       }
         break;
      case HORIZWS:
          if (utf8_target) {
         break;
      case HORIZWS:
          if (utf8_target) {
-           loceol = PL_regeol;
-           while (hardcount < max && scan < loceol && (c=is_HORIZWS_utf8(scan))) {
+           while (hardcount < max && scan < loceol &&
+                    (c=is_HORIZWS_utf8_safe(scan, loceol)))
+            {
                 scan += c;
                 hardcount++;
             }
         } else {
                 scan += c;
                 hardcount++;
             }
         } else {
-           while (scan < loceol && is_HORIZWS_latin1(scan)) 
+           while (scan < loceol && is_HORIZWS_latin1_safe(scan, loceol)) 
                 scan++;         
         }       
         break;
      case NHORIZWS:
          if (utf8_target) {
                 scan++;         
         }       
         break;
      case NHORIZWS:
          if (utf8_target) {
-           loceol = PL_regeol;
-           while (hardcount < max && scan < loceol && !is_HORIZWS_utf8(scan)) {
+           while (hardcount < max && scan < loceol &&
+                        !is_HORIZWS_utf8_safe(scan, loceol))
+            {
                 scan += UTF8SKIP(scan);
                 hardcount++;
             }
         } else {
                 scan += UTF8SKIP(scan);
                 hardcount++;
             }
         } else {
-           while (scan < loceol && !is_HORIZWS_latin1(scan))
+           while (scan < loceol && !is_HORIZWS_latin1_safe(scan, loceol))
                 scan++;
  
         }       
         break;
      case VERTWS:
          if (utf8_target) {
                 scan++;
  
         }       
         break;
      case VERTWS:
          if (utf8_target) {
-           loceol = PL_regeol;
-           while (hardcount < max && scan < loceol && (c=is_VERTWS_utf8(scan))) {
+           while (hardcount < max && scan < loceol &&
+                            (c=is_VERTWS_utf8_safe(scan, loceol)))
+            {
                 scan += c;
                 hardcount++;
             }
         } else {
                 scan += c;
                 hardcount++;
             }
         } else {
-           while (scan < loceol && is_VERTWS_latin1(scan)) 
+           while (scan < loceol && is_VERTWS_latin1_safe(scan, loceol)) 
                 scan++;
  
         }       
         break;
      case NVERTWS:
          if (utf8_target) {
                 scan++;
  
         }       
         break;
      case NVERTWS:
          if (utf8_target) {
-           loceol = PL_regeol;
-           while (hardcount < max && scan < loceol && !is_VERTWS_utf8(scan)) {
+           while (hardcount < max && scan < loceol &&
+                                !is_VERTWS_utf8_safe(scan, loceol))
+            {
                 scan += UTF8SKIP(scan);
                 hardcount++;
             }
         } else {
                 scan += UTF8SKIP(scan);
                 hardcount++;
             }
         } else {
-           while (scan < loceol && !is_VERTWS_latin1(scan)) 
+           while (scan < loceol && !is_VERTWS_latin1_safe(scan, loceol)) 
                 scan++;
            
         }       
         break;
  
                 scan++;
            
         }       
         break;
  
-    default:           /* Called on something of 0 width. */
-       break;          /* So match right here or not at all. */
+    case BOUND:
+    case BOUNDA:
+    case BOUNDL:
+    case BOUNDU:
+    case EOS:
+    case GPOS:
+    case KEEPS:
+    case NBOUND:
+    case NBOUNDA:
+    case NBOUNDL:
+    case NBOUNDU:
+    case OPFAIL:
+    case SBOL:
+    case SEOL:
+        /* These are all 0 width, so match right here or not at all. */
+        break;
+
+    default:
+        Perl_croak(aTHX_ "panic: regrepeat() called with unrecognized node type %d='%s'", OP(p), PL_reg_name[OP(p)]);
+        assert(0); /* NOTREACHED */
+
      }
  
      if (hardcount)
         c = hardcount;
      else
      }
  
      if (hardcount)
         c = hardcount;
      else
-       c = scan - PL_reginput;
-    PL_reginput = scan;
+       c = scan - *startposp;
+    *startposp = scan;
  
      DEBUG_r({
         GET_RE_DEBUG_FLAGS_DECL;
  
      DEBUG_r({
         GET_RE_DEBUG_FLAGS_DECL;
@@ -6655,32 +7208,35 @@ S_regrepeat(pTHX_ const regexp *prog, const regnode *p, I32 max, int depth)
  #if !defined(PERL_IN_XSUB_RE) || defined(PLUGGABLE_RE_EXTENSION)
  /*
  - regclass_swash - prepare the utf8 swash.  Wraps the shared core version to
  #if !defined(PERL_IN_XSUB_RE) || defined(PLUGGABLE_RE_EXTENSION)
  /*
  - regclass_swash - prepare the utf8 swash.  Wraps the shared core version to
-create a copy so that changes the caller makes won't change the shared one
+create a copy so that changes the caller makes won't change the shared one.
+If <altsvp> is non-null, will return NULL in it, for back-compat.
   */
  SV *
  Perl_regclass_swash(pTHX_ const regexp *prog, register const regnode* node, bool doinit, SV** listsvp, SV **altsvp)
  {
      PERL_ARGS_ASSERT_REGCLASS_SWASH;
   */
  SV *
  Perl_regclass_swash(pTHX_ const regexp *prog, register const regnode* node, bool doinit, SV** listsvp, SV **altsvp)
  {
      PERL_ARGS_ASSERT_REGCLASS_SWASH;
-    return newSVsv(core_regclass_swash(prog, node, doinit, listsvp, altsvp));
+
+    if (altsvp) {
+        *altsvp = NULL;
+    }
+
+    return newSVsv(core_regclass_swash(prog, node, doinit, listsvp));
  }
  #endif
  
  STATIC SV *
  }
  #endif
  
  STATIC SV *
-S_core_regclass_swash(pTHX_ const regexp *prog, register const regnode* node, bool doinit, SV** listsvp, SV **altsvp)
+S_core_regclass_swash(pTHX_ const regexp *prog, register const regnode* node, bool doinit, SV** listsvp)
  {
      /* Returns the swash for the input 'node' in the regex 'prog'.
       * If <doinit> is true, will attempt to create the swash if not already
       *   done.
       * If <listsvp> is non-null, will return the swash initialization string in
       *   it.
  {
      /* Returns the swash for the input 'node' in the regex 'prog'.
       * If <doinit> is true, will attempt to create the swash if not already
       *   done.
       * If <listsvp> is non-null, will return the swash initialization string in
       *   it.
-     * If <altsvp> is non-null, will return the alternates to the regular swash
-     *   in it
       * Tied intimately to how regcomp.c sets up the data structure */
  
      dVAR;
      SV *sw  = NULL;
      SV *si  = NULL;
       * Tied intimately to how regcomp.c sets up the data structure */
  
      dVAR;
      SV *sw  = NULL;
      SV *si  = NULL;
-    SV *alt = NULL;
      SV*  invlist = NULL;
  
      RXi_GET_DECL(prog,progi);
      SV*  invlist = NULL;
  
      RXi_GET_DECL(prog,progi);
@@ -6697,20 +7253,21 @@ S_core_regclass_swash(pTHX_ const regexp *prog, register const regnode* node, bo
             SV * const rv = MUTABLE_SV(data->data[n]);
             AV * const av = MUTABLE_AV(SvRV(rv));
             SV **const ary = AvARRAY(av);
             SV * const rv = MUTABLE_SV(data->data[n]);
             AV * const av = MUTABLE_AV(SvRV(rv));
             SV **const ary = AvARRAY(av);
-           bool invlist_has_user_defined_property;
+           U8 swash_init_flags = _CORE_SWASH_INIT_ACCEPT_INVLIST;
         
             si = *ary;  /* ary[0] = the string to initialize the swash with */
  
         
             si = *ary;  /* ary[0] = the string to initialize the swash with */
  
-           /* Elements 3 and 4 are either both present or both absent. [3] is
-            * any inversion list generated at compile time; [4] indicates if
+           /* Elements 2 and 3 are either both present or both absent. [2] is
+            * any inversion list generated at compile time; [3] indicates if
              * that inversion list has any user-defined properties in it. */
              * that inversion list has any user-defined properties in it. */
-           if (av_len(av) >= 3) {
-               invlist = ary[3];
-               invlist_has_user_defined_property = cBOOL(SvUV(ary[4]));
+           if (av_len(av) >= 2) {
+               invlist = ary[2];
+               if (SvUV(ary[3])) {
+                    swash_init_flags |= _CORE_SWASH_INIT_USER_DEFINED_PROPERTY;
+                }
             }
             else {
                 invlist = NULL;
             }
             else {
                 invlist = NULL;
-               invlist_has_user_defined_property = FALSE;
             }
  
             /* Element [1] is reserved for the set-up swash.  If already there,
             }
  
             /* Element [1] is reserved for the set-up swash.  If already there,
@@ -6725,38 +7282,23 @@ S_core_regclass_swash(pTHX_ const regexp *prog, register const regnode* node, bo
                                       si,
                                       1, /* binary */
                                       0, /* not from tr/// */
                                       si,
                                       1, /* binary */
                                       0, /* not from tr/// */
-                                     FALSE, /* is error if can't find
-                                               property */
                                       invlist,
                                       invlist,
-                                     invlist_has_user_defined_property);
+                                     &swash_init_flags);
                 (void)av_store(av, 1, sw);
             }
                 (void)av_store(av, 1, sw);
             }
-
-           /* Element [2] is for any multi-char folds.  Note that is a
-            * fundamentally flawed design, because can't backtrack and try
-            * again.  See [perl #89774] */
-           if (SvTYPE(ary[2]) == SVt_PVAV) {
-               alt = ary[2];
-           }
         }
      }
         
      if (listsvp) {
         SV* matches_string = newSVpvn("", 0);
         }
      }
         
      if (listsvp) {
         SV* matches_string = newSVpvn("", 0);
-       SV** invlistsvp;
  
         /* Use the swash, if any, which has to have incorporated into it all
          * possibilities */
  
         /* Use the swash, if any, which has to have incorporated into it all
          * possibilities */
-       if (   sw
-           && SvROK(sw)
-           && SvTYPE(SvRV(sw)) == SVt_PVHV
-           && (invlistsvp = hv_fetchs(MUTABLE_HV(SvRV(sw)), "INVLIST", FALSE)))
-       {
-           invlist = *invlistsvp;
-       }
-       else if (si && si != &PL_sv_undef) {
+       if ((! sw || (invlist = _get_swash_invlist(sw)) == NULL)
+            && (si && si != &PL_sv_undef))
+        {
  
  
-           /* If no swash, use the input nitialization string, if available */
+           /* If no swash, use the input initialization string, if available */
             sv_catsv(matches_string, si);
         }
  
             sv_catsv(matches_string, si);
         }
  
@@ -6768,9 +7310,6 @@ S_core_regclass_swash(pTHX_ const regexp *prog, register const regnode* node, bo
         *listsvp = matches_string;
      }
  
         *listsvp = matches_string;
      }
  
-    if (altsvp)
-       *altsvp  = alt;
-
      return sw;
  }
  
      return sw;
  }
  
@@ -6779,15 +7318,9 @@ S_core_regclass_swash(pTHX_ const regexp *prog, register const regnode* node, bo
   
    n is the ANYOF regnode
    p is the target string
   
    n is the ANYOF regnode
    p is the target string
-  lenp is pointer to the maximum number of bytes of how far to go in p
-    (This is assumed wthout checking to always be at least the current
-    character's size)
    utf8_target tells whether p is in UTF-8.
  
    utf8_target tells whether p is in UTF-8.
  
-  Returns true if matched; false otherwise.  If lenp is not NULL, on return
-  from a successful match, the value it points to will be updated to how many
-  bytes in p were matched.  If there was no match, the value is undefined,
-  possibly changed from the input.
+  Returns true if matched; false otherwise.
  
    Note that this can be a synthetic start class, a combination of various
    nodes, so things you think might be mutually exclusive, such as locale,
  
    Note that this can be a synthetic start class, a combination of various
    nodes, so things you think might be mutually exclusive, such as locale,
@@ -6796,19 +7329,19 @@ S_core_regclass_swash(pTHX_ const regexp *prog, register const regnode* node, bo
   */
  
  STATIC bool
   */
  
  STATIC bool
-S_reginclass(pTHX_ const regexp * const prog, register const regnode * const n, register const U8* const p, STRLEN* lenp, register const bool utf8_target)
+S_reginclass(pTHX_ const regexp * const prog, register const regnode * const n, register const U8* const p, register const bool utf8_target)
  {
      dVAR;
      const char flags = ANYOF_FLAGS(n);
      bool match = FALSE;
      UV c = *p;
  {
      dVAR;
      const char flags = ANYOF_FLAGS(n);
      bool match = FALSE;
      UV c = *p;
-    STRLEN c_len = 0;
-    STRLEN maxlen;
  
      PERL_ARGS_ASSERT_REGINCLASS;
  
  
      PERL_ARGS_ASSERT_REGINCLASS;
  
-    /* If c is not already the code point, get it */
-    if (utf8_target && !UTF8_IS_INVARIANT(c)) {
+    /* If c is not already the code point, get it.  Note that
+     * UTF8_IS_INVARIANT() works even if not in UTF-8 */
+    if (! UTF8_IS_INVARIANT(c) && utf8_target) {
+        STRLEN c_len = 0;
         c = utf8n_to_uvchr(p, UTF8_MAXBYTES, &c_len,
                 (UTF8_ALLOW_DEFAULT & UTF8_ALLOW_ANYUV)
                 | UTF8_ALLOW_FFFF | UTF8_CHECK_ONLY);
         c = utf8n_to_uvchr(p, UTF8_MAXBYTES, &c_len,
                 (UTF8_ALLOW_DEFAULT & UTF8_ALLOW_ANYUV)
                 | UTF8_ALLOW_FFFF | UTF8_CHECK_ONLY);
@@ -6817,21 +7350,6 @@ S_reginclass(pTHX_ const regexp * const prog, register const regnode * const n,
         if (c_len == (STRLEN)-1)
             Perl_croak(aTHX_ "Malformed UTF-8 character (fatal)");
      }
         if (c_len == (STRLEN)-1)
             Perl_croak(aTHX_ "Malformed UTF-8 character (fatal)");
      }
-    else {
-       c_len = 1;
-    }
-
-    /* Use passed in max length, or one character if none passed in or less
-     * than one character.  And assume will match just one character.  This is
-     * overwritten later if matched more. */
-    if (lenp) {
-       maxlen = (*lenp > c_len) ? *lenp : c_len;
-       *lenp = c_len;
-
-    }
-    else {
-       maxlen = c_len;
-    }
  
      /* If this character is potentially in the bitmap, check it */
      if (c < 256) {
  
      /* If this character is potentially in the bitmap, check it */
      if (c < 256) {
@@ -6843,11 +7361,10 @@ S_reginclass(pTHX_ const regexp * const prog, register const regnode * const n,
         {
             match = TRUE;
         }
         {
             match = TRUE;
         }
-
         else if (flags & ANYOF_LOCALE) {
             PL_reg_flags |= RF_tainted;
  
         else if (flags & ANYOF_LOCALE) {
             PL_reg_flags |= RF_tainted;
  
-           if ((flags & ANYOF_LOC_NONBITMAP_FOLD)
+           if ((flags & ANYOF_LOC_FOLD)
                  && ANYOF_BITMAP_TEST(n, PL_fold_locale[c]))
             {
                 match = TRUE;
                  && ANYOF_BITMAP_TEST(n, PL_fold_locale[c]))
             {
                 match = TRUE;
@@ -6891,7 +7408,7 @@ S_reginclass(pTHX_ const regexp * const prog, register const regnode * const n,
      }
  
      /* If the bitmap didn't (or couldn't) match, and something outside the
      }
  
      /* If the bitmap didn't (or couldn't) match, and something outside the
-     * bitmap could match, try that.  Locale nodes specifiy completely the
+     * bitmap could match, try that.  Locale nodes specify completely the
       * behavior of code points in the bit map (otherwise, a utf8 target would
       * cause them to be treated as Unicode and not locale), except in
       * the very unlikely event when this node is a synthetic start class, which
       * behavior of code points in the bit map (otherwise, a utf8 target would
       * cause them to be treated as Unicode and not locale), except in
       * the very unlikely event when this node is a synthetic start class, which
@@ -6910,167 +7427,19 @@ S_reginclass(pTHX_ const regexp * const prog, register const regnode * const n,
                              || (! (flags & ANYOF_LOCALE))
                              || (flags & ANYOF_IS_SYNTHETIC)))))
         {
                              || (! (flags & ANYOF_LOCALE))
                              || (flags & ANYOF_IS_SYNTHETIC)))))
         {
-           AV *av;
-           SV * const sw = core_regclass_swash(prog, n, TRUE, 0, (SV**)&av);
-
+           SV * const sw = core_regclass_swash(prog, n, TRUE, 0);
             if (sw) {
                 U8 * utf8_p;
                 if (utf8_target) {
                     utf8_p = (U8 *) p;
             if (sw) {
                 U8 * utf8_p;
                 if (utf8_target) {
                     utf8_p = (U8 *) p;
-               } else {
-
-                   /* Not utf8.  Convert as much of the string as available up
-                    * to the limit of how far the (single) character in the
-                    * pattern can possibly match (no need to go further).  If
-                    * the node is a straight ANYOF or not folding, it can't
-                    * match more than one.  Otherwise, It can match up to how
-                    * far a single char can fold to.  Since not utf8, each
-                    * character is a single byte, so the max it can be in
-                    * bytes is the same as the max it can be in characters */
-                   STRLEN len = (OP(n) == ANYOF
-                                 || ! (flags & ANYOF_LOC_NONBITMAP_FOLD))
-                                 ? 1
-                                 : (maxlen < UTF8_MAX_FOLD_CHAR_EXPAND)
-                                   ? maxlen
-                                   : UTF8_MAX_FOLD_CHAR_EXPAND;
+               } else { /* Convert to utf8 */
+                   STRLEN len = 1;
                     utf8_p = bytes_to_utf8(p, &len);
                 }
  
                     utf8_p = bytes_to_utf8(p, &len);
                 }
  
-               if (swash_fetch(sw, utf8_p, TRUE))
+               if (swash_fetch(sw, utf8_p, TRUE)) {
                     match = TRUE;
                     match = TRUE;
-               else if (flags & ANYOF_LOC_NONBITMAP_FOLD) {
-
-                   /* Here, we need to test if the fold of the target string
-                    * matches.  The non-multi char folds have all been moved to
-                     * the compilation phase, and the multi-char folds have
-                     * been stored by regcomp into 'av'; we linearly check to
-                     * see if any match the target string (folded).   We know
-                     * that the originals were each one character, but we don't
-                     * currently know how many characters/bytes each folded to,
-                     * except we do know that there are small limits imposed by
-                     * Unicode.  XXX A performance enhancement would be to have
-                     * regcomp.c store the max number of chars/bytes that are
-                     * in an av entry, as, say the 0th element.  Even better
-                     * would be to have a hash of the few characters that can
-                     * start a multi-char fold to the max number of chars of
-                     * those folds.
-                    *
-                    * If there is a match, we will need to advance (if lenp is
-                    * specified) the match pointer in the target string.  But
-                    * what we are comparing here isn't that string directly,
-                    * but its fold, whose length may differ from the original.
-                    * As we go along in constructing the fold, therefore, we
-                    * create a map so that we know how many bytes in the
-                    * source to advance given that we have matched a certain
-                    * number of bytes in the fold.  This map is stored in
-                    * 'map_fold_len_back'.  Let n mean the number of bytes in
-                    * the fold of the first character that we are folding.
-                    * Then map_fold_len_back[n] is set to the number of bytes
-                    * in that first character.  Similarly let m be the
-                    * corresponding number for the second character to be
-                    * folded.  Then map_fold_len_back[n+m] is set to the
-                    * number of bytes occupied by the first two source
-                    * characters. ... */
-                   U8 map_fold_len_back[UTF8_MAXBYTES_CASE+1] = { 0 };
-                   U8 folded[UTF8_MAXBYTES_CASE+1];
-                   STRLEN foldlen = 0; /* num bytes in fold of 1st char */
-                   STRLEN total_foldlen = 0; /* num bytes in fold of all
-                                                 chars */
-
-                   if (OP(n) == ANYOF || maxlen == 1 || ! lenp || ! av) {
-
-                       /* Here, only need to fold the first char of the target
-                        * string.  It the source wasn't utf8, is 1 byte long */
-                       to_utf8_fold(utf8_p, folded, &foldlen);
-                       total_foldlen = foldlen;
-                       map_fold_len_back[foldlen] = (utf8_target)
-                                                    ? UTF8SKIP(utf8_p)
-                                                    : 1;
-                   }
-                   else {
-
-                       /* Here, need to fold more than the first char.  Do so
-                        * up to the limits */
-                       U8* source_ptr = utf8_p;    /* The source for the fold
-                                                      is the regex target
-                                                      string */
-                       U8* folded_ptr = folded;
-                       U8* e = utf8_p + maxlen;    /* Can't go beyond last
-                                                      available byte in the
-                                                      target string */
-                       U8 i;
-                       for (i = 0;
-                            i < UTF8_MAX_FOLD_CHAR_EXPAND && source_ptr < e;
-                            i++)
-                       {
-
-                           /* Fold the next character */
-                           U8 this_char_folded[UTF8_MAXBYTES_CASE+1];
-                           STRLEN this_char_foldlen;
-                           to_utf8_fold(source_ptr,
-                                        this_char_folded,
-                                        &this_char_foldlen);
-
-                           /* Bail if it would exceed the byte limit for
-                            * folding a single char. */
-                           if (this_char_foldlen + folded_ptr - folded >
-                                                           UTF8_MAXBYTES_CASE)
-                           {
-                               break;
-                           }
-
-                           /* Add the fold of this character */
-                           Copy(this_char_folded,
-                                folded_ptr,
-                                this_char_foldlen,
-                                U8);
-                           source_ptr += UTF8SKIP(source_ptr);
-                           folded_ptr += this_char_foldlen;
-                           total_foldlen = folded_ptr - folded;
-
-                           /* Create map from the number of bytes in the fold
-                            * back to the number of bytes in the source.  If
-                            * the source isn't utf8, the byte count is just
-                            * the number of characters so far */
-                           map_fold_len_back[total_foldlen]
-                                                     = (utf8_target)
-                                                       ? source_ptr - utf8_p
-                                                       : i + 1;
-                       }
-                       *folded_ptr = '\0';
-                   }
-
-
-                   /* Do the linear search to see if the fold is in the list
-                    * of multi-char folds. */
-                   if (av) {
-                       I32 i;
-                       for (i = 0; i <= av_len(av); i++) {
-                           SV* const sv = *av_fetch(av, i, FALSE);
-                           STRLEN len;
-                           const char * const s = SvPV_const(sv, len);
-
-                           if (len <= total_foldlen
-                               && memEQ(s, (char*)folded, len)
-
-                                  /* If 0, means matched a partial char. See
-                                   * [perl #90536] */
-                               && map_fold_len_back[len])
-                           {
-
-                               /* Advance the target string ptr to account for
-                                * this fold, but have to translate from the
-                                * folded length to the corresponding source
-                                * length. */
-                               if (lenp) {
-                                   *lenp = map_fold_len_back[len];
-                               }
-                               match = TRUE;
-                               break;
-                           }
-                       }
-                   }
-               }
+                }
  
                 /* If we allocated a string above, free it */
                 if (! utf8_target) Safefree(utf8_p);
  
                 /* If we allocated a string above, free it */
                 if (! utf8_target) Safefree(utf8_p);
@@ -7086,7 +7455,8 @@ S_reginclass(pTHX_ const regexp * const prog, register const regnode * const n,
          }
      }
  
          }
      }
  
-    return (flags & ANYOF_INVERT) ? !match : match;
+    /* The xor complements the return if to invert: 1^1 = 0, 1^0 = 1 */
+    return cBOOL(flags & ANYOF_INVERT) ^ match;
  }
  
  STATIC U8 *
  }
  
  STATIC U8 *
@@ -7190,6 +7560,8 @@ restore_pos(pTHX_ void *arg)
         if (PL_reg_oldsaved) {
             rex->subbeg = PL_reg_oldsaved;
             rex->sublen = PL_reg_oldsavedlen;
         if (PL_reg_oldsaved) {
             rex->subbeg = PL_reg_oldsaved;
             rex->sublen = PL_reg_oldsavedlen;
+           rex->suboffset = PL_reg_oldsavedoffset;
+           rex->subcoffset = PL_reg_oldsavedcoffset;
  #ifdef PERL_OLD_COPY_ON_WRITE
             rex->saved_copy = PL_nrs;
  #endif
  #ifdef PERL_OLD_COPY_ON_WRITE
             rex->saved_copy = PL_nrs;
  #endif
@@ -7204,6 +7576,9 @@ restore_pos(pTHX_ void *arg)
  STATIC void
  S_to_utf8_substr(pTHX_ register regexp *prog)
  {
  STATIC void
  S_to_utf8_substr(pTHX_ register regexp *prog)
  {
+    /* Converts substr fields in prog from bytes to UTF-8, calling fbm_compile
+     * on the converted value */
+
      int i = 1;
  
      PERL_ARGS_ASSERT_TO_UTF8_SUBSTR;
      int i = 1;
  
      PERL_ARGS_ASSERT_TO_UTF8_SUBSTR;
@@ -7232,9 +7607,12 @@ S_to_utf8_substr(pTHX_ register regexp *prog)
      } while (i--);
  }
  
      } while (i--);
  }
  
-STATIC void
+STATIC bool
  S_to_byte_substr(pTHX_ register regexp *prog)
  {
  S_to_byte_substr(pTHX_ register regexp *prog)
  {
+    /* Converts substr fields in prog from UTF-8 to bytes, calling fbm_compile
+     * on the converted value; returns FALSE if can't be converted. */
+
      dVAR;
      int i = 1;
  
      dVAR;
      int i = 1;
  
@@ -7244,25 +7622,93 @@ S_to_byte_substr(pTHX_ register regexp *prog)
         if (prog->substrs->data[i].utf8_substr
             && !prog->substrs->data[i].substr) {
             SV* sv = newSVsv(prog->substrs->data[i].utf8_substr);
         if (prog->substrs->data[i].utf8_substr
             && !prog->substrs->data[i].substr) {
             SV* sv = newSVsv(prog->substrs->data[i].utf8_substr);
-           if (sv_utf8_downgrade(sv, TRUE)) {
-               if (SvVALID(prog->substrs->data[i].utf8_substr)) {
-                   if (SvTAIL(prog->substrs->data[i].utf8_substr)) {
-                       /* Trim the trailing \n that fbm_compile added last
-                          time.  */
-                       SvCUR_set(sv, SvCUR(sv) - 1);
-                       fbm_compile(sv, FBMcf_TAIL);
-                   } else
-                       fbm_compile(sv, 0);
-               }
-           } else {
-               SvREFCNT_dec(sv);
-               sv = &PL_sv_undef;
-           }
+           if (! sv_utf8_downgrade(sv, TRUE)) {
+                return FALSE;
+            }
+            if (SvVALID(prog->substrs->data[i].utf8_substr)) {
+                if (SvTAIL(prog->substrs->data[i].utf8_substr)) {
+                    /* Trim the trailing \n that fbm_compile added last
+                        time.  */
+                    SvCUR_set(sv, SvCUR(sv) - 1);
+                    fbm_compile(sv, FBMcf_TAIL);
+                } else
+                    fbm_compile(sv, 0);
+            }
             prog->substrs->data[i].substr = sv;
             if (prog->substrs->data[i].utf8_substr == prog->check_utf8)
                 prog->check_substr = sv;
         }
      } while (i--);
             prog->substrs->data[i].substr = sv;
             if (prog->substrs->data[i].utf8_substr == prog->check_utf8)
                 prog->check_substr = sv;
         }
      } while (i--);
+
+    return TRUE;
+}
+
+/* These constants are for finding GCB=LV and GCB=LVT.  These are for the
+ * pre-composed Hangul syllables, which are all in a contiguous block and
+ * arranged there in such a way so as to facilitate alorithmic determination of
+ * their characteristics.  As such, they don't need a swash, but can be
+ * determined by simple arithmetic.  Almost all are GCB=LVT, but every 28th one
+ * is a GCB=LV */
+#define SBASE 0xAC00    /* Start of block */
+#define SCount 11172    /* Length of block */
+#define TCount 28
+
+#if 0   /* This routine is not currently used */
+PERL_STATIC_INLINE bool
+S_is_utf8_X_LV(pTHX_ const U8 *p)
+{
+    /* Unlike most other similarly named routines here, this does not create a
+     * swash, so swash_fetch() cannot be used on PL_utf8_X_LV. */
+
+    dVAR;
+
+    UV cp = valid_utf8_to_uvchr(p, NULL);
+
+    PERL_ARGS_ASSERT_IS_UTF8_X_LV;
+
+    /* The earliest Unicode releases did not have these precomposed Hangul
+     * syllables.  Set to point to undef in that case, so will return false on
+     * every call */
+    if (! PL_utf8_X_LV) {   /* Set up if this is the first time called */
+        PL_utf8_X_LV = swash_init("utf8", "_X_GCB_LV", &PL_sv_undef, 1, 0);
+        if (_invlist_len(_get_swash_invlist(PL_utf8_X_LV)) == 0) {
+            SvREFCNT_dec(PL_utf8_X_LV);
+            PL_utf8_X_LV = &PL_sv_undef;
+        }
+    }
+
+    return (PL_utf8_X_LV != &PL_sv_undef
+            && cp >= SBASE && cp < SBASE + SCount
+            && (cp - SBASE) % TCount == 0); /* Only every TCount one is LV */
+}
+#endif
+
+PERL_STATIC_INLINE bool
+S_is_utf8_X_LVT(pTHX_ const U8 *p)
+{
+    /* Unlike most other similarly named routines here, this does not create a
+     * swash, so swash_fetch() cannot be used on PL_utf8_X_LVT. */
+
+    dVAR;
+
+    UV cp = valid_utf8_to_uvchr(p, NULL);
+
+    PERL_ARGS_ASSERT_IS_UTF8_X_LVT;
+
+    /* The earliest Unicode releases did not have these precomposed Hangul
+     * syllables.  Set to point to undef in that case, so will return false on
+     * every call */
+    if (! PL_utf8_X_LVT) {   /* Set up if this is the first time called */
+        PL_utf8_X_LVT = swash_init("utf8", "_X_GCB_LVT", &PL_sv_undef, 1, 0);
+        if (_invlist_len(_get_swash_invlist(PL_utf8_X_LVT)) == 0) {
+            SvREFCNT_dec(PL_utf8_X_LVT);
+            PL_utf8_X_LVT = &PL_sv_undef;
+        }
+    }
+
+    return (PL_utf8_X_LVT != &PL_sv_undef
+            && cp >= SBASE && cp < SBASE + SCount
+            && (cp - SBASE) % TCount != 0); /* All but every TCount one is LV */
  }
  
  /*
  }
  
  /*