X-Git-Url: https://perl5.git.perl.org/perl5.git/blobdiff_plain/0a5ed81e6617c9229cc1ea042e9a70c3ec63fd65..975d8916e99aadf059b284a824eff315e49a1825:/regexec.c

diff --git a/regexec.c b/regexec.c
index 48ed8c3..ee961e7 100644
--- a/regexec.c
+++ b/regexec.c
@@ -218,7 +218,7 @@ S_regcppush(pTHX_ const regexp *rex, I32 parenfloor, U32 maxopenparen _pDEPTH)
     const UV total_elems = paren_elems_to_push + REGCP_OTHER_ELEMS;
     const UV elems_shifted = total_elems << SAVE_TIGHT_SHIFT;
     I32 p;
-    GET_RE_DEBUG_FLAGS_DECL;
+    DECLARE_AND_GET_RE_DEBUG_FLAGS;
 
     PERL_ARGS_ASSERT_REGCPPUSH;
 
@@ -328,7 +328,7 @@ S_regcppop(pTHX_ regexp *rex, U32 *maxopenparen_p _pDEPTH)
 {
     UV i;
     U32 paren;
-    GET_RE_DEBUG_FLAGS_DECL;
+    DECLARE_AND_GET_RE_DEBUG_FLAGS;
 
     PERL_ARGS_ASSERT_REGCPPOP;
 
@@ -579,7 +579,7 @@ S_find_span_end(U8 * s, const U8 * send, const U8 span_byte)
             span_word |= span_word << 4;
 
             /* That reduces the problem to what this function solves */
-            return s + _variant_byte_number(span_word);
+            return s + variant_byte_number(span_word);
 
 #endif
 
@@ -657,7 +657,7 @@ S_find_next_masked(U8 * s, const U8 * send, const U8 byte, const U8 mask)
             masked &= PERL_VARIANTS_WORD_MASK;
 
             /* This reduces the problem to that solved by this function */
-            s += _variant_byte_number(masked);
+            s += variant_byte_number(masked);
             return s;
 
         } while (s + PERL_WORDSIZE <= send);
@@ -723,7 +723,7 @@ S_find_span_end_mask(U8 * s, const U8 * send, const U8 span_byte, const U8 mask)
             masked |= masked << 1;
             masked |= masked << 2;
             masked |= masked << 4;
-            return s + _variant_byte_number(masked);
+            return s + variant_byte_number(masked);
 
 #endif
 
@@ -859,7 +859,7 @@ Perl_re_intuit_start(pTHX_
     RXi_GET_DECL(prog,progi);
     regmatch_info reginfo_buf;  /* create some info to pass to find_byclass */
     regmatch_info *const reginfo = &reginfo_buf;
-    GET_RE_DEBUG_FLAGS_DECL;
+    DECLARE_AND_GET_RE_DEBUG_FLAGS;
 
     PERL_ARGS_ASSERT_RE_INTUIT_START;
     PERL_UNUSED_ARG(flags);
@@ -1472,10 +1472,10 @@ Perl_re_intuit_start(pTHX_
         const U8* const str = (U8*)STRING(progi->regstclass);
 
         /* XXX this value could be pre-computed */
-        const int cl_l = (PL_regkind[OP(progi->regstclass)] == EXACT
+        const SSize_t cl_l = (PL_regkind[OP(progi->regstclass)] == EXACT
 		    ?  (reginfo->is_utf8_pat
-                        ? utf8_distance(str + STR_LEN(progi->regstclass), str)
-                        : STR_LEN(progi->regstclass))
+                        ? (SSize_t)utf8_distance(str + STR_LEN(progi->regstclass), str)
+                        : (SSize_t)STR_LEN(progi->regstclass))
 		    : 1);
 	char * endpos;
         char *s;
@@ -1787,7 +1787,9 @@ STMT_START {
     STMT_START {                                            \
         while (s < strend) {                                \
             CODE                                            \
-            s += ((UTF8) ? UTF8SKIP(s) : 1);                \
+            s += ((UTF8)                                    \
+                  ? UTF8_SAFE_SKIP(s, reginfo->strend)      \
+                  : 1);                                     \
         }                                                   \
     } STMT_END
 
@@ -1801,7 +1803,7 @@ STMT_START {
 #define REXEC_FBC_CLASS_SCAN_GUTS(UTF8, COND)                  \
     if (COND) {                                                \
         FBC_CHECK_AND_TRY                                      \
-        s += ((UTF8) ? UTF8SKIP(s) : 1);                       \
+        s += ((UTF8) ? UTF8_SAFE_SKIP(s, reginfo->strend) : 1);\
         previous_occurrence_end = s;                           \
     }                                                          \
     else {                                                     \
@@ -1820,12 +1822,13 @@ STMT_START {
  * of the one we're looking for.  Knowing that, we can see right away if the
  * next occurrence is adjacent to the previous.  When 'doevery' is FALSE, we
  * don't accept the 2nd and succeeding adjacent occurrences */
-#define FBC_CHECK_AND_TRY                                      \
-        if (   (   doevery                                     \
-                || s != previous_occurrence_end)               \
-            && (reginfo->intuit || regtry(reginfo, &s)))       \
-        {                                                      \
-            goto got_it;                                       \
+#define FBC_CHECK_AND_TRY                                           \
+        if (   (   doevery                                          \
+                || s != previous_occurrence_end)                    \
+            && (   reginfo->intuit                                  \
+                || (s <= reginfo->strend && regtry(reginfo, &s))))  \
+        {                                                           \
+            goto got_it;                                            \
         }
 
 
@@ -1858,7 +1861,7 @@ STMT_START {
                                                             \
         if (COND) {                                         \
             FBC_CHECK_AND_TRY                               \
-            s += UTF8SKIP(s);                               \
+            s += UTF8_SAFE_SKIP(s, reginfo->strend);        \
             previous_occurrence_end = s;                    \
         }                                                   \
         else {                                              \
@@ -1972,9 +1975,12 @@ STMT_START {
     }
 
 /* This is the macro to use when we want to see if something that looks like it
- * could match, actually does, and if so exits the loop */
-#define REXEC_FBC_TRYIT                            \
-    if ((reginfo->intuit || regtry(reginfo, &s)))  \
+ * could match, actually does, and if so exits the loop.  It needs to be used
+ * only for bounds checking macros, as it allows for matching beyond the end of
+ * string (which should be zero length without having to look at the string
+ * contents) */
+#define REXEC_FBC_TRYIT                                                     \
+    if (reginfo->intuit || (s <= reginfo->strend && regtry(reginfo, &s)))   \
         goto got_it
 
 /* The only difference between the BOUND and NBOUND cases is that
@@ -2172,17 +2178,72 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
 
     case ANYOFH:
         if (utf8_target) {  /* Can't possibly match a non-UTF-8 target */
+            REXEC_FBC_CLASS_SCAN(TRUE,
+                  (   (U8) NATIVE_UTF8_TO_I8(*s) >= ANYOF_FLAGS(c)
+                   && reginclass(prog, c, (U8*)s, (U8*) strend, utf8_target)));
+        }
+        break;
+
+    case ANYOFHb:
+        if (utf8_target) {  /* Can't possibly match a non-UTF-8 target */
+
+            /* We know what the first byte of any matched string should be */
             U8 first_byte = FLAGS(c);
 
-            if (first_byte) {   /* We know what the first byte of any matched
-                                   string should be */
-                REXEC_FBC_FIND_NEXT_UTF8_BYTE_SCAN(first_byte,
+            REXEC_FBC_FIND_NEXT_UTF8_BYTE_SCAN(first_byte,
                       reginclass(prog, c, (U8*)s, (U8*) strend, utf8_target));
-            }
-            else {
-                REXEC_FBC_CLASS_SCAN(TRUE,
-                      reginclass(prog, c, (U8*)s, (U8*) strend, utf8_target));
-            }
+        }
+        break;
+
+    case ANYOFHr:
+        if (utf8_target) {  /* Can't possibly match a non-UTF-8 target */
+            REXEC_FBC_CLASS_SCAN(TRUE,
+                  (   inRANGE(NATIVE_UTF8_TO_I8(*s),
+                              LOWEST_ANYOF_HRx_BYTE(ANYOF_FLAGS(c)),
+                              HIGHEST_ANYOF_HRx_BYTE(ANYOF_FLAGS(c)))
+                   && reginclass(prog, c, (U8*)s, (U8*) strend, utf8_target)));
+        }
+        break;
+
+    case ANYOFHs:
+        if (utf8_target) {  /* Can't possibly match a non-UTF-8 target */
+            REXEC_FBC_CLASS_SCAN(TRUE,
+                  (   strend -s >= FLAGS(c)
+                   && memEQ(s, ((struct regnode_anyofhs *) c)->string, FLAGS(c))
+                   && reginclass(prog, c, (U8*)s, (U8*) strend, utf8_target)));
+        }
+        break;
+
+    case ANYOFR:
+        if (utf8_target) {
+            REXEC_FBC_CLASS_SCAN(TRUE,
+                  (   NATIVE_UTF8_TO_I8(*s) >= ANYOF_FLAGS(c)
+                   && withinCOUNT(utf8_to_uvchr_buf((U8 *) s,
+                                                    (U8 *) strend,
+                                                    NULL),
+                                  ANYOFRbase(c), ANYOFRdelta(c))));
+        }
+        else {
+            REXEC_FBC_CLASS_SCAN(0, withinCOUNT((U8) *s,
+                                               ANYOFRbase(c), ANYOFRdelta(c)));
+        }
+        break;
+
+    case ANYOFRb:
+        if (utf8_target) {
+
+            /* We know what the first byte of any matched string should be */
+            U8 first_byte = FLAGS(c);
+
+            REXEC_FBC_FIND_NEXT_UTF8_BYTE_SCAN(first_byte,
+                      withinCOUNT(utf8_to_uvchr_buf((U8 *) s,
+                                                    (U8 *) strend,
+                                                    NULL),
+                                  ANYOFRbase(c), ANYOFRdelta(c)));
+        }
+        else {
+            REXEC_FBC_CLASS_SCAN(0, withinCOUNT((U8) *s,
+                                               ANYOFRbase(c), ANYOFRdelta(c)));
         }
         break;
 
@@ -2247,7 +2308,7 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
                                              | FOLDEQ_S2_FOLDS_SANE;
             goto do_exactf_utf8;
 
-    case EXACTFU_ONLY8:
+    case EXACTFU_REQ8:
         if (! utf8_target) {
             break;
         }
@@ -2279,8 +2340,8 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
          * first character.  c2 is its fold.  This logic will not work for
          * Unicode semantics and the german sharp ss, which hence should
          * not be compiled into a node that gets here. */
-        pat_string = STRING(c);
-        ln  = STR_LEN(c);	/* length to match in octets/bytes */
+        pat_string = STRINGs(c);
+        ln  = STR_LENs(c);	/* length to match in octets/bytes */
 
         /* We know that we have to match at least 'ln' bytes (which is the
          * same as characters, since not utf8).  If we have to match 3
@@ -2355,8 +2416,8 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
         /* If one of the operands is in utf8, we can't use the simpler folding
          * above, due to the fact that many different characters can have the
          * same fold, or portion of a fold, or different- length fold */
-        pat_string = STRING(c);
-        ln  = STR_LEN(c);	/* length to match in octets/bytes */
+        pat_string = STRINGs(c);
+        ln  = STR_LENs(c);	/* length to match in octets/bytes */
         pat_end = pat_string + ln;
         lnc = is_utf8_pat       /* length to match in characters */
                 ? utf8_length((U8 *) pat_string, (U8 *) pat_end)
@@ -2395,7 +2456,7 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
             {
                 goto got_it;
             }
-            s += (utf8_target) ? UTF8SKIP(s) : 1;
+            s += (utf8_target) ? UTF8_SAFE_SKIP(s, reginfo->strend) : 1;
         }
         break;
     }
@@ -2479,7 +2540,7 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
                     }
 
                     /* Didn't match.  Try at the next position (if there is one) */
-                    s += (utf8_target) ? UTF8SKIP(s) : 1;
+                    s += (utf8_target) ? UTF8_SAFE_SKIP(s, reginfo->strend) : 1;
                     if (UNLIKELY(s >= reginfo->strend)) {
                         break;
                     }
@@ -2503,7 +2564,7 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
                             goto got_it;
                         }
                         before = after;
-                        s += UTF8SKIP(s);
+                        s += UTF8_SAFE_SKIP(s, reginfo->strend);
                     }
                 }
                 else {  /* Not utf8.  Everything is a GCB except between CR and
@@ -2521,7 +2582,9 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
 
                 /* And, since this is a bound, it can match after the final
                  * character in the string */
-                if ((reginfo->intuit || regtry(reginfo, &s))) {
+                if (   reginfo->intuit
+                    || (s <= reginfo->strend && regtry(reginfo, &s)))
+                {
                     goto got_it;
                 }
                 break;
@@ -2531,7 +2594,7 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
                     if (reginfo->intuit || regtry(reginfo, &s)) {
                         goto got_it;
                     }
-                    s += (utf8_target) ? UTF8SKIP(s) : 1;
+                    s += (utf8_target) ? UTF8_SAFE_SKIP(s, reginfo->strend) : 1;
                     if (UNLIKELY(s >= reginfo->strend)) {
                         break;
                     }
@@ -2555,7 +2618,7 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
                             goto got_it;
                         }
                         before = after;
-                        s += UTF8SKIP(s);
+                        s += UTF8_SAFE_SKIP(s, reginfo->strend);
                     }
                 }
                 else {  /* Not utf8. */
@@ -2577,7 +2640,9 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
                     }
                 }
 
-                if (reginfo->intuit || regtry(reginfo, &s)) {
+                if (   reginfo->intuit
+                    || (s <= reginfo->strend && regtry(reginfo, &s)))
+                {
                     goto got_it;
                 }
 
@@ -2588,7 +2653,7 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
                     if (reginfo->intuit || regtry(reginfo, &s)) {
                         goto got_it;
                     }
-                    s += (utf8_target) ? UTF8SKIP(s) : 1;
+                    s += (utf8_target) ? UTF8_SAFE_SKIP(s, reginfo->strend) : 1;
                     if (UNLIKELY(s >= reginfo->strend)) {
                         break;
                     }
@@ -2613,7 +2678,7 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
                             goto got_it;
                         }
                         before = after;
-                        s += UTF8SKIP(s);
+                        s += UTF8_SAFE_SKIP(s, reginfo->strend);
                     }
                 }
                 else {  /* Not utf8. */
@@ -2638,7 +2703,9 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
                 /* Here are at the final position in the target string.  The SB
                  * value is always true here, so matches, depending on other
                  * constraints */
-                if (reginfo->intuit || regtry(reginfo, &s)) {
+                if (   reginfo->intuit
+                    || (s <= reginfo->strend && regtry(reginfo, &s)))
+                {
                     goto got_it;
                 }
 
@@ -2649,7 +2716,7 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
                     if (reginfo->intuit || regtry(reginfo, &s)) {
                         goto got_it;
                     }
-                    s += (utf8_target) ? UTF8SKIP(s) : 1;
+                    s += (utf8_target) ? UTF8_SAFE_SKIP(s, reginfo->strend) : 1;
                     if (UNLIKELY(s >= reginfo->strend)) {
                         break;
                     }
@@ -2683,7 +2750,7 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
                         }
                         previous = before;
                         before = after;
-                        s += UTF8SKIP(s);
+                        s += UTF8_SAFE_SKIP(s, reginfo->strend);
                     }
                 }
                 else {  /* Not utf8. */
@@ -2708,7 +2775,9 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
                     }
                 }
 
-                if (reginfo->intuit || regtry(reginfo, &s)) {
+                if (   reginfo->intuit
+                    || (s <= reginfo->strend && regtry(reginfo, &s)))
+                {
                     goto got_it;
                 }
         }
@@ -2845,7 +2914,7 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
             U8 *bitmap=NULL;
 
 
-            GET_RE_DEBUG_FLAGS_DECL;
+            DECLARE_AND_GET_RE_DEBUG_FLAGS;
 
             /* We can't just allocate points here. We need to wrap it in
              * an SV so it gets freed properly if there is a croak while
@@ -3025,7 +3094,9 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
                         LEAVE;
                         goto got_it;
                     }
-                    s = HOPc(s,1);
+                    if (s < reginfo->strend) {
+                        s = HOPc(s,1);
+                    }
                     DEBUG_TRIE_EXECUTE_r({
                         Perl_re_printf( aTHX_ "Pattern failed. Looking for new start point...\n");
                     });
@@ -3226,7 +3297,7 @@ Perl_regexec_flags(pTHX_ REGEXP * const rx, char *stringarg, char *strend,
     regmatch_info *const reginfo = &reginfo_buf;
     regexp_paren_pair *swap = NULL;
     I32 oldsave;
-    GET_RE_DEBUG_FLAGS_DECL;
+    DECLARE_AND_GET_RE_DEBUG_FLAGS;
 
     PERL_ARGS_ASSERT_REGEXEC_FLAGS;
     PERL_UNUSED_ARG(data);
@@ -3280,7 +3351,7 @@ Perl_regexec_flags(pTHX_ REGEXP * const rx, char *stringarg, char *strend,
                 if (!startpos ||
                     ((flags & REXEC_FAIL_ON_UNDERFLOW) && startpos < stringarg))
                 {
-                    DEBUG_r(Perl_re_printf( aTHX_
+                    DEBUG_GPOS_r(Perl_re_printf( aTHX_
                             "fail: ganch-gofs before earliest possible start\n"));
                     return 0;
                 }
@@ -3299,8 +3370,8 @@ Perl_regexec_flags(pTHX_ REGEXP * const rx, char *stringarg, char *strend,
 
     minlen = prog->minlen;
     if ((startpos + minlen) > strend || startpos < strbeg) {
-        DEBUG_r(Perl_re_printf( aTHX_
-                    "Regex match can't succeed, so not even tried\n"));
+	DEBUG_EXECUTE_r(Perl_re_printf( aTHX_
+                        "Regex match can't succeed, so not even tried\n"));
         return 0;
     }
 
@@ -3544,7 +3615,7 @@ Perl_regexec_flags(pTHX_ REGEXP * const rx, char *stringarg, char *strend,
 		if (*s == ch) {
 		    DEBUG_EXECUTE_r( did_match = 1 );
 		    if (regtry(reginfo, &s)) goto got_it;
-		    s += UTF8SKIP(s);
+		    s += UTF8_SAFE_SKIP(s, strend);
 		    while (s < strend && *s == ch)
 			s += UTF8SKIP(s);
 		}
@@ -3912,7 +3983,7 @@ S_regtry(pTHX_ regmatch_info *reginfo, char **startposp)
     U32 depth = 0; /* used by REGCP_SET */
 #endif
     RXi_GET_DECL(prog,progi);
-    GET_RE_DEBUG_FLAGS_DECL;
+    DECLARE_AND_GET_RE_DEBUG_FLAGS;
 
     PERL_ARGS_ASSERT_REGTRY;
 
@@ -4208,7 +4279,9 @@ S_setup_EXACTISH_ST_c1_c2(pTHX_ const regnode * const text_node, int *c1p,
     U8 folded[UTF8_MAX_FOLD_CHAR_EXPAND * UTF8_MAXBYTES_CASE + 1] = { '\0' };
 
     if (   OP(text_node) == EXACT
-        || OP(text_node) == EXACT_ONLY8
+        || OP(text_node) == LEXACT
+        || OP(text_node) == EXACT_REQ8
+        || OP(text_node) == LEXACT_REQ8
         || OP(text_node) == EXACTL)
     {
 
@@ -4217,7 +4290,8 @@ S_setup_EXACTISH_ST_c1_c2(pTHX_ const regnode * const text_node, int *c1p,
          * copy the input to the output, avoiding finding the code point of
          * that character */
         if (!is_utf8_pat) {
-            assert(OP(text_node) != EXACT_ONLY8);
+            assert(   OP(text_node) != EXACT_REQ8
+                   && OP(text_node) != LEXACT_REQ8);
             c2 = c1 = *pat;
         }
         else if (utf8_target) {
@@ -4225,7 +4299,9 @@ S_setup_EXACTISH_ST_c1_c2(pTHX_ const regnode * const text_node, int *c1p,
             Copy(pat, c2_utf8, UTF8SKIP(pat), U8);
             utf8_has_been_setup = TRUE;
         }
-        else if (OP(text_node) == EXACT_ONLY8) {
+        else if (   OP(text_node) == EXACT_REQ8
+                 || OP(text_node) == LEXACT_REQ8)
+        {
             return FALSE;   /* Can only match UTF-8 target */
         }
         else {
@@ -4233,7 +4309,7 @@ S_setup_EXACTISH_ST_c1_c2(pTHX_ const regnode * const text_node, int *c1p,
         }
     }
     else { /* an EXACTFish node */
-        U8 *pat_end = pat + STR_LEN(text_node);
+        U8 *pat_end = pat + STR_LENs(text_node);
 
         /* An EXACTFL node has at least some characters unfolded, because what
          * they match is not known until now.  So, now is the time to fold
@@ -4315,8 +4391,8 @@ S_setup_EXACTISH_ST_c1_c2(pTHX_ const regnode * const text_node, int *c1p,
                 }
             }
             else if (c1 > 255) {
-                const unsigned int * remaining_folds;
-                unsigned int first_fold;
+                const U32 * remaining_folds;
+                U32 first_fold;
 
                 /* Look up what code points (besides c1) fold to c1;  e.g.,
                  * [ 'K', KELVIN_SIGN ] both fold to 'k'. */
@@ -4398,7 +4474,7 @@ S_setup_EXACTISH_ST_c1_c2(pTHX_ const regnode * const text_node, int *c1p,
                     case EXACTFU:
                         c2 = PL_fold_latin1[c1];
                         break;
-                    case EXACTFU_ONLY8:
+                    case EXACTFU_REQ8:
                         return FALSE;
                         NOT_REACHED; /* NOTREACHED */
 
@@ -4450,7 +4526,7 @@ STATIC bool
 S_isGCB(pTHX_ const GCB_enum before, const GCB_enum after, const U8 * const strbeg, const U8 * const curpos, const bool utf8_target)
 {
     /* returns a boolean indicating if there is a Grapheme Cluster Boundary
-     * between the inputs.  See http://www.unicode.org/reports/tr29/. */
+     * between the inputs.  See https://www.unicode.org/reports/tr29/. */
 
     PERL_ARGS_ASSERT_ISGCB;
 
@@ -4512,7 +4588,7 @@ S_isGCB(pTHX_ const GCB_enum before, const GCB_enum after, const U8 * const strb
                 }
                 while (prev == GCB_Extend);
 
-                return prev != GCB_XPG_XX;
+                return prev != GCB_ExtPict_XX;
             }
 
         default:
@@ -4887,7 +4963,7 @@ S_isSB(pTHX_ SB_enum before,
              const bool utf8_target)
 {
     /* returns a boolean indicating if there is a Sentence Boundary Break
-     * between the inputs.  See http://www.unicode.org/reports/tr29/ */
+     * between the inputs.  See https://www.unicode.org/reports/tr29/ */
 
     U8 * lpos = (U8 *) curpos;
     bool has_para_sep = FALSE;
@@ -5505,18 +5581,20 @@ S_backup_one_WB(pTHX_ WB_enum * previous, const U8 * const strbeg, U8 ** curpos,
 
 /* push a new state then goto it */
 
-#define PUSH_STATE_GOTO(state, node, input, eol) \
+#define PUSH_STATE_GOTO(state, node, input, eol, sr0)       \
     pushinput = input; \
     pusheol = eol; \
+    pushsr0 = sr0; \
     scan = node; \
     st->resume_state = state; \
     goto push_state;
 
 /* push a new state with success backtracking, then goto it */
 
-#define PUSH_YES_STATE_GOTO(state, node, input, eol) \
+#define PUSH_YES_STATE_GOTO(state, node, input, eol, sr0)   \
     pushinput = input; \
     pusheol = eol;     \
+    pushsr0 = sr0; \
     scan = node; \
     st->resume_state = state; \
     goto push_yes_state;
@@ -5700,6 +5778,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
     char *loceol = reginfo->strend;
     char *pushinput; /* where to continue after a PUSH */
     char *pusheol;   /* where to stop matching (loceol) after a PUSH */
+    U8   *pushsr0;   /* save starting pos of script run */
     I32 nextchr;   /* is always set to UCHARAT(locinput), or -1 at EOS */
 
     bool result = 0;	    /* return value of S_regmatch */
@@ -5761,7 +5840,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
 #endif
 
 #ifdef DEBUGGING
-    GET_RE_DEBUG_FLAGS_DECL;
+    DECLARE_AND_GET_RE_DEBUG_FLAGS;
 #endif
 
     /* protect against undef(*^R) */
@@ -5836,7 +5915,8 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
 	    /* update the startpoint */
 	    st->u.keeper.val = rex->offs[0].start;
 	    rex->offs[0].start = locinput - reginfo->strbeg;
-	    PUSH_STATE_GOTO(KEEPS_next, next, locinput, loceol);
+	    PUSH_STATE_GOTO(KEEPS_next, next, locinput, loceol,
+                            script_run_begin);
 	    NOT_REACHED; /* NOTREACHED */
 
 	case KEEPS_next_fail:
@@ -6212,7 +6292,8 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
 	    });
 
 	    if ( ST.accepted > 1 || has_cutgroup || ST.jump ) {
-		PUSH_STATE_GOTO(TRIE_next, scan, (char*)uc, loceol);
+		PUSH_STATE_GOTO(TRIE_next, scan, (char*)uc, loceol,
+                                script_run_begin);
 		NOT_REACHED; /* NOTREACHED */
 	    }
 	    /* only one choice left - just continue */
@@ -6240,6 +6321,20 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
         }
 #undef  ST
 
+	case LEXACT_REQ8:
+            if (! utf8_target) {
+                sayNO;
+            }
+            /* FALLTHROUGH */
+
+	case LEXACT:
+        {
+	    char *s;
+
+	    s = STRINGl(scan);
+	    ln = STR_LENl(scan);
+            goto join_short_long_exact;
+
 	case EXACTL:             /*  /abc/l       */
             _CHECK_AND_WARN_PROBLEMATIC_LOCALE;
 
@@ -6253,16 +6348,18 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
                 _CHECK_AND_OUTPUT_WIDE_LOCALE_UTF8_MSG(locinput, reginfo->strend);
             }
             goto do_exact;
-	case EXACT_ONLY8:
+	case EXACT_REQ8:
             if (! utf8_target) {
                 sayNO;
             }
             /* FALLTHROUGH */
-	case EXACT: {            /*  /abc/        */
-	    char *s;
+
+	case EXACT:             /*  /abc/        */
           do_exact:
-	    s = STRING(scan);
-	    ln = STR_LEN(scan);
+	    s = STRINGs(scan);
+	    ln = STR_LENs(scan);
+
+          join_short_long_exact:
 	    if (utf8_target != is_utf8_pat) {
 		/* The target and the pattern have differing utf8ness. */
 		char *l = locinput;
@@ -6365,7 +6462,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
 	    fold_array = PL_fold_latin1;
 	    goto do_exactf;
 
-        case EXACTFU_ONLY8:      /* /abc/iu with something in /abc/ > 255 */
+        case EXACTFU_REQ8:      /* /abc/iu with something in /abc/ > 255 */
             if (! utf8_target) {
                 sayNO;
             }
@@ -6414,8 +6511,8 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
 	    fold_utf8_flags = 0;
 
 	  do_exactf:
-	    s = STRING(scan);
-	    ln = STR_LEN(scan);
+	    s = STRINGs(scan);
+	    ln = STR_LENs(scan);
 
 	    if (   utf8_target
                 || is_utf8_pat
@@ -6472,9 +6569,9 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
 		if (locinput == reginfo->strbeg)
 		    b1 = isWORDCHAR_LC('\n');
 		else {
-                    b1 = isWORDCHAR_LC_utf8_safe(reghop3((U8*)locinput, -1,
-                                                        (U8*)(reginfo->strbeg)),
-                                                 (U8*)(reginfo->strend));
+                    U8 *p = reghop3((U8*)locinput, -1,
+                                    (U8*)(reginfo->strbeg));
+                    b1 = isWORDCHAR_LC_utf8_safe(p, (U8*)(reginfo->strend));
 		}
                 b2 = (NEXTCHR_IS_EOS)
                     ? isWORDCHAR_LC('\n')
@@ -6551,13 +6648,15 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
                     case TRADITIONAL_BOUND:
                     {
                         bool b1, b2;
-                        b1 = (locinput == reginfo->strbeg)
-                             ? 0 /* isWORDCHAR_L1('\n') */
-                             : isWORDCHAR_utf8_safe(
-                                               reghop3((U8*)locinput,
-                                                       -1,
-                                                       (U8*)(reginfo->strbeg)),
-                                                    (U8*) reginfo->strend);
+                        if (locinput == reginfo->strbeg) {
+                            b1 = 0 /* isWORDCHAR_L1('\n') */;
+                        }
+                        else {
+                            U8 *p = reghop3((U8*)locinput, -1,
+                                            (U8*)(reginfo->strbeg));
+
+                            b1 = isWORDCHAR_utf8_safe(p, (U8*) reginfo->strend);
+                        }
                         b2 = (NEXTCHR_IS_EOS)
                             ? 0 /* isWORDCHAR_L1('\n') */
                             : isWORDCHAR_utf8_safe((U8*)locinput,
@@ -6782,8 +6881,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
         case ANYOFH:
             if (   ! utf8_target
                 ||   NEXTCHR_IS_EOS
-                ||  (   ANYOF_FLAGS(scan) != 0
-                     && ANYOF_FLAGS(scan) != (U8) *locinput)
+                ||   ANYOF_FLAGS(scan) > NATIVE_UTF8_TO_I8(*locinput)
 	        || ! reginclass(rex, scan, (U8*)locinput, (U8*) loceol,
                                                                    utf8_target))
             {
@@ -6792,6 +6890,95 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
             goto increment_locinput;
             break;
 
+        case ANYOFHb:
+            if (   ! utf8_target
+                ||   NEXTCHR_IS_EOS
+                ||   ANYOF_FLAGS(scan) != (U8) *locinput
+	        || ! reginclass(rex, scan, (U8*)locinput, (U8*) loceol,
+                                                                  utf8_target))
+            {
+                sayNO;
+            }
+            goto increment_locinput;
+            break;
+
+        case ANYOFHr:
+            if (   ! utf8_target
+                ||   NEXTCHR_IS_EOS
+                || ! inRANGE((U8) NATIVE_UTF8_TO_I8(*locinput),
+                             LOWEST_ANYOF_HRx_BYTE(ANYOF_FLAGS(scan)),
+                             HIGHEST_ANYOF_HRx_BYTE(ANYOF_FLAGS(scan)))
+	        || ! reginclass(rex, scan, (U8*)locinput, (U8*) loceol,
+                                                                   utf8_target))
+            {
+                sayNO;
+            }
+            goto increment_locinput;
+            break;
+
+        case ANYOFHs:
+            if (   ! utf8_target
+                ||   NEXTCHR_IS_EOS
+                ||   loceol - locinput < FLAGS(scan)
+                ||   memNE(locinput, ((struct regnode_anyofhs *) scan)->string, FLAGS(scan))
+	        || ! reginclass(rex, scan, (U8*)locinput, (U8*) loceol,
+                                                                   utf8_target))
+            {
+                sayNO;
+            }
+            goto increment_locinput;
+            break;
+
+        case ANYOFR:
+            if (NEXTCHR_IS_EOS) {
+                sayNO;
+            }
+
+            if (utf8_target) {
+                if (    ANYOF_FLAGS(scan) > NATIVE_UTF8_TO_I8(*locinput)
+                   || ! withinCOUNT(utf8_to_uvchr_buf((U8 *) locinput,
+                                                (U8 *) reginfo->strend,
+                                                NULL),
+                                    ANYOFRbase(scan), ANYOFRdelta(scan)))
+                {
+                    sayNO;
+                }
+            }
+            else {
+                if (! withinCOUNT((U8) *locinput,
+                                  ANYOFRbase(scan), ANYOFRdelta(scan)))
+                {
+                    sayNO;
+                }
+            }
+            goto increment_locinput;
+            break;
+
+        case ANYOFRb:
+            if (NEXTCHR_IS_EOS) {
+                sayNO;
+            }
+
+            if (utf8_target) {
+                if (     ANYOF_FLAGS(scan) != (U8) *locinput
+                    || ! withinCOUNT(utf8_to_uvchr_buf((U8 *) locinput,
+                                                (U8 *) reginfo->strend,
+                                                NULL),
+                                     ANYOFRbase(scan), ANYOFRdelta(scan)))
+                {
+                    sayNO;
+                }
+            }
+            else {
+                if (! withinCOUNT((U8) *locinput,
+                                  ANYOFRbase(scan), ANYOFRdelta(scan)))
+                {
+                    sayNO;
+                }
+            }
+            goto increment_locinput;
+            break;
+
         /* The argument (FLAGS) to all the POSIX node types is the class number
          * */
 
@@ -7013,7 +7200,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
 	    }
 	    break;
             
-	case NREFFL:  /*  /\g{name}/il  */
+	case REFFLN:  /*  /\g{name}/il  */
 	{   /* The capture buffer cases.  The ones beginning with N for the
 	       named buffers just convert to the equivalent numbered and
 	       pretend they were called as the corresponding numbered buffer
@@ -7033,28 +7220,28 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
 	    utf8_fold_flags = FOLDEQ_LOCALE;
 	    goto do_nref;
 
-	case NREFFA:  /*  /\g{name}/iaa  */
+	case REFFAN:  /*  /\g{name}/iaa  */
 	    folder = foldEQ_latin1;
 	    fold_array = PL_fold_latin1;
 	    type = REFFA;
 	    utf8_fold_flags = FOLDEQ_UTF8_NOMIX_ASCII;
 	    goto do_nref;
 
-	case NREFFU:  /*  /\g{name}/iu  */
+	case REFFUN:  /*  /\g{name}/iu  */
 	    folder = foldEQ_latin1;
 	    fold_array = PL_fold_latin1;
 	    type = REFFU;
 	    utf8_fold_flags = 0;
 	    goto do_nref;
 
-	case NREFF:  /*  /\g{name}/i  */
+	case REFFN:  /*  /\g{name}/i  */
 	    folder = foldEQ;
 	    fold_array = PL_fold;
 	    type = REFF;
 	    utf8_fold_flags = 0;
 	    goto do_nref;
 
-	case NREF:  /*  /\g{name}/   */
+	case REFN:  /*  /\g{name}/   */
 	    type = REF;
 	    folder = NULL;
 	    fold_array = NULL;
@@ -7203,7 +7390,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
                 rex->recurse_locinput[arg]= locinput;
 
                 DEBUG_r({
-                    GET_RE_DEBUG_FLAGS_DECL;
+                    DECLARE_AND_GET_RE_DEBUG_FLAGS;
                     DEBUG_STACK_r({
                         Perl_re_exec_indentf( aTHX_
                             "entering GOSUB, prev_recurse_locinput=%p recurse_locinput[%d]=%p\n",
@@ -7222,7 +7409,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
             /* NOTREACHED */
 
         case EVAL:  /*   /(?{...})B/   /(??{A})B/  and  /(?(?{...})X|Y)B/   */
-            if (cur_eval && cur_eval->locinput==locinput) {
+            if (logical == 2 && cur_eval && cur_eval->locinput==locinput) {
 		if ( ++nochange_depth > max_nochange_depth )
                     Perl_croak(aTHX_ "EVAL without pos change exceeded limit in regex");
             } else {
@@ -7450,7 +7637,8 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
                 PL_curpm = PL_reg_curpm;
 
 		if (logical != 2) {
-                    PUSH_STATE_GOTO(EVAL_B, next, locinput, loceol);
+                    PUSH_STATE_GOTO(EVAL_B, next, locinput, loceol,
+                                    script_run_begin);
 		    /* NOTREACHED */
                 }
 	    }
@@ -7550,7 +7738,8 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
 		ST.prev_eval = cur_eval;
 		cur_eval = st;
 		/* now continue from first node in postoned RE */
-		PUSH_YES_STATE_GOTO(EVAL_postponed_AB, startpoint, locinput, loceol);
+		PUSH_YES_STATE_GOTO(EVAL_postponed_AB, startpoint, locinput,
+                                    loceol, script_run_begin);
 		NOT_REACHED; /* NOTREACHED */
 	}
 
@@ -7706,7 +7895,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
 	    sw = cBOOL(rex->lastparen >= n && rex->offs[n].end != -1);
 	    break;
 
-	case NGROUPP:  /*  (?(<name>))  */
+	case GROUPPN:  /*  (?(<name>))  */
 	    /* reg_check_named_buff_matched returns 0 for no match */
 	    sw = cBOOL(0 < reg_check_named_buff_matched(rex,scan));
 	    break;
@@ -7850,7 +8039,8 @@ NULL
 	    ST.count = -1;	/* this will be updated by WHILEM */
 	    ST.lastloc = NULL;  /* this will be updated by WHILEM */
 
-	    PUSH_YES_STATE_GOTO(CURLYX_end, PREVOPER(next), locinput, loceol);
+	    PUSH_YES_STATE_GOTO(CURLYX_end, PREVOPER(next), locinput, loceol,
+                                script_run_begin);
 	    NOT_REACHED; /* NOTREACHED */
 	}
 
@@ -7898,7 +8088,8 @@ NULL
 		cur_curlyx->u.curlyx.lastloc = locinput;
 		REGCP_SET(ST.lastcp);
 
-		PUSH_STATE_GOTO(WHILEM_A_pre, A, locinput, loceol);
+		PUSH_STATE_GOTO(WHILEM_A_pre, A, locinput, loceol,
+                                script_run_begin);
 		NOT_REACHED; /* NOTREACHED */
 	    }
 
@@ -8006,7 +8197,7 @@ NULL
 		ST.save_curlyx = cur_curlyx;
 		cur_curlyx = cur_curlyx->u.curlyx.prev_curlyx;
 		PUSH_YES_STATE_GOTO(WHILEM_B_min, ST.save_curlyx->u.curlyx.B,
-                                    locinput, loceol);
+                                    locinput, loceol, script_run_begin);
 		NOT_REACHED; /* NOTREACHED */
 	    }
 
@@ -8017,7 +8208,8 @@ NULL
                             maxopenparen);
 		cur_curlyx->u.curlyx.lastloc = locinput;
 		REGCP_SET(ST.lastcp);
-		PUSH_STATE_GOTO(WHILEM_A_max, A, locinput, loceol);
+		PUSH_STATE_GOTO(WHILEM_A_max, A, locinput, loceol,
+                                script_run_begin);
 		NOT_REACHED; /* NOTREACHED */
 	    }
 	    goto do_whilem_B_max;
@@ -8069,7 +8261,7 @@ NULL
 	    ST.save_curlyx = cur_curlyx;
 	    cur_curlyx = cur_curlyx->u.curlyx.prev_curlyx;
 	    PUSH_YES_STATE_GOTO(WHILEM_B_max, ST.save_curlyx->u.curlyx.B,
-                                locinput, loceol);
+                                locinput, loceol, script_run_begin);
 	    NOT_REACHED; /* NOTREACHED */
 
 	case WHILEM_B_min_fail: /* just failed to match B in a minimal match */
@@ -8100,7 +8292,7 @@ NULL
 	    REGCP_SET(ST.lastcp);
 	    PUSH_STATE_GOTO(WHILEM_A_min,
 		/*A*/ NEXTOPER(ST.save_curlyx->u.curlyx.me) + EXTRA_STEP_2ARGS,
-                locinput, loceol);
+                locinput, loceol, script_run_begin);
 	    NOT_REACHED; /* NOTREACHED */
 
 #undef  ST
@@ -8122,9 +8314,11 @@ NULL
 
 	    /* Now go into the branch */
 	    if (has_cutgroup) {
-	        PUSH_YES_STATE_GOTO(BRANCH_next, scan, locinput, loceol);
+	        PUSH_YES_STATE_GOTO(BRANCH_next, scan, locinput, loceol,
+                                    script_run_begin);
 	    } else {
-	        PUSH_STATE_GOTO(BRANCH_next, scan, locinput, loceol);
+	        PUSH_STATE_GOTO(BRANCH_next, scan, locinput, loceol,
+                                script_run_begin);
 	    }
 	    NOT_REACHED; /* NOTREACHED */
 
@@ -8132,7 +8326,8 @@ NULL
             sv_yes_mark = st->u.mark.mark_name = scan->flags
                 ? MUTABLE_SV(rexi->data->data[ ARG( scan ) ])
                 : NULL;
-            PUSH_STATE_GOTO(CUTGROUP_next, next, locinput, loceol);
+            PUSH_STATE_GOTO(CUTGROUP_next, next, locinput, loceol,
+                            script_run_begin);
             NOT_REACHED; /* NOTREACHED */
 
         case CUTGROUP_next_fail:
@@ -8209,7 +8404,8 @@ NULL
 		goto curlym_do_B;
 
 	  curlym_do_A: /* execute the A in /A{m,n}B/  */
-	    PUSH_YES_STATE_GOTO(CURLYM_A, ST.A, locinput, loceol); /* match A */
+	    PUSH_YES_STATE_GOTO(CURLYM_A, ST.A, locinput, loceol, /* match A */
+                                script_run_begin);
 	    NOT_REACHED; /* NOTREACHED */
 
 	case CURLYM_A: /* we've just matched an A */
@@ -8333,7 +8529,8 @@ NULL
 	        }
 	    }
 	    
-	    PUSH_STATE_GOTO(CURLYM_B, ST.B, locinput, loceol); /* match B */
+	    PUSH_STATE_GOTO(CURLYM_B, ST.B, locinput, loceol,   /* match B */
+                            script_run_begin);
 	    NOT_REACHED; /* NOTREACHED */
 
 	case CURLYM_B_fail: /* just failed to match a B */
@@ -8633,7 +8830,8 @@ NULL
 
           curly_try_B_min:
             CURLY_SETPAREN(ST.paren, ST.count);
-            PUSH_STATE_GOTO(CURLY_B_min, ST.B, locinput, loceol);
+            PUSH_STATE_GOTO(CURLY_B_min, ST.B, locinput, loceol,
+                            script_run_begin);
 	    NOT_REACHED; /* NOTREACHED */
 
 
@@ -8660,7 +8858,8 @@ NULL
                 }
                 if (ST.c1 == CHRTEST_VOID || could_match) {
 		    CURLY_SETPAREN(ST.paren, ST.count);
-		    PUSH_STATE_GOTO(CURLY_B_max, ST.B, locinput, loceol);
+		    PUSH_STATE_GOTO(CURLY_B_max, ST.B, locinput, loceol,
+                                    script_run_begin);
 		    NOT_REACHED; /* NOTREACHED */
 		}
 	    }
@@ -8715,8 +8914,9 @@ NULL
 
                 SET_RECURSE_LOCINPUT("FAKE-END[after]", cur_eval->locinput);
 
-                PUSH_YES_STATE_GOTO(EVAL_postponed_AB, st->u.eval.prev_eval->u.eval.B,
-                                    locinput, loceol); /* match B */
+                PUSH_YES_STATE_GOTO(EVAL_postponed_AB,          /* match B */
+                                    st->u.eval.prev_eval->u.eval.B,
+                                    locinput, loceol, script_run_begin);
 	    }
 
 	    if (locinput < reginfo->till) {
@@ -8802,7 +9002,8 @@ NULL
 	    logical = 0; /* XXX: reset state of logical once it has been saved into ST */
 	    
 	    /* execute body of (?...A) */
-	    PUSH_YES_STATE_GOTO(IFMATCH_A, NEXTOPER(NEXTOPER(scan)), ST.start, ST.end);
+	    PUSH_YES_STATE_GOTO(IFMATCH_A, NEXTOPER(NEXTOPER(scan)), ST.start,
+                                ST.end, script_run_begin);
 	    NOT_REACHED; /* NOTREACHED */
 
         {
@@ -8836,6 +9037,7 @@ NULL
                 /* restore old position except for (?>...) */
 		locinput = st->locinput;
                 loceol = st->loceol;
+                script_run_begin = st->sr0;
 	    }
 	    scan = ST.me + ARG(ST.me);
 	    if (scan == ST.me)
@@ -8859,7 +9061,8 @@ NULL
 	case PRUNE:   /*  (*PRUNE)   */
             if (scan->flags)
 	        sv_yes_mark = sv_commit = MUTABLE_SV(rexi->data->data[ ARG( scan ) ]);
-	    PUSH_STATE_GOTO(COMMIT_next, next, locinput, loceol);
+	    PUSH_STATE_GOTO(COMMIT_next, next, locinput, loceol,
+                            script_run_begin);
 	    NOT_REACHED; /* NOTREACHED */
 
 	case COMMIT_next_fail:
@@ -8889,7 +9092,8 @@ NULL
                 = MUTABLE_SV(rexi->data->data[ ARG( scan ) ]);
             mark_state = st;
             ST.mark_loc = locinput;
-            PUSH_YES_STATE_GOTO(MARKPOINT_next, next, locinput, loceol);
+            PUSH_YES_STATE_GOTO(MARKPOINT_next, next, locinput, loceol,
+                                script_run_begin);
             NOT_REACHED; /* NOTREACHED */
 
         case MARKPOINT_next:
@@ -8922,7 +9126,8 @@ NULL
                 /* (*SKIP) : if we fail we cut here*/
                 ST.mark_name = NULL;
                 ST.mark_loc = locinput;
-                PUSH_STATE_GOTO(SKIP_next,next, locinput, loceol);
+                PUSH_STATE_GOTO(SKIP_next,next, locinput, loceol,
+                                script_run_begin);
             } else {
                 /* (*SKIP:NAME) : if there is a (*MARK:NAME) fail where it was, 
                    otherwise do nothing.  Meaning we need to scan 
@@ -8935,7 +9140,8 @@ NULL
                                 find ) ) 
                     {
                         ST.mark_name = find;
-                        PUSH_STATE_GOTO( SKIP_next, next, locinput, loceol);
+                        PUSH_STATE_GOTO( SKIP_next, next, locinput, loceol,
+                                         script_run_begin);
                     }
                     cur = cur->u.mark.prev_mark;
                 }
@@ -9032,6 +9238,7 @@ NULL
 	    depth++;
 	    st->locinput = locinput;
 	    st->loceol = loceol;
+            st->sr0 = script_run_begin;
 	    newst = st+1; 
 	    if (newst >  SLAB_LAST(PL_regmatch_slab))
 		newst = S_push_slab(aTHX);
@@ -9039,6 +9246,7 @@ NULL
 
 	    locinput = pushinput;
             loceol = pusheol;
+            script_run_begin = pushsr0;
 	    st = newst;
 	    continue;
             /* NOTREACHED */
@@ -9094,6 +9302,7 @@ NULL
         if (no_final) {
             locinput= st->locinput;
             loceol= st->loceol;
+            script_run_begin = st->sr0;
         }
 	state_num = st->resume_state + no_final;
 	goto reenter_switch;
@@ -9145,6 +9354,7 @@ NULL
 	PL_regmatch_state = st;
 	locinput= st->locinput;
 	loceol= st->loceol;
+        script_run_begin = st->sr0;
 
 	DEBUG_STATE_pp("pop");
 	depth--;
@@ -9281,6 +9491,22 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
 	else
 	    scan = this_eol;
 	break;
+
+    case LEXACT_REQ8:
+        if (! utf8_target) {
+            break;
+        }
+        /* FALLTHROUGH */
+
+    case LEXACT:
+      {
+        U8 * string;
+        Size_t str_len;
+
+	string = (U8 *) STRINGl(p);
+        str_len = STR_LENl(p);
+        goto join_short_long_exact;
+
     case EXACTL:
         _CHECK_AND_WARN_PROBLEMATIC_LOCALE;
         if (utf8_target && UTF8_IS_ABOVE_LATIN1(*scan)) {
@@ -9288,16 +9514,20 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
         }
         goto do_exact;
 
-    case EXACT_ONLY8:
+    case EXACT_REQ8:
         if (! utf8_target) {
             break;
         }
         /* FALLTHROUGH */
     case EXACT:
       do_exact:
-        assert(STR_LEN(p) == reginfo->is_utf8_pat ? UTF8SKIP(STRING(p)) : 1);
+	string = (U8 *) STRINGs(p);
+        str_len = STR_LENs(p);
+
+      join_short_long_exact:
+        assert(str_len == reginfo->is_utf8_pat ? UTF8SKIP(string) : 1);
 
-	c = (U8)*STRING(p);
+	c = *string;
 
         /* Can use a simple find if the pattern char to match on is invariant
          * under UTF-8, or both target and pattern aren't UTF-8.  Note that we
@@ -9319,8 +9549,8 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
                  * string EQ */
                 while (hardcount < max
                        && scan < this_eol
-                       && (scan_char_len = UTF8SKIP(scan)) <= STR_LEN(p)
-                       && memEQ(scan, STRING(p), scan_char_len))
+                       && (scan_char_len = UTF8SKIP(scan)) <= str_len
+                       && memEQ(scan, string, scan_char_len))
                 {
                     scan += scan_char_len;
                     hardcount++;
@@ -9330,7 +9560,7 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
 
                 /* Target isn't utf8; convert the character in the UTF-8
                  * pattern to non-UTF8, and do a simple find */
-                c = EIGHT_BIT_UTF8_TO_NATIVE(c, *(STRING(p) + 1));
+                c = EIGHT_BIT_UTF8_TO_NATIVE(c, *(string + 1));
                 scan = (char *) find_span_end((U8 *) scan, (U8 *) this_eol, (U8) c);
             } /* else pattern char is above Latin1, can't possibly match the
                  non-UTF-8 target */
@@ -9354,6 +9584,7 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
 	    }
 	}
 	break;
+      }
 
     case EXACTFAA_NO_TRIE: /* This node only generated for non-utf8 patterns */
         assert(! reginfo->is_utf8_pat);
@@ -9386,7 +9617,7 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
                                     | FOLDEQ_S2_FOLDS_SANE;
         goto do_exactf;
 
-    case EXACTFU_ONLY8:
+    case EXACTFU_REQ8:
         if (! utf8_target) {
             break;
         }
@@ -9404,7 +9635,7 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
         int c1, c2;
         U8 c1_utf8[UTF8_MAXBYTES+1], c2_utf8[UTF8_MAXBYTES+1];
 
-        assert(STR_LEN(p) == reginfo->is_utf8_pat ? UTF8SKIP(STRING(p)) : 1);
+        assert(STR_LENs(p) == reginfo->is_utf8_pat ? UTF8SKIP(STRINGs(p)) : 1);
 
         if (S_setup_EXACTISH_ST_c1_c2(aTHX_ p, &c1, c1_utf8, &c2, c2_utf8,
                                         reginfo))
@@ -9412,10 +9643,10 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
             if (c1 == CHRTEST_VOID) {
                 /* Use full Unicode fold matching */
                 char *tmpeol = loceol;
-                STRLEN pat_len = reginfo->is_utf8_pat ? UTF8SKIP(STRING(p)) : 1;
+                STRLEN pat_len = reginfo->is_utf8_pat ? UTF8SKIP(STRINGs(p)) : 1;
                 while (hardcount < max
                         && foldEQ_utf8_flags(scan, &tmpeol, 0, utf8_target,
-                                             STRING(p), NULL, pat_len,
+                                             STRINGs(p), NULL, pat_len,
                                              reginfo->is_utf8_pat, utf8_flags))
                 {
                     scan = tmpeol;
@@ -9534,22 +9765,42 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
 
     case ANYOFH:
         if (utf8_target) {  /* ANYOFH only can match UTF-8 targets */
-            if (ANYOF_FLAGS(p)) {   /* If we know the first byte of what
-                                       matches, we can avoid calling reginclass
-                                     */
-                while (   hardcount < max
-                       && scan < this_eol
-                       && (U8) *scan == ANYOF_FLAGS(p)
-                       && reginclass(prog, p, (U8*)scan, (U8*) this_eol,
-                                                                  TRUE))
-                {
-                    scan += UTF8SKIP(scan);
-                    hardcount++;
-                }
+            while (  hardcount < max
+                   && scan < this_eol
+                   && NATIVE_UTF8_TO_I8(*scan) >= ANYOF_FLAGS(p)
+                   && reginclass(prog, p, (U8*)scan, (U8*) this_eol, TRUE))
+            {
+                scan += UTF8SKIP(scan);
+                hardcount++;
+            }
+        }
+        break;
+
+    case ANYOFHb:
+        if (utf8_target) {  /* ANYOFHb only can match UTF-8 targets */
+
+            /* we know the first byte must be the FLAGS field */
+            while (   hardcount < max
+                   && scan < this_eol
+                   && (U8) *scan == ANYOF_FLAGS(p)
+                   && reginclass(prog, p, (U8*)scan, (U8*) this_eol,
+                                                              TRUE))
+            {
+                scan += UTF8SKIP(scan);
+                hardcount++;
             }
-            else while (  hardcount < max
-                        && scan < this_eol
-                        && reginclass(prog, p, (U8*)scan, (U8*) this_eol, TRUE))
+        }
+        break;
+
+    case ANYOFHr:
+        if (utf8_target) {  /* ANYOFH only can match UTF-8 targets */
+            while (  hardcount < max
+                   && scan < this_eol
+                   && inRANGE(NATIVE_UTF8_TO_I8(*scan),
+                              LOWEST_ANYOF_HRx_BYTE(ANYOF_FLAGS(p)),
+                              HIGHEST_ANYOF_HRx_BYTE(ANYOF_FLAGS(p)))
+                   && NATIVE_UTF8_TO_I8(*scan) >= ANYOF_FLAGS(p)
+                   && reginclass(prog, p, (U8*)scan, (U8*) this_eol, TRUE))
             {
                 scan += UTF8SKIP(scan);
                 hardcount++;
@@ -9557,6 +9808,69 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
         }
         break;
 
+    case ANYOFHs:
+        if (utf8_target) {  /* ANYOFH only can match UTF-8 targets */
+            while (   hardcount < max
+                   && scan + FLAGS(p) < this_eol
+                   && memEQ(scan, ((struct regnode_anyofhs *) p)->string, FLAGS(p))
+                   && reginclass(prog, p, (U8*)scan, (U8*) this_eol, TRUE))
+            {
+                scan += UTF8SKIP(scan);
+                hardcount++;
+            }
+        }
+        break;
+
+    case ANYOFR:
+        if (utf8_target) {
+            while (   hardcount < max
+                   && scan < this_eol
+                   && NATIVE_UTF8_TO_I8(*scan) >= ANYOF_FLAGS(p)
+                   && withinCOUNT(utf8_to_uvchr_buf((U8 *) scan,
+                                                (U8 *) this_eol,
+                                                NULL),
+                                  ANYOFRbase(p), ANYOFRdelta(p)))
+            {
+                scan += UTF8SKIP(scan);
+                hardcount++;
+            }
+        }
+        else {
+            while (   hardcount < max
+                   && scan < this_eol
+                   && withinCOUNT((U8) *scan, ANYOFRbase(p), ANYOFRdelta(p)))
+            {
+                scan++;
+                hardcount++;
+            }
+        }
+        break;
+
+    case ANYOFRb:
+        if (utf8_target) {
+            while (   hardcount < max
+                   && scan < this_eol
+                   && (U8) *scan == ANYOF_FLAGS(p)
+                   && withinCOUNT(utf8_to_uvchr_buf((U8 *) scan,
+                                                (U8 *) this_eol,
+                                                NULL),
+                                  ANYOFRbase(p), ANYOFRdelta(p)))
+            {
+                scan += UTF8SKIP(scan);
+                hardcount++;
+            }
+        }
+        else {
+            while (   hardcount < max
+                   && scan < this_eol
+                   && withinCOUNT((U8) *scan, ANYOFRbase(p), ANYOFRdelta(p)))
+            {
+                scan++;
+                hardcount++;
+            }
+        }
+        break;
+
     /* The argument (FLAGS) to all the POSIX node types is the class number */
 
     case NPOSIXL:
@@ -9768,7 +10082,7 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
     *startposp = scan;
 
     DEBUG_r({
-	GET_RE_DEBUG_FLAGS_DECL;
+	DECLARE_AND_GET_RE_DEBUG_FLAGS;
 	DEBUG_EXECUTE_r({
 	    SV * const prop = sv_newmortal();
             regprop(prog, prop, p, reginfo, NULL);
@@ -9800,7 +10114,9 @@ STATIC bool
 S_reginclass(pTHX_ regexp * const prog, const regnode * const n, const U8* const p, const U8* const p_end, const bool utf8_target)
 {
     dVAR;
-    const char flags = (OP(n) == ANYOFH) ? 0 : ANYOF_FLAGS(n);
+    const char flags = (inRANGE(OP(n), ANYOFH, ANYOFHs))
+                        ? 0
+                        : ANYOF_FLAGS(n);
     bool match = FALSE;
     UV c = *p;
 
@@ -9827,7 +10143,7 @@ S_reginclass(pTHX_ regexp * const prog, const regnode * const n, const U8* const
     }
 
     /* If this character is potentially in the bitmap, check it */
-    if (c < NUM_ANYOF_CODE_POINTS && OP(n) != ANYOFH) {
+    if (c < NUM_ANYOF_CODE_POINTS && ! inRANGE(OP(n), ANYOFH, ANYOFHb)) {
 	if (ANYOF_BITMAP_TEST(n, c))
 	    match = TRUE;
 	else if ((flags
@@ -10129,6 +10445,7 @@ S_setup_eval_state(pTHX_ regmatch_info *const reginfo)
     regmatch_info_aux_eval *eval_state = reginfo->info_aux_eval;
 
     eval_state->rex = rex;
+    eval_state->sv  = reginfo->sv;
 
     if (reginfo->sv) {
         /* Make $_ available to executed code. */
@@ -10136,6 +10453,8 @@ S_setup_eval_state(pTHX_ regmatch_info *const reginfo)
             SAVE_DEFSV;
             DEFSV_set(reginfo->sv);
         }
+        /* will be dec'd by S_cleanup_regmatch_info_aux */
+        SvREFCNT_inc_NN(reginfo->sv);
 
         if (!(mg = mg_find_mglob(reginfo->sv))) {
             /* prepare for quick setting of pos */
@@ -10227,6 +10546,7 @@ S_cleanup_regmatch_info_aux(pTHX_ void *arg)
         }
 
         PL_curpm = eval_state->curpm;
+        SvREFCNT_dec(eval_state->sv);
     }
 
     PL_regmatch_state = aux->old_regmatch_state;
@@ -10297,6 +10617,7 @@ S_to_byte_substr(pTHX_ regexp *prog)
 	    && !prog->substrs->data[i].substr) {
 	    SV* sv = newSVsv(prog->substrs->data[i].utf8_substr);
 	    if (! sv_utf8_downgrade(sv, TRUE)) {
+                SvREFCNT_dec_NN(sv);
                 return FALSE;
             }
             if (SvVALID(prog->substrs->data[i].utf8_substr)) {
@@ -10320,13 +10641,13 @@ S_to_byte_substr(pTHX_ regexp *prog)
 #ifndef PERL_IN_XSUB_RE
 
 bool
-Perl__is_grapheme(pTHX_ const U8 * strbeg, const U8 * s, const U8 * strend, const UV cp)
+Perl_is_grapheme(pTHX_ const U8 * strbeg, const U8 * s, const U8 * strend, const UV cp)
 {
     /* Temporary helper function for toke.c.  Verify that the code point 'cp'
      * is a stand-alone grapheme.  The UTF-8 for 'cp' begins at position 's' in
      * the larger string bounded by 'strbeg' and 'strend'.
      *
-     * 'cp' needs to be assigned (if not a future version of the Unicode
+     * 'cp' needs to be assigned (if not, a future version of the Unicode
      * Standard could make it something that combines with adjacent characters,
      * so code using it would then break), and there has to be a GCB break
      * before and after the character. */
@@ -10336,7 +10657,7 @@ Perl__is_grapheme(pTHX_ const U8 * strbeg, const U8 * s, const U8 * strend, cons
     GCB_enum cp_gcb_val, prev_cp_gcb_val, next_cp_gcb_val;
     const U8 * prev_cp_start;
 
-    PERL_ARGS_ASSERT__IS_GRAPHEME;
+    PERL_ARGS_ASSERT_IS_GRAPHEME;
 
     if (   UNLIKELY(UNICODE_IS_SUPER(cp))
         || UNLIKELY(UNICODE_IS_NONCHAR(cp)))