This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Correct typo reported by Elizabeth Mattijsen
[perl5.git] / regexec.c
index a721895..16a2309 100644 (file)
--- a/regexec.c
+++ b/regexec.c
@@ -175,32 +175,6 @@ static const char* const non_utf8_target_but_utf8_required
     locinput = (p);  \
     SET_nextchr
 
-
-#define LOAD_UTF8_CHARCLASS(swash_ptr, property_name, invlist) STMT_START {   \
-        if (!swash_ptr) {                                                     \
-            U8 flags = _CORE_SWASH_INIT_ACCEPT_INVLIST;                       \
-            swash_ptr = _core_swash_init("utf8", property_name, &PL_sv_undef, \
-                                         1, 0, invlist, &flags);              \
-            assert(swash_ptr);                                                \
-        }                                                                     \
-    } STMT_END
-
-/* If in debug mode, we test that a known character properly matches */
-#ifdef DEBUGGING
-#   define LOAD_UTF8_CHARCLASS_DEBUG_TEST(swash_ptr,                          \
-                                          property_name,                      \
-                                          invlist,                            \
-                                          utf8_char_in_property)              \
-        LOAD_UTF8_CHARCLASS(swash_ptr, property_name, invlist);               \
-        assert(swash_fetch(swash_ptr, (U8 *) utf8_char_in_property, TRUE));
-#else
-#   define LOAD_UTF8_CHARCLASS_DEBUG_TEST(swash_ptr,                          \
-                                          property_name,                      \
-                                          invlist,                            \
-                                          utf8_char_in_property)              \
-        LOAD_UTF8_CHARCLASS(swash_ptr, property_name, invlist)
-#endif
-
 #define PLACEHOLDER    /* Something for the preprocessor to grab onto */
 /* TODO: Combine JUMPABLE and HAS_TEXT to cache OP(rn) */
 
@@ -354,7 +328,34 @@ S_regcppush(pTHX_ const regexp *rex, I32 parenfloor, U32 maxopenparen _pDEPTH)
     );                                                          \
     regcpblow(cp)
 
+/* set the start and end positions of capture ix */
+#define CLOSE_CAPTURE(ix, s, e)                                            \
+    rex->offs[ix].start = s;                                               \
+    rex->offs[ix].end = e;                                                 \
+    if (ix > rex->lastparen)                                               \
+        rex->lastparen = ix;                                               \
+    rex->lastcloseparen = ix;                                              \
+    DEBUG_BUFFERS_r(Perl_re_exec_indentf( aTHX_                            \
+        "CLOSE: rex=0x%" UVxf " offs=0x%" UVxf ": \\%" UVuf ": set %" IVdf "..%" IVdf " max: %" UVuf "\n", \
+        depth,                                                             \
+        PTR2UV(rex),                                                       \
+        PTR2UV(rex->offs),                                                 \
+        (UV)ix,                                                            \
+        (IV)rex->offs[ix].start,                                           \
+        (IV)rex->offs[ix].end,                                             \
+        (UV)rex->lastparen                                                 \
+    ))
+
 #define UNWIND_PAREN(lp, lcp)               \
+    DEBUG_BUFFERS_r(Perl_re_exec_indentf( aTHX_  \
+        "UNWIND_PAREN: rex=0x%" UVxf " offs=0x%" UVxf ": invalidate (%" UVuf "..%" UVuf "] set lcp: %" UVuf "\n", \
+        depth,                              \
+        PTR2UV(rex),                        \
+        PTR2UV(rex->offs),                  \
+        (UV)(lp),                           \
+        (UV)(rex->lastparen),               \
+        (UV)(lcp)                           \
+    ));                                     \
     for (n = rex->lastparen; n > lp; n--)   \
         rex->offs[n].end = -1;              \
     rex->lastparen = n;                     \
@@ -1807,7 +1808,7 @@ Perl_re_intuit_start(pTHX_
 #define DECL_TRIE_TYPE(scan) \
     const enum { trie_plain, trie_utf8, trie_utf8_fold, trie_latin_utf8_fold,       \
                  trie_utf8_exactfa_fold, trie_latin_utf8_exactfa_fold,              \
-                 trie_utf8l, trie_flu8 }                                            \
+                 trie_utf8l, trie_flu8, trie_flu8_latin }                           \
                     trie_type = ((scan->flags == EXACT)                             \
                                  ? (utf8_target ? trie_utf8 : trie_plain)           \
                                  : (scan->flags == EXACTL)                          \
@@ -1817,20 +1818,24 @@ Perl_re_intuit_start(pTHX_
                                          ? trie_utf8_exactfa_fold                   \
                                          : trie_latin_utf8_exactfa_fold)            \
                                       : (scan->flags == EXACTFLU8                   \
-                                         ? trie_flu8                                \
+                                         ? (utf8_target                             \
+                                           ? trie_flu8                              \
+                                           : trie_flu8_latin)                       \
                                          : (utf8_target                             \
                                            ? trie_utf8_fold                         \
-                                           :   trie_latin_utf8_fold)))
+                                           : trie_latin_utf8_fold)))
 
-#define REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc, uscan, len, uvc, charid, foldlen, foldbuf, uniflags) \
+/* 'uscan' is set to foldbuf, and incremented, so below the end of uscan is
+ * 'foldbuf+sizeof(foldbuf)' */
+#define REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc, uc_end, uscan, len, uvc, charid, foldlen, foldbuf, uniflags) \
 STMT_START {                                                                        \
     STRLEN skiplen;                                                                 \
     U8 flags = FOLD_FLAGS_FULL;                                                     \
     switch (trie_type) {                                                            \
     case trie_flu8:                                                                 \
         _CHECK_AND_WARN_PROBLEMATIC_LOCALE;                                         \
-        if (utf8_target && UTF8_IS_ABOVE_LATIN1(*uc)) {                             \
-            _CHECK_AND_OUTPUT_WIDE_LOCALE_UTF8_MSG(uc, uc + UTF8SKIP(uc));          \
+        if (UTF8_IS_ABOVE_LATIN1(*uc)) {                                            \
+            _CHECK_AND_OUTPUT_WIDE_LOCALE_UTF8_MSG(uc, uc_end);                     \
         }                                                                           \
         goto do_trie_utf8_fold;                                                     \
     case trie_utf8_exactfa_fold:                                                    \
@@ -1839,25 +1844,29 @@ STMT_START {
     case trie_utf8_fold:                                                            \
       do_trie_utf8_fold:                                                            \
         if ( foldlen>0 ) {                                                          \
-            uvc = utf8n_to_uvchr( (const U8*) uscan, UTF8_MAXLEN, &len, uniflags ); \
+            uvc = utf8n_to_uvchr( (const U8*) uscan, foldlen, &len, uniflags );     \
             foldlen -= len;                                                         \
             uscan += len;                                                           \
             len=0;                                                                  \
         } else {                                                                    \
-            len = UTF8SKIP(uc);                                                     \
-            uvc = _toFOLD_utf8_flags( (const U8*) uc, uc + len, foldbuf, &foldlen,  \
+            uvc = _toFOLD_utf8_flags( (const U8*) uc, uc_end, foldbuf, &foldlen,    \
                                                                             flags); \
+            len = UTF8SKIP(uc);                                                     \
             skiplen = UVCHR_SKIP( uvc );                                            \
             foldlen -= skiplen;                                                     \
             uscan = foldbuf + skiplen;                                              \
         }                                                                           \
         break;                                                                      \
+    case trie_flu8_latin:                                                           \
+        _CHECK_AND_WARN_PROBLEMATIC_LOCALE;                                         \
+        goto do_trie_latin_utf8_fold;                                               \
     case trie_latin_utf8_exactfa_fold:                                              \
         flags |= FOLD_FLAGS_NOMIX_ASCII;                                            \
         /* FALLTHROUGH */                                                           \
     case trie_latin_utf8_fold:                                                      \
+      do_trie_latin_utf8_fold:                                                      \
         if ( foldlen>0 ) {                                                          \
-            uvc = utf8n_to_uvchr( (const U8*) uscan, UTF8_MAXLEN, &len, uniflags ); \
+            uvc = utf8n_to_uvchr( (const U8*) uscan, foldlen, &len, uniflags );     \
             foldlen -= len;                                                         \
             uscan += len;                                                           \
             len=0;                                                                  \
@@ -1876,7 +1885,7 @@ STMT_START {
         }                                                                           \
         /* FALLTHROUGH */                                                           \
     case trie_utf8:                                                                 \
-        uvc = utf8n_to_uvchr( (const U8*) uc, UTF8_MAXLEN, &len, uniflags );        \
+        uvc = utf8n_to_uvchr( (const U8*) uc, uc_end - uc, &len, uniflags );        \
         break;                                                                      \
     case trie_plain:                                                                \
         uvc = (UV)*uc;                                                              \
@@ -2103,7 +2112,7 @@ STMT_START {
 #ifdef DEBUGGING
 static IV
 S_get_break_val_cp_checked(SV* const invlist, const UV cp_in) {
-  IV cp_out = Perl__invlist_search(invlist, cp_in);
+  IV cp_out = _invlist_search(invlist, cp_in);
   assert(cp_out >= 0);
   return cp_out;
 }
@@ -2249,7 +2258,8 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
     case ANYOFM:    /* ARG() is the base byte; FLAGS() the mask byte */
         /* UTF-8ness doesn't matter, so use 0 */
         REXEC_FBC_FIND_NEXT_SCAN(0,
-         (char *) find_next_masked((U8 *) s, (U8 *) strend, ARG(c), FLAGS(c)));
+         (char *) find_next_masked((U8 *) s, (U8 *) strend,
+                                   (U8) ARG(c), FLAGS(c)));
         break;
 
     case EXACTFAA_NO_TRIE: /* This node only generated for non-utf8 patterns */
@@ -2996,10 +3006,10 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
                     }
                     points[pointpos++ % maxlen]= uc;
                     if (foldlen || uc < (U8*)strend) {
-                        REXEC_TRIE_READ_CHAR(trie_type, trie,
-                                         widecharmap, uc,
-                                         uscan, len, uvc, charid, foldlen,
-                                         foldbuf, uniflags);
+                        REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc,
+                                             (U8 *) strend, uscan, len, uvc,
+                                             charid, foldlen, foldbuf,
+                                             uniflags);
                         DEBUG_TRIE_EXECUTE_r({
                             dump_exec_pos( (char *)uc, c, strend,
                                         real_start, s, utf8_target, 0);
@@ -4536,70 +4546,40 @@ S_setup_EXACTISH_ST_c1_c2(pTHX_ const regnode * const text_node, int *c1p,
         else { /* an EXACTFish node which doesn't begin with a multi-char fold */
             c1 = is_utf8_pat ? valid_utf8_to_uvchr(pat, NULL) : *pat;
             if (c1 > 255) {
-                /* Load the folds hash, if not already done */
-                SV** listp;
-                if (! PL_utf8_foldclosures) {
-                    _load_PL_utf8_foldclosures();
+                const unsigned int * remaining_folds_to_list;
+                unsigned int first_folds_to;
+
+                /* Look up what code points (besides c1) fold to c1;  e.g.,
+                 * [ 'K', KELVIN_SIGN ] both fold to 'k'. */
+                Size_t folds_to_count = _inverse_folds(c1,
+                                                     &first_folds_to,
+                                                     &remaining_folds_to_list);
+                if (folds_to_count == 0) {
+                    c2 = c1;    /* there is only a single character that could
+                                   match */
                 }
-
-                /* The fold closures data structure is a hash with the keys
-                 * being the UTF-8 of every character that is folded to, like
-                 * 'k', and the values each an array of all code points that
-                 * fold to its key.  e.g. [ 'k', 'K', KELVIN_SIGN ].
-                 * Multi-character folds are not included */
-                if ((! (listp = hv_fetch(PL_utf8_foldclosures,
-                                        (char *) pat,
-                                        UTF8SKIP(pat),
-                                        FALSE))))
-                {
-                    /* Not found in the hash, therefore there are no folds
-                    * containing it, so there is only a single character that
-                    * could match */
-                    c2 = c1;
+                else if (folds_to_count != 1) {
+                    /* If there aren't exactly two folds to this (itself and
+                     * another), it is outside the scope of this function */
+                    use_chrtest_void = TRUE;
                 }
-                else {  /* Does participate in folds */
-                    AV* list = (AV*) *listp;
-                    if (av_tindex_skip_len_mg(list) != 1) {
-
-                        /* If there aren't exactly two folds to this, it is
-                         * outside the scope of this function */
-                        use_chrtest_void = TRUE;
-                    }
-                    else {  /* There are two.  Get them */
-                        SV** c_p = av_fetch(list, 0, FALSE);
-                        if (c_p == NULL) {
-                            Perl_croak(aTHX_ "panic: invalid PL_utf8_foldclosures structure");
-                        }
-                        c1 = SvUV(*c_p);
-
-                        c_p = av_fetch(list, 1, FALSE);
-                        if (c_p == NULL) {
-                            Perl_croak(aTHX_ "panic: invalid PL_utf8_foldclosures structure");
-                        }
-                        c2 = SvUV(*c_p);
-
-                        /* Folds that cross the 255/256 boundary are forbidden
-                         * if EXACTFL (and isnt a UTF8 locale), or EXACTFAA and
-                         * one is ASCIII.  Since the pattern character is above
-                         * 255, and its only other match is below 256, the only
-                         * legal match will be to itself.  We have thrown away
-                         * the original, so have to compute which is the one
-                         * above 255. */
-                        if ((c1 < 256) != (c2 < 256)) {
-                            if ((OP(text_node) == EXACTFL
-                                 && ! IN_UTF8_CTYPE_LOCALE)
-                                || ((OP(text_node) == EXACTFAA
-                                    || OP(text_node) == EXACTFAA_NO_TRIE)
-                                    && (isASCII(c1) || isASCII(c2))))
-                            {
-                                if (c1 < 256) {
-                                    c1 = c2;
-                                }
-                                else {
-                                    c2 = c1;
-                                }
-                            }
-                        }
+                else {  /* There are two.  We already have one, get the other */
+                    c2 = first_folds_to;
+
+                    /* Folds that cross the 255/256 boundary are forbidden if
+                     * EXACTFL (and isnt a UTF8 locale), or EXACTFAA and one is
+                     * ASCIII.  The only other match to c1 is c2, and since c1
+                     * is above 255, c2 better be as well under these
+                     * circumstances.  If it isn't, it means the only legal
+                     * match of c1 is itself. */
+                    if (    c2 < 256
+                        && (   (   OP(text_node) == EXACTFL
+                                && ! IN_UTF8_CTYPE_LOCALE)
+                            || ((     OP(text_node) == EXACTFAA
+                                   || OP(text_node) == EXACTFAA_NO_TRIE)
+                                && (isASCII(c1) || isASCII(c2)))))
+                    {
+                        c2 = c1;
                     }
                 }
             }
@@ -4746,6 +4726,24 @@ S_isGCB(pTHX_ const GCB_enum before, const GCB_enum after, const U8 * const strb
                 return prev != GCB_E_Base && prev != GCB_E_Base_GAZ;
             }
 
+        case GCB_Maybe_Emoji_NonBreak:
+
+            {
+
+            /* Do not break within emoji modifier sequences or emoji zwj sequences.
+              GB11 \p{Extended_Pictographic} Extend* ZWJ Ã— \p{Extended_Pictographic}
+              */
+                U8 * temp_pos = (U8 *) curpos;
+                GCB_enum prev;
+
+                do {
+                    prev = backup_one_GCB(strbeg, &temp_pos, utf8_target);
+                }
+                while (prev == GCB_Extend);
+
+                return prev != GCB_XPG_XX;
+            }
+
         default:
             break;
     }
@@ -5905,7 +5903,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
              */
             if(!NEXTCHR_IS_EOS && !ANYOF_BITMAP_TEST(scan, nextchr)) {
                 DEBUG_EXECUTE_r(
-                    Perl_re_exec_indentf( aTHX_  "%sfailed to match trie start class...%s\n",
+                    Perl_re_exec_indentf( aTHX_  "%sTRIE: failed to match trie start class...%s\n",
                               depth, PL_colors[4], PL_colors[5])
                 );
                 sayNO_SILENT;
@@ -5970,7 +5968,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
                 if (scan->flags == EXACTL || scan->flags == EXACTFLU8) {
                     _CHECK_AND_WARN_PROBLEMATIC_LOCALE;
                     if (utf8_target
-                        && nextchr >= 0 /* guard against negative EOS value in nextchr */
+                        && ! NEXTCHR_IS_EOS
                         && UTF8_IS_ABOVE_LATIN1(nextchr)
                         && scan->flags == EXACTL)
                     {
@@ -5986,14 +5984,14 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
                 {
                    if (trie->states[ state ].wordnum) {
                         DEBUG_EXECUTE_r(
-                            Perl_re_exec_indentf( aTHX_  "%smatched empty string...%s\n",
+                            Perl_re_exec_indentf( aTHX_  "%sTRIE: matched empty string...%s\n",
                                           depth, PL_colors[4], PL_colors[5])
                         );
                        if (!trie->jump)
                            break;
                    } else {
                        DEBUG_EXECUTE_r(
-                            Perl_re_exec_indentf( aTHX_  "%sfailed to match trie start class...%s\n",
+                            Perl_re_exec_indentf( aTHX_  "%sTRIE: failed to match trie start class...%s\n",
                                           depth, PL_colors[4], PL_colors[5])
                         );
                        sayNO_SILENT;
@@ -6049,7 +6047,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
                                 DUMP_EXEC_POS( (char *)uc, scan, utf8_target, depth );
                                 /* HERE */
                                 PerlIO_printf( Perl_debug_log,
-                                    "%*s%sState: %4" UVxf " Accepted: %c ",
+                                    "%*s%sTRIE: State: %4" UVxf " Accepted: %c ",
                                     INDENT_CHARS(depth), "", PL_colors[4],
                                    (UV)state, (accepted ? 'Y' : 'N'));
                    });
@@ -6058,8 +6056,9 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
                    if ( base && (foldlen || uc < (U8*)(reginfo->strend))) {
                        I32 offset;
                        REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc,
-                                            uscan, len, uvc, charid, foldlen,
-                                            foldbuf, uniflags);
+                                             (U8 *) reginfo->strend, uscan,
+                                             len, uvc, charid, foldlen,
+                                             foldbuf, uniflags);
                        charcount++;
                        if (foldlen>0)
                            ST.longfold = TRUE;
@@ -6083,7 +6082,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
                    }
                    DEBUG_TRIE_EXECUTE_r(
                         Perl_re_printf( aTHX_
-                           "Charid:%3x CP:%4" UVxf " After State: %4" UVxf "%s\n",
+                           "TRIE: Charid:%3x CP:%4" UVxf " After State: %4" UVxf "%s\n",
                            charid, uvc, (UV)state, PL_colors[5] );
                    );
                }
@@ -6102,7 +6101,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
                }
 
                DEBUG_EXECUTE_r(
-                    Perl_re_exec_indentf( aTHX_  "%sgot %" IVdf " possible matches%s\n",
+                    Perl_re_exec_indentf( aTHX_  "%sTRIE: got %" IVdf " possible matches%s\n",
                         depth,
                        PL_colors[4], (IV)ST.accepted, PL_colors[5] );
                );
@@ -6194,8 +6193,8 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
                        while (foldlen) {
                            if (!--chars)
                                break;
-                           uvc = utf8n_to_uvchr(uscan, UTF8_MAXLEN, &len,
-                                           uniflags);
+                           uvc = utf8n_to_uvchr(uscan, foldlen, &len,
+                                                 uniflags);
                            uscan += len;
                            foldlen -= len;
                        }
@@ -6235,7 +6234,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
                         ? av_fetch(trie_words, ST.nextword - 1, 0) : NULL;
                SV *sv= tmp ? sv_newmortal() : NULL;
 
-                Perl_re_exec_indentf( aTHX_  "%sonly one match left, short-circuiting: #%d <%s>%s\n",
+                Perl_re_exec_indentf( aTHX_  "%sTRIE: only one match left, short-circuiting: #%d <%s>%s\n",
                     depth, PL_colors[4],
                    ST.nextword,
                    tmp ? pv_pretty(sv, SvPV_nolen_const(*tmp), SvCUR(*tmp), 0,
@@ -6882,50 +6881,50 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
                 classnum = (_char_class_number) FLAGS(scan);
                 switch (classnum) {
                     default:
-                    if (! (to_complement
+                        if (! (to_complement
                            ^ cBOOL(_invlist_contains_cp(
                                       PL_XPosix_ptrs[classnum],
                                       utf8_to_uvchr_buf((U8 *) locinput,
                                                         (U8 *) reginfo->strend,
                                                         NULL)))))
-                    {
-                        sayNO;
-                    }
+                        {
+                            sayNO;
+                        }
+                        break;
+                    case _CC_ENUM_SPACE:
+                        if (! (to_complement
+                                    ^ cBOOL(is_XPERLSPACE_high(locinput))))
+                        {
+                            sayNO;
+                        }
+                        break;
+                    case _CC_ENUM_BLANK:
+                        if (! (to_complement
+                                        ^ cBOOL(is_HORIZWS_high(locinput))))
+                        {
+                            sayNO;
+                        }
+                        break;
+                    case _CC_ENUM_XDIGIT:
+                        if (! (to_complement
+                                        ^ cBOOL(is_XDIGIT_high(locinput))))
+                        {
+                            sayNO;
+                        }
+                        break;
+                    case _CC_ENUM_VERTSPACE:
+                        if (! (to_complement
+                                        ^ cBOOL(is_VERTWS_high(locinput))))
+                        {
+                            sayNO;
+                        }
+                        break;
+                    case _CC_ENUM_CNTRL:    /* These can't match above Latin1 */
+                    case _CC_ENUM_ASCII:
+                        if (! to_complement) {
+                            sayNO;
+                        }
                         break;
-                        case _CC_ENUM_SPACE:
-                            if (! (to_complement
-                                        ^ cBOOL(is_XPERLSPACE_high(locinput))))
-                            {
-                                sayNO;
-                            }
-                            break;
-                        case _CC_ENUM_BLANK:
-                            if (! (to_complement
-                                            ^ cBOOL(is_HORIZWS_high(locinput))))
-                            {
-                                sayNO;
-                            }
-                            break;
-                        case _CC_ENUM_XDIGIT:
-                            if (! (to_complement
-                                            ^ cBOOL(is_XDIGIT_high(locinput))))
-                            {
-                                sayNO;
-                            }
-                            break;
-                        case _CC_ENUM_VERTSPACE:
-                            if (! (to_complement
-                                            ^ cBOOL(is_VERTWS_high(locinput))))
-                            {
-                                sayNO;
-                            }
-                            break;
-                        case _CC_ENUM_CNTRL:    /* These can't match above Latin1 */
-                        case _CC_ENUM_ASCII:
-                            if (! to_complement) {
-                                sayNO;
-                            }
-                            break;
                 }
                 locinput += UTF8SKIP(locinput);
             }
@@ -7467,7 +7466,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
                rei = RXi_GET(re);
                 DEBUG_EXECUTE_r(
                     debug_start_match(re_sv, utf8_target, locinput,
-                                    reginfo->strend, "Matching embedded");
+                                    reginfo->strend, "EVAL/GOSUB: Matching embedded");
                );              
                startpoint = rei->program + 1;
                 EVAL_CLOSE_PAREN_CLEAR(st); /* ST.close_paren = 0;
@@ -7597,7 +7596,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
            if (n > maxopenparen)
                maxopenparen = n;
             DEBUG_BUFFERS_r(Perl_re_exec_indentf( aTHX_
-               "rex=0x%" UVxf " offs=0x%" UVxf ": \\%" UVuf ": set %" IVdf " tmp; maxopenparen=%" UVuf "\n",
+               "OPEN: rex=0x%" UVxf " offs=0x%" UVxf ": \\%" UVuf ": set %" IVdf " tmp; maxopenparen=%" UVuf "\n",
                 depth,
                PTR2UV(rex),
                PTR2UV(rex->offs),
@@ -7612,26 +7611,11 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
             script_run_begin = (U8 *) locinput;
             break;
 
-/* XXX really need to log other places start/end are set too */
-#define CLOSE_CAPTURE                                                      \
-    rex->offs[n].start = rex->offs[n].start_tmp;                           \
-    rex->offs[n].end = locinput - reginfo->strbeg;                         \
-    DEBUG_BUFFERS_r(Perl_re_exec_indentf( aTHX_                            \
-        "rex=0x%" UVxf " offs=0x%" UVxf ": \\%" UVuf ": set %" IVdf "..%" IVdf "\n", \
-        depth,                                                             \
-        PTR2UV(rex),                                                       \
-        PTR2UV(rex->offs),                                                 \
-        (UV)n,                                                             \
-        (IV)rex->offs[n].start,                                            \
-        (IV)rex->offs[n].end                                               \
-    ))
 
        case CLOSE:  /*  )  */
            n = ARG(scan);  /* which paren pair */
-           CLOSE_CAPTURE;
-           if (n > rex->lastparen)
-               rex->lastparen = n;
-           rex->lastcloseparen = n;
+           CLOSE_CAPTURE(n, rex->offs[n].start_tmp,
+                             locinput - reginfo->strbeg);
             if ( EVAL_CLOSE_PAREN_IS( cur_eval, n ) )
                goto fake_end;
 
@@ -7659,10 +7643,8 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
                     if ( OP(cursor)==CLOSE ){
                         n = ARG(cursor);
                         if ( n <= lastopen ) {
-                           CLOSE_CAPTURE;
-                            if (n > rex->lastparen)
-                                rex->lastparen = n;
-                            rex->lastcloseparen = n;
+                           CLOSE_CAPTURE(n, rex->offs[n].start_tmp,
+                                             locinput - reginfo->strbeg);
                             if ( n == ARG(scan) || EVAL_CLOSE_PAREN_IS(cur_eval, n) )
                                 break;
                         }
@@ -7858,7 +7840,7 @@ NULL
            ST.cache_mask = 0;
            
 
-            DEBUG_EXECUTE_r( Perl_re_exec_indentf( aTHX_  "whilem: matched %ld out of %d..%d\n",
+            DEBUG_EXECUTE_r( Perl_re_exec_indentf( aTHX_  "WHILEM: matched %ld out of %d..%d\n",
                   depth, (long)n, min, max)
            );
 
@@ -7876,7 +7858,7 @@ NULL
            /* If degenerate A matches "", assume A done. */
 
            if (locinput == cur_curlyx->u.curlyx.lastloc) {
-                DEBUG_EXECUTE_r( Perl_re_exec_indentf( aTHX_  "whilem: empty match detected, trying continuation...\n",
+                DEBUG_EXECUTE_r( Perl_re_exec_indentf( aTHX_  "WHILEM: empty match detected, trying continuation...\n",
                    depth)
                );
                goto do_whilem_B_max;
@@ -7944,7 +7926,7 @@ NULL
                        Newxz(aux->poscache, size, char);
                    }
                     DEBUG_EXECUTE_r( Perl_re_printf( aTHX_
-      "%swhilem: Detected a super-linear match, switching on caching%s...\n",
+      "%sWHILEM: Detected a super-linear match, switching on caching%s...\n",
                              PL_colors[4], PL_colors[5])
                    );
                }
@@ -7960,7 +7942,7 @@ NULL
                    mask    = 1 << (offset % 8);
                    offset /= 8;
                    if (reginfo->info_aux->poscache[offset] & mask) {
-                        DEBUG_EXECUTE_r( Perl_re_exec_indentf( aTHX_  "whilem: (cache) already tried at this position...\n",
+                        DEBUG_EXECUTE_r( Perl_re_exec_indentf( aTHX_  "WHILEM: (cache) already tried at this position...\n",
                             depth)
                        );
                         cur_curlyx->u.curlyx.count--;
@@ -8008,11 +7990,11 @@ NULL
            CACHEsayNO;
            NOT_REACHED; /* NOTREACHED */
 
+       case WHILEM_A_min_fail: /* just failed to match A in a minimal match */
+           /* FALLTHROUGH */
        case WHILEM_A_pre_fail: /* just failed to match even minimal A */
            REGCP_UNWIND(ST.lastcp);
             regcppop(rex, &maxopenparen);
-           /* FALLTHROUGH */
-       case WHILEM_A_min_fail: /* just failed to match A in a minimal match */
            cur_curlyx->u.curlyx.lastloc = ST.save_lastloc;
            cur_curlyx->u.curlyx.count--;
            CACHEsayNO;
@@ -8021,7 +8003,7 @@ NULL
        case WHILEM_A_max_fail: /* just failed to match A in a maximal match */
            REGCP_UNWIND(ST.lastcp);
             regcppop(rex, &maxopenparen); /* Restore some previous $<digit>s? */
-            DEBUG_EXECUTE_r(Perl_re_exec_indentf( aTHX_  "whilem: failed, trying continuation...\n",
+            DEBUG_EXECUTE_r(Perl_re_exec_indentf( aTHX_  "WHILEM: failed, trying continuation...\n",
                 depth)
            );
          do_whilem_B_max:
@@ -8062,10 +8044,13 @@ NULL
                CACHEsayNO;
            }
 
-            DEBUG_EXECUTE_r(Perl_re_exec_indentf( aTHX_  "trying longer...\n", depth)
+            DEBUG_EXECUTE_r(Perl_re_exec_indentf( aTHX_  "WHILEM: B min fail: trying longer...\n", depth)
            );
            /* Try grabbing another A and see if it helps. */
            cur_curlyx->u.curlyx.lastloc = locinput;
+            ST.cp = regcppush(rex, cur_curlyx->u.curlyx.parenfloor,
+                            maxopenparen);
+           REGCP_SET(ST.lastcp);
            PUSH_STATE_GOTO(WHILEM_A_min,
                /*A*/ NEXTOPER(ST.save_curlyx->u.curlyx.me) + EXTRA_STEP_2ARGS,
                 locinput);
@@ -8285,14 +8270,11 @@ NULL
 
            if (ST.me->flags) {
                /* emulate CLOSE: mark current A as captured */
-               I32 paren = ST.me->flags;
+               U32 paren = (U32)ST.me->flags;
                if (ST.count) {
-                   rex->offs[paren].start
-                       = HOPc(locinput, -ST.alen) - reginfo->strbeg;
-                   rex->offs[paren].end = locinput - reginfo->strbeg;
-                   if ((U32)paren > rex->lastparen)
-                       rex->lastparen = paren;
-                   rex->lastcloseparen = paren;
+                    CLOSE_CAPTURE(paren,
+                       HOPc(locinput, -ST.alen) - reginfo->strbeg,
+                       locinput - reginfo->strbeg);
                }
                else
                    rex->offs[paren].end = -1;
@@ -8331,11 +8313,8 @@ NULL
 #define CURLY_SETPAREN(paren, success) \
     if (paren) { \
        if (success) { \
-           rex->offs[paren].start = HOPc(locinput, -1) - reginfo->strbeg; \
-           rex->offs[paren].end = locinput - reginfo->strbeg; \
-           if (paren > rex->lastparen) \
-               rex->lastparen = paren; \
-           rex->lastcloseparen = paren; \
+            CLOSE_CAPTURE(paren, HOPc(locinput, -1) - reginfo->strbeg, \
+                                locinput - reginfo->strbeg); \
        } \
        else { \
            rex->offs[paren].end = -1; \
@@ -8366,12 +8345,18 @@ NULL
                maxopenparen = ST.paren;
            ST.min = ARG1(scan);  /* min to match */
            ST.max = ARG2(scan);  /* max to match */
+            scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
+
+            /* handle the single-char capture called as a GOSUB etc */
             if (EVAL_CLOSE_PAREN_IS_TRUE(cur_eval,(U32)ST.paren))
             {
-               ST.min=1;
-               ST.max=1;
+                char *li = locinput;
+                if (!regrepeat(rex, &li, scan, reginfo, 1))
+                   sayNO;
+                SET_locinput(li);
+                goto fake_end;
            }
-            scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
+
            goto repeat;
 
        case CURLY:             /*  /A{m,n}B/ where A is width 1 char */
@@ -8487,24 +8472,41 @@ NULL
            }
            NOT_REACHED; /* NOTREACHED */
 
-       case CURLY_B_min_known_fail:
-           /* failed to find B in a non-greedy match where c1,c2 valid */
+       case CURLY_B_min_fail:
+           /* failed to find B in a non-greedy match.
+             * Handles both cases where c1,c2 valid or not */
 
            REGCP_UNWIND(ST.cp);
             if (ST.paren) {
                 UNWIND_PAREN(ST.lastparen, ST.lastcloseparen);
             }
-           /* Couldn't or didn't -- move forward. */
-           ST.oldloc = locinput;
-           if (utf8_target)
-               locinput += UTF8SKIP(locinput);
-           else
-               locinput++;
-           ST.count++;
-         curly_try_B_min_known:
-            /* find the next place where 'B' could work, then call B */
-           {
+
+            if (ST.c1 == CHRTEST_VOID) {
+                /* failed -- move forward one */
+                char *li = locinput;
+                if (!regrepeat(rex, &li, ST.A, reginfo, 1)) {
+                    sayNO;
+                }
+                locinput = li;
+                ST.count++;
+               if (!(   ST.count <= ST.max
+                        /* count overflow ? */
+                     || (ST.max == REG_INFTY && ST.count > 0))
+                )
+                    sayNO;
+            }
+            else {
                int n;
+                /* Couldn't or didn't -- move forward. */
+                ST.oldloc = locinput;
+                if (utf8_target)
+                    locinput += UTF8SKIP(locinput);
+                else
+                    locinput++;
+                ST.count++;
+
+              curly_try_B_min_known:
+                /* find the next place where 'B' could work, then call B */
                if (utf8_target) {
                    n = (ST.oldloc == locinput) ? 0 : 1;
                    if (ST.c1 == ST.c2) {
@@ -8583,47 +8585,16 @@ NULL
                        sayNO;
                     assert(n == REG_INFTY || locinput == li);
                }
-               CURLY_SETPAREN(ST.paren, ST.count);
-                if (EVAL_CLOSE_PAREN_IS_TRUE(cur_eval,(U32)ST.paren))
-                   goto fake_end;
-               PUSH_STATE_GOTO(CURLY_B_min_known, ST.B, locinput);
            }
-           NOT_REACHED; /* NOTREACHED */
-
-       case CURLY_B_min_fail:
-           /* failed to find B in a non-greedy match where c1,c2 invalid */
 
-           REGCP_UNWIND(ST.cp);
-            if (ST.paren) {
-                UNWIND_PAREN(ST.lastparen, ST.lastcloseparen);
-            }
-           /* failed -- move forward one */
-            {
-                char *li = locinput;
-                if (!regrepeat(rex, &li, ST.A, reginfo, 1)) {
-                    sayNO;
-                }
-                locinput = li;
-            }
-            {
-               ST.count++;
-               if (ST.count <= ST.max || (ST.max == REG_INFTY &&
-                       ST.count > 0)) /* count overflow ? */
-               {
-                 curly_try_B_min:
-                   CURLY_SETPAREN(ST.paren, ST.count);
-                    if (EVAL_CLOSE_PAREN_IS_TRUE(cur_eval,(U32)ST.paren))
-                        goto fake_end;
-                   PUSH_STATE_GOTO(CURLY_B_min, ST.B, locinput);
-               }
-           }
-            sayNO;
+          curly_try_B_min:
+            CURLY_SETPAREN(ST.paren, ST.count);
+            PUSH_STATE_GOTO(CURLY_B_min, ST.B, locinput);
            NOT_REACHED; /* NOTREACHED */
 
+
           curly_try_B_max:
            /* a successful greedy match: now try to match B */
-            if (EVAL_CLOSE_PAREN_IS_TRUE(cur_eval,(U32)ST.paren))
-                goto fake_end;
            {
                bool could_match = locinput < reginfo->strend;
 
@@ -8693,7 +8664,7 @@ NULL
                st->u.eval.prev_eval = cur_eval;
                 cur_eval = CUR_EVAL.prev_eval;
                DEBUG_EXECUTE_r(
-                    Perl_re_exec_indentf( aTHX_  "EVAL trying tail ... (cur_eval=%p)\n",
+                    Perl_re_exec_indentf( aTHX_  "END: EVAL trying tail ... (cur_eval=%p)\n",
                                       depth, cur_eval););
                 if ( nochange_depth )
                    nochange_depth--;
@@ -8706,7 +8677,7 @@ NULL
 
            if (locinput < reginfo->till) {
                 DEBUG_EXECUTE_r(Perl_re_printf( aTHX_
-                                      "%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n",
+                                      "%sEND: Match possible, but length=%ld is smaller than requested=%ld, failing!%s\n",
                                      PL_colors[4],
                                      (long)(locinput - startpos),
                                      (long)(reginfo->till - startpos),
@@ -8718,7 +8689,7 @@ NULL
 
        case SUCCEED: /* successful SUSPEND/UNLESSM/IFMATCH/CURLYM */
            DEBUG_EXECUTE_r(
-            Perl_re_exec_indentf( aTHX_  "%ssubpattern success...%s\n",
+            Perl_re_exec_indentf( aTHX_  "%sSUCCEED: subpattern success...%s\n",
                 depth, PL_colors[4], PL_colors[5]));
            sayYES;                     /* Success! */
 
@@ -8853,7 +8824,7 @@ NULL
                 sv_commit = ST.mark_name;
 
                 DEBUG_EXECUTE_r({
-                        Perl_re_exec_indentf( aTHX_  "%ssetting cutpoint to mark:%" SVf "...%s\n",
+                        Perl_re_exec_indentf( aTHX_  "%sMARKPOINT: next fail: setting cutpoint to mark:%" SVf "...%s\n",
                             depth,
                            PL_colors[4], SVfARG(sv_commit), PL_colors[5]);
                });
@@ -9545,60 +9516,62 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
                     }
                     break;
 
-                /* For these character classes, the knowledge of how to handle
-                 * every code point is compiled in to Perl via a macro.  This
-                 * code is written for making the loops as tight as possible.
-                 * It could be refactored to save space instead */
-                    case _CC_ENUM_SPACE:
-                        while (hardcount < max
-                               && scan < loceol
-                               && (to_complement
-                                   ^ cBOOL(isSPACE_utf8_safe(scan, loceol))))
-                        {
-                            scan += UTF8SKIP(scan);
-                            hardcount++;
-                        }
-                        break;
-                    case _CC_ENUM_BLANK:
-                        while (hardcount < max
-                               && scan < loceol
-                               && (to_complement
-                                    ^ cBOOL(isBLANK_utf8_safe(scan, loceol))))
-                        {
-                            scan += UTF8SKIP(scan);
-                            hardcount++;
-                        }
-                        break;
-                    case _CC_ENUM_XDIGIT:
-                        while (hardcount < max
-                               && scan < loceol
-                               && (to_complement
-                                   ^ cBOOL(isXDIGIT_utf8_safe(scan, loceol))))
-                        {
-                            scan += UTF8SKIP(scan);
-                            hardcount++;
-                        }
-                        break;
-                    case _CC_ENUM_VERTSPACE:
-                        while (hardcount < max
-                               && scan < loceol
-                               && (to_complement
-                                   ^ cBOOL(isVERTWS_utf8_safe(scan, loceol))))
-                        {
-                            scan += UTF8SKIP(scan);
-                            hardcount++;
-                        }
-                        break;
-                    case _CC_ENUM_CNTRL:
-                        while (hardcount < max
-                               && scan < loceol
-                               && (to_complement
-                                   ^ cBOOL(isCNTRL_utf8_safe(scan, loceol))))
-                        {
-                            scan += UTF8SKIP(scan);
-                            hardcount++;
-                        }
-                        break;
+                    /* For the classes below, the knowledge of how to handle
+                     * every code point is compiled in to Perl via a macro.
+                     * This code is written for making the loops as tight as
+                     * possible.  It could be refactored to save space instead.
+                     * */
+
+                case _CC_ENUM_SPACE:
+                    while (hardcount < max
+                           && scan < loceol
+                           && (to_complement
+                               ^ cBOOL(isSPACE_utf8_safe(scan, loceol))))
+                    {
+                        scan += UTF8SKIP(scan);
+                        hardcount++;
+                    }
+                    break;
+                case _CC_ENUM_BLANK:
+                    while (hardcount < max
+                           && scan < loceol
+                           && (to_complement
+                                ^ cBOOL(isBLANK_utf8_safe(scan, loceol))))
+                    {
+                        scan += UTF8SKIP(scan);
+                        hardcount++;
+                    }
+                    break;
+                case _CC_ENUM_XDIGIT:
+                    while (hardcount < max
+                           && scan < loceol
+                           && (to_complement
+                               ^ cBOOL(isXDIGIT_utf8_safe(scan, loceol))))
+                    {
+                        scan += UTF8SKIP(scan);
+                        hardcount++;
+                    }
+                    break;
+                case _CC_ENUM_VERTSPACE:
+                    while (hardcount < max
+                           && scan < loceol
+                           && (to_complement
+                               ^ cBOOL(isVERTWS_utf8_safe(scan, loceol))))
+                    {
+                        scan += UTF8SKIP(scan);
+                        hardcount++;
+                    }
+                    break;
+                case _CC_ENUM_CNTRL:
+                    while (hardcount < max
+                           && scan < loceol
+                           && (to_complement
+                               ^ cBOOL(isCNTRL_utf8_safe(scan, loceol))))
+                    {
+                        scan += UTF8SKIP(scan);
+                        hardcount++;
+                    }
+                    break;
             }
        }
         break;
@@ -10201,7 +10174,14 @@ Perl__is_grapheme(pTHX_ const U8 * strbeg, const U8 * s, const U8 * strend, cons
 
     PERL_ARGS_ASSERT__IS_GRAPHEME;
 
-    /* Unassigned code points are forbidden */
+    if (   UNLIKELY(UNICODE_IS_SUPER(cp))
+        || UNLIKELY(UNICODE_IS_NONCHAR(cp)))
+    {
+        /* These are considered graphemes */
+        return TRUE;
+    }
+
+    /* Otherwise, unassigned code points are forbidden */
     if (UNLIKELY(! ELEMENT_RANGE_MATCHES_INVLIST(
                                     _invlist_search(PL_Assigned_invlist, cp))))
     {
@@ -10325,6 +10305,8 @@ Perl_isSCRIPT_RUN(pTHX_ const U8 * s, const U8 * send, const bool utf8_target)
 
     /* What code point is the digit '0' of the script run? */
     UV zero_of_run = 0;
+#define SEEN_A_DIGIT (zero_of_run != 0)
+
     SCX_enum script_of_run  = SCX_INVALID;   /* Illegal value */
     SCX_enum script_of_char = SCX_INVALID;
 
@@ -10376,7 +10358,7 @@ Perl_isSCRIPT_RUN(pTHX_ const U8 * s, const U8 * send, const bool utf8_target)
                 retval = FALSE;
                 break;
             }
-            if (zero_of_run > 0) {
+            if (SEEN_A_DIGIT) {
                 if (zero_of_run != '0') {
                     retval = FALSE;
                     break;
@@ -10402,7 +10384,7 @@ Perl_isSCRIPT_RUN(pTHX_ const U8 * s, const U8 * send, const bool utf8_target)
         /* If is within the range [+0 .. +9] of the script's zero, it also is a
          * digit in that script.  We can skip the rest of this code for this
          * character. */
-        if (UNLIKELY(   zero_of_run > 0
+        if (UNLIKELY(   SEEN_A_DIGIT
                      && cp >= zero_of_run
                      && cp - zero_of_run <= 9))
         {
@@ -10467,7 +10449,7 @@ Perl_isSCRIPT_RUN(pTHX_ const U8 * s, const U8 * send, const bool utf8_target)
 
             /* But Common contains several sets of digits.  Only the '0' set
              * can be part of another script. */
-            if (zero_of_run > 0 && zero_of_run != '0') {
+            if (SEEN_A_DIGIT && zero_of_run != '0') {
                 retval = FALSE;
                 break;
             }
@@ -10481,7 +10463,7 @@ Perl_isSCRIPT_RUN(pTHX_ const U8 * s, const U8 * send, const bool utf8_target)
          * one to use, and the character is in that sequence.  Now that we know
          * the script, we can use script_zeros[] to directly find which
          * sequence the script uses, except in a few cases it returns 0 */
-        if (UNLIKELY(zero_of_run == 0) && script_of_char >= 0) {
+        if (UNLIKELY(zero_of_run == 0 && script_of_char >= 0)) {
             zero_of_run = script_zeros[script_of_char];
         }
 
@@ -10624,9 +10606,11 @@ Perl_isSCRIPT_RUN(pTHX_ const U8 * s, const U8 * send, const bool utf8_target)
             /* If there is only a single script in common, set to that.
              * Otherwise, use the intersection going forward */
             Safefree(intersection);
+            intersection = NULL;
             if (intersection_len == 1) {
                 script_of_run = script_of_char = new_overlap[0];
                 Safefree(new_overlap);
+                new_overlap = NULL;
             }
             else {
                 intersection = new_overlap;
@@ -10638,23 +10622,39 @@ Perl_isSCRIPT_RUN(pTHX_ const U8 * s, const U8 * send, const bool utf8_target)
   scripts_match:
 
         /* Here, the script of the character is compatible with that of the
-         * run.  Either they match exactly, or one or both can be any of
-         * several scripts, and the intersection is not empty.  If the
-         * character is not a decimal digit, we are done with it.  Otherwise,
-         * it could still fail if it is from a different set of 10 than seen
-         * already (or we may not have seen any, and we need to set the
-         * sequence).  If we have determined a single script and that script
-         * only has one set of digits (almost all scripts are like that), then
-         * this isn't a problem, as any digit must come from the same sequence.
-         * The only scripts that have multiple sequences have been constructed
-         * to be 0 in 'script_zeros[]'.
+         * run.  That means that in most cases, it continues the script run.
+         * Either it and the run match exactly, or one or both can be in any of
+         * several scripts, and the intersection is not empty.  But if the
+         * character is a decimal digit, we need further handling.  If we
+         * haven't seen a digit before, it would establish what set of 10 all
+         * must come from; and if we have established a set, we need to check
+         * that this is in it.
          *
-         * Here we check if it is a digit. */
+         * But there are cases we can rule out without having to look up if
+         * this is a digit:
+         *   a.  All instances of [0-9] have been dealt with earlier.
+         *   b.  The next digit encoded by Unicode is 1600 code points further
+         *       on, so if the code point in this loop iteration is less than
+         *       that, it isn't a digit.
+         *   c.  Most scripts that have digits have a single set of 10.  If
+         *       we've encountered a digit in such a script, 'zero_of_run' is
+         *       set to the code point (call it z) whose numeric value is 0.
+         *       If the code point in this loop iteration is in the range
+         *       z..z+9, it is in the script's set of 10, and we've actually
+         *       handled it earlier in this function and won't reach this
+         *       point.  But, code points in that script that aren't in that
+         *       range can't be digits, so we don't have to look any such up.
+         *       We can tell if this script is such a one by looking at
+         *       'script_zeros[]' for it.  It is non-zero iff it has a single
+         *       set of digits.  This rule doesn't apply if we haven't narrowed
+         *       down the possible scripts to a single one yet.  Nor if the
+         *       zero of the run is '0', as that also hasn't narrowed things
+         *       down completely */
         if (    cp >= FIRST_NON_ASCII_DECIMAL_DIGIT
-            && (   (          zero_of_run == 0
-                    || (  (   script_of_char >= 0
-                           && script_zeros[script_of_char] == 0)
-                        ||    intersection))))
+            && (   intersection
+                || script_of_char < 0   /* Also implies an intersection */
+                || zero_of_run == '0'
+                || script_zeros[script_of_char] == 0))
         {
             SSize_t range_zero_index;
             range_zero_index = _invlist_search(decimals_invlist, cp);
@@ -10662,7 +10662,7 @@ Perl_isSCRIPT_RUN(pTHX_ const U8 * s, const U8 * send, const bool utf8_target)
                 && ELEMENT_RANGE_MATCHES_INVLIST(range_zero_index))
             {
                 UV range_zero = decimals_array[range_zero_index];
-                if (zero_of_run) {
+                if (SEEN_A_DIGIT) {
                     if (zero_of_run != range_zero) {
                         retval = FALSE;
                         break;