This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
regcomp.c: Use better method for setting debug offsets
[perl5.git] / regcomp.c
index ffe4bec..ee67f5a 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
@@ -219,6 +219,7 @@ struct RExC_state_t {
 #define RExC_mysv2     (pRExC_state->mysv2)
 
 #endif
+    bool        seen_d_op;
     bool        strict;
     bool        study_started;
     bool        in_script_run;
@@ -239,6 +240,8 @@ struct RExC_state_t {
 #define RExC_parse     (pRExC_state->parse)
 #define RExC_latest_warn_offset (pRExC_state->latest_warn_offset )
 #define RExC_whilem_seen       (pRExC_state->whilem_seen)
+#define RExC_seen_d_op (pRExC_state->seen_d_op) /* Seen something that differs
+                                                   under /d from /u ? */
 
 
 #ifdef RE_TRACK_PATTERN_OFFSETS
@@ -349,8 +352,14 @@ struct RExC_state_t {
             if (DEPENDS_SEMANTICS) {                                        \
                 set_regex_charset(&RExC_flags, REGEX_UNICODE_CHARSET);      \
                 RExC_uni_semantics = 1;                                     \
+                if (RExC_seen_d_op && LIKELY(RExC_total_parens >= 0)) {     \
+                    /* No need to restart the parse if we haven't seen      \
+                     * anything that differs between /u and /d, and no need \
+                     * to restart immediately if we're going to reparse     \
+                     * anyway to count parens */                            \
                     *flagp |= RESTART_PARSE;                                \
                     return restart_retval;                                  \
+                }                                                           \
             }                                                               \
     } STMT_END
 
@@ -358,8 +367,12 @@ struct RExC_state_t {
 #define REQUIRE_BRANCHJ(flagp, restart_retval)                              \
     STMT_START {                                                            \
                 RExC_use_BRANCHJ = 1;                                       \
-                *flagp |= RESTART_PARSE;                                    \
-                return restart_retval;                                      \
+                if (LIKELY(RExC_total_parens >= 0)) {                       \
+                    /* No need to restart the parse immediately if we're    \
+                     * going to reparse anyway to count parens */           \
+                    *flagp |= RESTART_PARSE;                                \
+                    return restart_retval;                                  \
+                }                                                           \
     } STMT_END
 
 #define REQUIRE_PARENS_PASS                                                 \
@@ -367,33 +380,26 @@ struct RExC_state_t {
                     if (RExC_total_parens == 0) RExC_total_parens = -1;     \
     } STMT_END
 
-/* Executes a return statement with the value 'X', if 'flags' contains any of
- * 'RESTART_PARSE', 'NEED_UTF8', or 'extra'.  If so, *flagp is set to those
- * flags */
-#define RETURN_X_ON_RESTART_OR_FLAGS(X, flags, flagp, extra)                \
+/* This is used to return failure (zero) early from the calling function if
+ * various flags in 'flags' are set.  Two flags always cause a return:
+ * 'RESTART_PARSE' and 'NEED_UTF8'.   'extra' can be used to specify any
+ * additional flags that should cause a return; 0 if none.  If the return will
+ * be done, '*flagp' is first set to be all of the flags that caused the
+ * return. */
+#define RETURN_FAIL_ON_RESTART_OR_FLAGS(flags,flagp,extra)                  \
     STMT_START {                                                            \
             if ((flags) & (RESTART_PARSE|NEED_UTF8|(extra))) {              \
                 *(flagp) = (flags) & (RESTART_PARSE|NEED_UTF8|(extra));     \
-                return X;                                                   \
+                return 0;                                                   \
             }                                                               \
     } STMT_END
 
-#define RETURN_FAIL_ON_RESTART_OR_FLAGS(flags,flagp,extra)                  \
-                    RETURN_X_ON_RESTART_OR_FLAGS(0,flags,flagp,extra)
-
-#define RETURN_X_ON_RESTART(X, flags,flagp)                                 \
-                        RETURN_X_ON_RESTART_OR_FLAGS( X, flags, flagp, 0)
-
-
-#define RETURN_FAIL_ON_RESTART_FLAGP_OR_FLAGS(flagp,extra)                  \
-            if (*(flagp) & (RESTART_PARSE|(extra))) return 0
-
 #define MUST_RESTART(flags) ((flags) & (RESTART_PARSE))
 
 #define RETURN_FAIL_ON_RESTART(flags,flagp)                                 \
-                                    RETURN_X_ON_RESTART(0, flags,flagp)
+                        RETURN_FAIL_ON_RESTART_OR_FLAGS( flags, flagp, 0)
 #define RETURN_FAIL_ON_RESTART_FLAGP(flagp)                                 \
-                            RETURN_FAIL_ON_RESTART_FLAGP_OR_FLAGS(flagp, 0)
+                                    if (MUST_RESTART(*(flagp))) return 0
 
 /* This converts the named class defined in regcomp.h to its equivalent class
  * number defined in handy.h. */
@@ -5624,21 +5630,16 @@ Perl_re_printf( aTHX_  "LHS=%" UVuf " RHS=%" UVuf "\n",
                                                           (regnode_charclass *) scan);
                    break;
 
+                case NANYOFM:
                 case ANYOFM:
                   {
                     SV* cp_list = get_ANYOFM_contents(scan);
 
                     if (flags & SCF_DO_STCLASS_OR) {
-                        ssc_union(data->start_class,
-                                  cp_list,
-                                  FALSE /* don't invert */
-                                  );
+                        ssc_union(data->start_class, cp_list, invert);
                     }
                     else if (flags & SCF_DO_STCLASS_AND) {
-                        ssc_intersection(data->start_class,
-                                         cp_list,
-                                         FALSE /* don't invert */
-                                         );
+                        ssc_intersection(data->start_class, cp_list, invert);
                     }
 
                     SvREFCNT_dec_NN(cp_list);
@@ -7279,6 +7280,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
     RExC_close_parens = NULL;
     RExC_paren_names = NULL;
     RExC_size = 0;
+    RExC_seen_d_op = FALSE;
 #ifdef DEBUGGING
     RExC_paren_name_list = NULL;
 #endif
@@ -7532,9 +7534,6 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
         goto redo_parse;
     }
 
-    /* In a stable state, as here, this must be true */
-    assert(RExC_size = RExC_emit + 1);
-
     /* Here, we have successfully parsed and generated the pattern's program
      * for the regex engine.  We are ready to finish things up and look for
      * optimizations. */
@@ -12761,7 +12760,7 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state,
     SvREFCNT_dec_NN(substitute_parse);
 
     if (! *node_p) {
-        RETURN_X_ON_RESTART(FALSE, flags, flagp);
+        RETURN_FAIL_ON_RESTART(flags, flagp);
         FAIL2("panic: reg returned failure to grok_bslash_N, flags=%#" UVxf,
             (UV) flags);
     }
@@ -13146,10 +13145,9 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                        FALSE, /* don't silence non-portable warnings. */
                        (bool) RExC_strict,
                        TRUE, /* Allow an optimized regnode result */
-                       NULL,
                        NULL);
         if (ret == 0) {
-            RETURN_FAIL_ON_RESTART_FLAGP_OR_FLAGS(flagp, NEED_UTF8);
+            RETURN_FAIL_ON_RESTART_FLAGP(flagp);
             FAIL2("panic: regclass returned failure to regatom, flags=%#" UVxf,
                   (UV) *flagp);
         }
@@ -13268,7 +13266,10 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
             RExC_seen |= REG_LOOKBEHIND_SEEN;
            op = BOUND + charset;
 
-            if (op == BOUNDL) {
+            if (op == BOUND) {
+                RExC_seen_d_op = TRUE;
+            }
+            else if (op == BOUNDL) {
                 RExC_contains_locale = 1;
             }
 
@@ -13417,6 +13418,9 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
             else if (op == POSIXL) {
                 RExC_contains_locale = 1;
             }
+            else if (op == POSIXD) {
+                RExC_seen_d_op = TRUE;
+            }
 
           join_posix_op_known:
 
@@ -13452,7 +13456,6 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                                      non-portables */
                            (bool) RExC_strict,
                            TRUE, /* Allow an optimized regnode result */
-                           NULL,
                            NULL);
             RETURN_FAIL_ON_RESTART_FLAGP(flagp);
             /* regclass() can only return RESTART_PARSE and NEED_UTF8 if
@@ -13635,6 +13638,9 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                                        ? REFFL
                                        : REFF),
                                 num);
+                if (OP(REGNODE_p(ret)) == REFF) {
+                    RExC_seen_d_op = TRUE;
+                }
                 *flagp |= HASWIDTH;
 
                 /* override incorrect value set in reganode MJD */
@@ -14101,14 +14107,14 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                 if (! FOLD) {  /* The simple case, just append the literal */
 
                       not_fold_common:
-                        if (UTF && ! UVCHR_IS_INVARIANT(ender)) {
+                        if (UVCHR_IS_INVARIANT(ender) || ! UTF) {
+                            *(s++) = (char) ender;
+                        }
+                        else {
                             U8 * new_s = uvchr_to_utf8((U8*)s, ender);
                             added_len = (char *) new_s - s;
                             s = (char *) new_s;
                         }
-                        else {
-                            *(s++) = (char) ender;
-                        }
                 }
                 else if (LOC && is_PROBLEMATIC_LOCALE_FOLD_cp(ender)) {
 
@@ -14138,156 +14144,104 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                      * character, and wait until runtime to fold it */
                     goto not_fold_common;
                 }
-                else                /* A regular FOLD code point */
-                     if (! UTF)
+                else /* regular fold; see if actually is in a fold */
+                     if (   (ender < 256 && ! IS_IN_SOME_FOLD_L1(ender))
+                         || (ender > 255
+                            && ! _invlist_contains_cp(PL_utf8_foldable, ender)))
                 {
-                    /* Here, are folding and are not UTF-8 encoded; therefore
-                     * the character must be in the range 0-255, and is not /l.
-                     * (Not /l because we already handled these under /l in
-                     * is_PROBLEMATIC_LOCALE_FOLD_cp) */
-                    if (! IS_IN_SOME_FOLD_L1(ender)) {
-
-                        /* Start a new node for this non-folding character if
-                         * previous ones in the node were folded */
-                        if (len && node_type != EXACT) {
-                            p = oldp;
-                            goto loopdone;
-                        }
+                    /* Here, folding, but the character isn't in a fold.
+                     *
+                     * Start a new node if previous characters in the node were
+                     * folded */
+                    if (len && node_type != EXACT) {
+                        p = oldp;
+                        goto loopdone;
+                    }
+
+                    /* Here, continuing a node with non-folded characters.  Add
+                     * this one */
 
+                    if (UVCHR_IS_INVARIANT(ender) || ! UTF) {
                         *(s++) = (char) ender;
                     }
-                    else {  /* Here, does participate in some fold */
-
-                        /* if this is the first character in the node, change
-                         * its type to folding.  Otherwise, if this is the
-                         * first folding character in the node, close up the
-                         * existing node, so can start a new node with this
-                         * one.  */
-                        if (! len) {
-                            node_type = compute_EXACTish(pRExC_state);
+                    else {
+                        s = (char *) uvchr_to_utf8((U8 *) s, ender);
+                        added_len = UVCHR_SKIP(ender);
+                    }
+                }
+                else {  /* Here, does participate in some fold */
+
+                    /* If this is the first character in the node, change its
+                     * type to folding.  Otherwise, if this is the first
+                     * folding character in the node, close up the existing
+                     * node, so can start a new node with this one.  */
+                    if (! len) {
+                        node_type = compute_EXACTish(pRExC_state);
+                    }
+                    else if (node_type == EXACT) {
+                        p = oldp;
+                        goto loopdone;
+                    }
+
+                    if (UTF) {  /* For UTF-8, we add the folded value */
+                        if (UVCHR_IS_INVARIANT(ender)) {
+                            *(s)++ = (U8) toFOLD(ender);
                         }
-                        else if (node_type == EXACT) {
-                            p = oldp;
-                            goto loopdone;
+                        else {
+                            ender = _to_uni_fold_flags(
+                                    ender,
+                                    (U8 *) s,
+                                    &added_len,
+                                    FOLD_FLAGS_FULL | ((ASCII_FOLD_RESTRICTED)
+                                                    ? FOLD_FLAGS_NOMIX_ASCII
+                                                    : 0));
+                            s += added_len;
                         }
+                    }
+                    else {
 
-                        /* See if the character's fold differs between /d and
-                         * /u.  On non-ancient Unicode versions, this includes
-                         * the multi-char fold SHARP S to 'ss' */
+                        /* Here is non-UTF8; we don't normally store the folded
+                         * value.  First, see if the character's fold differs
+                         * between /d and /u. */
+                        if (PL_fold[ender] != PL_fold_latin1[ender]) {
+                            maybe_exactfu = FALSE;
+                        }
 
 #if    UNICODE_MAJOR_VERSION > 3 /* no multifolds in early Unicode */   \
    || (UNICODE_MAJOR_VERSION == 3 && (   UNICODE_DOT_VERSION > 0)       \
                                       || UNICODE_DOT_DOT_VERSION > 0)
 
-                        if (UNLIKELY(ender == LATIN_SMALL_LETTER_SHARP_S)) {
+                        /* On non-ancient Unicode versions, this includes the
+                         * multi-char fold SHARP S to 'ss' */
 
-                            /* See comments for join_exact() as to why we fold
-                             * this non-UTF at compile time */
-                            if (node_type == EXACTFU) {
-                                *(s++) = 's';
+                        else if (UNLIKELY(   ender == LATIN_SMALL_LETTER_SHARP_S
+                                          || (   len
+                                              && isALPHA_FOLD_EQ(ender, 's')
+                                              && isALPHA_FOLD_EQ(*(s-1), 's'))))
+                        {
 
-                                /* Let the code below add in the extra 's' */
-                                ender = 's';
-                                added_len = 2;
+                            if (node_type == EXACTFU) {
+                                /* See comments for join_exact() as to why we
+                                 * fold this non-UTF at compile time */
+                                if (UNLIKELY(ender == LATIN_SMALL_LETTER_SHARP_S)) {
+                                    *(s++) = 's';
+
+                                    /* Let the code below add in the extra 's' */
+                                    ender = 's';
+                                    added_len = 2;
+                                }
                             }
-                            else if (! RExC_uni_semantics) {
+                            else {
                                 maybe_exactfu = FALSE;
                             }
                         }
-                        else if (   len
-                                 && isALPHA_FOLD_EQ(ender, 's')
-                                 && isALPHA_FOLD_EQ(*(s-1), 's'))
-                        {
-                            maybe_exactfu = FALSE;
-                        }
-                        else
 #endif
 
-                        if (PL_fold[ender] != PL_fold_latin1[ender]) {
-                            maybe_exactfu = FALSE;
-                        }
-
                         /* Even when folding, we store just the input
                          * character, as we have an array that finds its fold
                          * quickly */
                         *(s++) = (char) ender;
                     }
-                }
-                else {  /* FOLD, and UTF */
-                    /* Unlike the non-fold case, we do actually have to
-                     * calculate the fold in pass 1.  This is for two reasons,
-                     * the folded length may be longer than the unfolded, and
-                     * we have to calculate how many EXACTish nodes it will
-                     * take; and we may run out of room in a node in the middle
-                     * of a potential multi-char fold, and have to back off
-                     * accordingly.  */
-
-                    if (isASCII_uni(ender)) {
-
-                        /* As above, we close up and start a new node if the
-                         * previous characters don't match the fold/non-fold
-                         * state of this one.  And if this is the first
-                         * character in the node, and it folds, we change the
-                         * node away from being EXACT */
-                        if (! IS_IN_SOME_FOLD_L1(ender)) {
-                            if (len && node_type != EXACT) {
-                                p = oldp;
-                                goto loopdone;
-                            }
-
-                            *(s)++ = (U8) ender;
-                        }
-                        else {  /* Is in a fold */
-
-                            if (! len) {
-                                node_type = compute_EXACTish(pRExC_state);
-                            }
-                            else if (node_type == EXACT) {
-                                p = oldp;
-                                goto loopdone;
-                            }
-
-                            *(s)++ = (U8) toFOLD(ender);
-                        }
-                    }
-                    else {  /* Not ASCII */
-                        STRLEN foldlen;
-
-                        /* As above, we close up and start a new node if the
-                         * previous characters don't match the fold/non-fold
-                         * state of this one.  And if this is the first
-                         * character in the node, and it folds, we change the
-                         * node away from being EXACT */
-                        if (! _invlist_contains_cp(PL_utf8_foldable, ender)) {
-                            if (len && node_type != EXACT) {
-                                p = oldp;
-                                goto loopdone;
-                            }
-
-                            s = (char *) uvchr_to_utf8((U8 *) s, ender);
-                            added_len = UVCHR_SKIP(ender);
-                        }
-                        else {
-
-                            if (! len) {
-                                node_type = compute_EXACTish(pRExC_state);
-                            }
-                            else if (node_type == EXACT) {
-                                p = oldp;
-                                goto loopdone;
-                            }
-
-                            ender = _to_uni_fold_flags(
-                                     ender,
-                                     (U8 *) s,
-                                     &foldlen,
-                                     FOLD_FLAGS_FULL | ((ASCII_FOLD_RESTRICTED)
-                                                        ? FOLD_FLAGS_NOMIX_ASCII
-                                                        : 0));
-                            s += foldlen;
-                            added_len = foldlen;
-                        }
-                    }
                } /* End of adding current character to the node */
 
                 len += added_len;
@@ -14494,6 +14448,9 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                             OP(REGNODE_p(ret)) = EXACTFLU8;
                         }
                     }
+                    else if (node_type == EXACTF) {
+                        RExC_seen_d_op = TRUE;
+                    }
                 }
 
                 alloc_maybe_populate_EXACT(pRExC_state, ret, flagp, len, ender,
@@ -15715,8 +15672,7 @@ redo_curchar:
                               FALSE, /* don't silence non-portable warnings.  */
                               TRUE,  /* strict */
                               FALSE, /* Require return to be an ANYOF */
-                              &current,
-                              NULL))
+                              &current))
                 {
                     FAIL2("panic: regclass returned failure to handle_sets, "
                           "flags=%#" UVxf, (UV) *flagp);
@@ -15753,9 +15709,7 @@ redo_curchar:
                                 TRUE, /* silence non-portable warnings. */
                                 TRUE, /* strict */
                                 FALSE, /* Require return to be an ANYOF */
-                                &current,
-                                NULL
-                                ))
+                                &current))
                 {
                     FAIL2("panic: regclass returned failure to handle_sets, "
                           "flags=%#" UVxf, (UV) *flagp);
@@ -16105,7 +16059,6 @@ redo_curchar:
                              they're valid on this machine */
                     FALSE, /* similarly, no need for strict */
                     FALSE, /* Require return to be an ANYOF */
-                    NULL,
                     NULL
                 );
 
@@ -16300,47 +16253,33 @@ S_add_above_Latin1_folds(pTHX_ RExC_state_t *pRExC_state, const U8 cp, SV** invl
 }
 
 STATIC void
-S_output_or_return_posix_warnings(pTHX_ RExC_state_t *pRExC_state, AV* posix_warnings, AV** return_posix_warnings)
+S_output_posix_warnings(pTHX_ RExC_state_t *pRExC_state, AV* posix_warnings)
 {
-    /* If the final parameter is NULL, output the elements of the array given
-     * by '*posix_warnings' as REGEXP warnings.  Otherwise, the elements are
-     * pushed onto it, (creating if necessary) */
+    /* Output the elements of the array given by '*posix_warnings' as REGEXP
+     * warnings. */
 
     SV * msg;
-    const bool first_is_fatal =  ! return_posix_warnings
-                                && ckDEAD(packWARN(WARN_REGEXP));
+    const bool first_is_fatal = ckDEAD(packWARN(WARN_REGEXP));
 
-    PERL_ARGS_ASSERT_OUTPUT_OR_RETURN_POSIX_WARNINGS;
+    PERL_ARGS_ASSERT_OUTPUT_POSIX_WARNINGS;
+
+    if (! TO_OUTPUT_WARNINGS(RExC_parse)) {
+        return;
+    }
 
     while ((msg = av_shift(posix_warnings)) != &PL_sv_undef) {
-        if (return_posix_warnings) {
-            if (! *return_posix_warnings) { /* mortalize to not leak if
-                                               warnings are fatal */
-                *return_posix_warnings = (AV *) sv_2mortal((SV *) newAV());
-            }
-            av_push(*return_posix_warnings, msg);
-        }
-        else {
-            if (first_is_fatal) {           /* Avoid leaking this */
-                av_undef(posix_warnings);   /* This isn't necessary if the
-                                               array is mortal, but is a
-                                               fail-safe */
-                (void) sv_2mortal(msg);
-                if (ckDEAD(packWARN(WARN_REGEXP))) {
-                    PREPARE_TO_DIE;
-                }
-            }
-            if (TO_OUTPUT_WARNINGS(RExC_parse)) {
-                Perl_warner(aTHX_ packWARN(WARN_REGEXP), "%s",
-                                           SvPVX(msg));
-            }
-            SvREFCNT_dec_NN(msg);
+        if (first_is_fatal) {           /* Avoid leaking this */
+            av_undef(posix_warnings);   /* This isn't necessary if the
+                                            array is mortal, but is a
+                                            fail-safe */
+            (void) sv_2mortal(msg);
+            PREPARE_TO_DIE;
         }
+        Perl_warner(aTHX_ packWARN(WARN_REGEXP), "%s", SvPVX(msg));
+        SvREFCNT_dec_NN(msg);
     }
 
-    if (! return_posix_warnings) {
-        UPDATE_WARNINGS_LOC(RExC_parse);
-    }
+    UPDATE_WARNINGS_LOC(RExC_parse);
 }
 
 STATIC AV *
@@ -16423,8 +16362,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                  const bool strict,
                  bool optimizable,                  /* ? Allow a non-ANYOF return
                                                        node */
-                 SV** ret_invlist, /* Return an inversion list, not a node */
-                 AV** return_posix_warnings
+                 SV** ret_invlist  /* Return an inversion list, not a node */
           )
 {
     /* parse a bracketed class specification.  Most of these will produce an
@@ -16537,8 +16475,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
 
     bool warn_super = ALWAYS_WARN_SUPER;
 
-    const regnode_offset orig_emit = RExC_emit; /* Save the original RExC_emit in
-        case we need to change the emitted regop to an EXACT. */
     const char * orig_parse = RExC_parse;
     bool posixl_matches_all = FALSE; /* Does /l class have both e.g. \W,\w ? */
 
@@ -16550,7 +16486,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
     char *not_posix_region_end = RExC_parse - 1;
 
     AV* posix_warnings = NULL;
-    const bool do_posix_warnings = return_posix_warnings || ckWARN(WARN_REGEXP);
+    const bool do_posix_warnings = ckWARN(WARN_REGEXP);
     U8 op = END;    /* The returned node-type, initialized to an impossible
                        one.  */
     U8 anyof_flags = 0;   /* flag bits if the node is an ANYOF-type */
@@ -16564,6 +16500,13 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
     PERL_UNUSED_ARG(depth);
 #endif
 
+
+    /* If wants an inversion list returned, we can't optimize to something
+     * else. */
+    if (ret_invlist) {
+        optimizable = FALSE;
+    }
+
     DEBUG_PARSE("clas");
 
 #if UNICODE_MAJOR_VERSION < 3 /* no multifolds in early Unicode */      \
@@ -16626,12 +16569,10 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
         {
             /* Warnings about posix class issues are considered tentative until
              * we are far enough along in the parse that we can no longer
-             * change our mind, at which point we either output them or add
-             * them, if it has so specified, to what gets returned to the
-             * caller.  This is done each time through the loop so that a later
-             * class won't zap them before they have been dealt with. */
-            output_or_return_posix_warnings(pRExC_state, posix_warnings,
-                                            return_posix_warnings);
+             * change our mind, at which point we output them.  This is done
+             * each time through the loop so that a later class won't zap them
+             * before they have been dealt with. */
+            output_posix_warnings(pRExC_state, posix_warnings);
         }
 
         if  (RExC_parse >= stop_ptr) {
@@ -17661,10 +17602,8 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
        range = 0; /* this range (if it was one) is done now */
     } /* End of loop through all the text within the brackets */
 
-
     if (   posix_warnings && av_tindex_skip_len_mg(posix_warnings) >= 0) {
-        output_or_return_posix_warnings(pRExC_state, posix_warnings,
-                                        return_posix_warnings);
+        output_posix_warnings(pRExC_state, posix_warnings);
     }
 
     /* If anything in the class expands to more than one character, we have to
@@ -18120,10 +18059,10 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                  || (anyof_flags & ANYOF_SHARED_d_MATCHES_ALL_NON_UTF8_NON_ASCII_non_d_WARN_SUPER)))
     {
         use_anyofd = TRUE;
+        RExC_seen_d_op = TRUE;
         optimizable = FALSE;
     }
 
-
     /* Optimize inverted simple patterns (e.g. [^a-z]) when everything is known
      * at compile time.  Besides not inverting folded locale now, we can't
      * invert if there are things such as \w, which aren't known until runtime
@@ -18159,7 +18098,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
 
     if (optimizable) {
         int posix_class = -1;   /* Illegal value */
-        const char * cur_parse= RExC_parse;
         U8 ANYOFM_mask = 0xFF;
         U32 anode_arg = 0;
         UV start, end;
@@ -18331,26 +18269,30 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                  * usage, is optimizable into ANYOFM, and can benefit from the
                  * speed up.  We can only do this on UTF-8 invariant bytes,
                  * because the variance would throw this off.  */
-                if (   op == END
-                    && invlist_highest(cp_list) <=
+                if (op == END) {
+                    PERL_UINT_FAST8_T inverted = 0;
 #ifdef EBCDIC
-                                                   0xFF
+                    const PERL_UINT_FAST8_T max_permissible = 0xFF;
 #else
-                                                   0x7F
+                    const PERL_UINT_FAST8_T max_permissible = 0x7F;
 #endif
-                ) {
+                    if (invlist_highest(cp_list) > max_permissible) {
+                        _invlist_invert(cp_list);
+                        inverted = 1;
+                    }
+
+                    if (invlist_highest(cp_list) <= max_permissible) {
                     Size_t cp_count = 0;
                     bool first_time = TRUE;
                     unsigned int lowest_cp = 0xFF;
                     U8 bits_differing = 0;
 
-                    /* Only needed on EBCDIC, as there, variants and non- are
-                     * mixed together.  Could #ifdef it out on ASCII, but
-                     * probably the compiler will optimize it out */
+                    /* Only needed on EBCDIC, as there, variants and non- are mixed
+                     * together.  Could #ifdef it out on ASCII, but probably the
+                     * compiler will optimize it out */
                     bool has_variant = FALSE;
 
-                    /* Go through the bytes and find the bit positions that
-                     * differ */
+                    /* Go through the bytes and find the bit positions that differ */
                     invlist_iterinit(cp_list);
                     while (invlist_iternext(cp_list, &start, &end)) {
                         unsigned int i = start;
@@ -18401,8 +18343,8 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                     if ( ! has_variant
                         && cp_count == 1U << PL_bitcount[bits_differing])
                     {
-                        assert(cp_count > 1);
-                        op = ANYOFM;
+                        assert(inverted || cp_count > 1);
+                        op = ANYOFM + inverted;;
 
                         /* We need to make the bits that differ be 0's */
                         ANYOFM_mask = ~ bits_differing; /* This goes into FLAGS
@@ -18413,20 +18355,21 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                         *flagp |= HASWIDTH|SIMPLE;
                     }
                 }
+                if (inverted) {
+                    _invlist_invert(cp_list);
+                }
+            }
             }
         }
 
         if (op != END) {
-            RExC_parse = (char *)orig_parse;
-            RExC_emit = orig_emit;
-
             if (regarglen[op]) {
                 ret = reganode(pRExC_state, op, anode_arg);
             } else {
                 ret = reg_node(pRExC_state, op);
             }
-
-            RExC_parse = (char *)cur_parse;
+            Set_Node_Offset_Length(REGNODE_p(ret), orig_parse - RExC_start,
+                                                   RExC_parse - orig_parse);;
 
             if (PL_regkind[op] == EXACT) {
                 alloc_maybe_populate_EXACT(pRExC_state, ret, flagp, 0, value,
@@ -19326,8 +19269,8 @@ S_regtail_study(pTHX_ RExC_state_t *pRExC_state, regnode_offset p,
 STATIC SV*
 S_get_ANYOFM_contents(pTHX_ const regnode * n) {
 
-    /* Returns an inversion list of all the code points matched by the ANYOFM
-     * node 'n' */
+    /* Returns an inversion list of all the code points matched by the
+     * ANYOFM/NANYOFM node 'n' */
 
     SV * cp_list = _new_invlist(-1);
     const U8 lowest = (U8) ARG(n);
@@ -19350,6 +19293,9 @@ S_get_ANYOFM_contents(pTHX_ const regnode * n) {
         }
     }
 
+    if (OP(n) == NANYOFM) {
+        _invlist_invert(cp_list);
+    }
     return cp_list;
 }
 
@@ -19883,6 +19829,10 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
         SV * cp_list = get_ANYOFM_contents(o);
 
        Perl_sv_catpvf(aTHX_ sv, "[%s", PL_colors[0]);
+        if (OP(o) == NANYOFM) {
+            _invlist_invert(cp_list);
+        }
+
         put_charclass_bitmap_innards(sv, NULL, cp_list, NULL, NULL, TRUE);
        Perl_sv_catpvf(aTHX_ sv, "%s]", PL_colors[1]);