regcomp.c: Use better method for setting debug offsets

[perl5.git] / regcomp.c
diff --git a/regcomp.c b/regcomp.c

index ffe4bec..ee67f5a 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
@@ -219,6 +219,7 @@ struct RExC_state_t {
  #define RExC_mysv2     (pRExC_state->mysv2)
  
  #endif
+    bool        seen_d_op;
      bool        strict;
      bool        study_started;
      bool        in_script_run;
@@ -239,6 +240,8 @@ struct RExC_state_t {
  #define RExC_parse     (pRExC_state->parse)
  #define RExC_latest_warn_offset (pRExC_state->latest_warn_offset )
  #define RExC_whilem_seen       (pRExC_state->whilem_seen)
+#define RExC_seen_d_op (pRExC_state->seen_d_op) /* Seen something that differs
+                                                   under /d from /u ? */
  
  
  #ifdef RE_TRACK_PATTERN_OFFSETS
@@ -349,8 +352,14 @@ struct RExC_state_t {
              if (DEPENDS_SEMANTICS) {                                        \
                  set_regex_charset(&RExC_flags, REGEX_UNICODE_CHARSET);      \
                  RExC_uni_semantics = 1;                                     \
+                if (RExC_seen_d_op && LIKELY(RExC_total_parens >= 0)) {     \
+                    /* No need to restart the parse if we haven't seen      \
+                     * anything that differs between /u and /d, and no need \
+                     * to restart immediately if we're going to reparse     \
+                     * anyway to count parens */                            \
                      *flagp |= RESTART_PARSE;                                \
                      return restart_retval;                                  \
+                }                                                           \
              }                                                               \
      } STMT_END
  
@@ -358,8 +367,12 @@ struct RExC_state_t {
  #define REQUIRE_BRANCHJ(flagp, restart_retval)                              \
      STMT_START {                                                            \
                  RExC_use_BRANCHJ = 1;                                       \
-                *flagp |= RESTART_PARSE;                                    \
-                return restart_retval;                                      \
+                if (LIKELY(RExC_total_parens >= 0)) {                       \
+                    /* No need to restart the parse immediately if we're    \
+                     * going to reparse anyway to count parens */           \
+                    *flagp |= RESTART_PARSE;                                \
+                    return restart_retval;                                  \
+                }                                                           \
      } STMT_END
  
  #define REQUIRE_PARENS_PASS                                                 \
@@ -367,33 +380,26 @@ struct RExC_state_t {
                      if (RExC_total_parens == 0) RExC_total_parens = -1;     \
      } STMT_END
  
-/* Executes a return statement with the value 'X', if 'flags' contains any of
- * 'RESTART_PARSE', 'NEED_UTF8', or 'extra'.  If so, *flagp is set to those
- * flags */
-#define RETURN_X_ON_RESTART_OR_FLAGS(X, flags, flagp, extra)                \
+/* This is used to return failure (zero) early from the calling function if
+ * various flags in 'flags' are set.  Two flags always cause a return:
+ * 'RESTART_PARSE' and 'NEED_UTF8'.   'extra' can be used to specify any
+ * additional flags that should cause a return; 0 if none.  If the return will
+ * be done, '*flagp' is first set to be all of the flags that caused the
+ * return. */
+#define RETURN_FAIL_ON_RESTART_OR_FLAGS(flags,flagp,extra)                  \
      STMT_START {                                                            \
              if ((flags) & (RESTART_PARSE|NEED_UTF8|(extra))) {              \
                  *(flagp) = (flags) & (RESTART_PARSE|NEED_UTF8|(extra));     \
-                return X;                                                   \
+                return 0;                                                   \
              }                                                               \
      } STMT_END
  
-#define RETURN_FAIL_ON_RESTART_OR_FLAGS(flags,flagp,extra)                  \
-                    RETURN_X_ON_RESTART_OR_FLAGS(0,flags,flagp,extra)
-
-#define RETURN_X_ON_RESTART(X, flags,flagp)                                 \
-                        RETURN_X_ON_RESTART_OR_FLAGS( X, flags, flagp, 0)
-
-
-#define RETURN_FAIL_ON_RESTART_FLAGP_OR_FLAGS(flagp,extra)                  \
-            if (*(flagp) & (RESTART_PARSE|(extra))) return 0
-
  #define MUST_RESTART(flags) ((flags) & (RESTART_PARSE))
  
  #define RETURN_FAIL_ON_RESTART(flags,flagp)                                 \
-                                    RETURN_X_ON_RESTART(0, flags,flagp)
+                        RETURN_FAIL_ON_RESTART_OR_FLAGS( flags, flagp, 0)
  #define RETURN_FAIL_ON_RESTART_FLAGP(flagp)                                 \
-                            RETURN_FAIL_ON_RESTART_FLAGP_OR_FLAGS(flagp, 0)
+                                    if (MUST_RESTART(*(flagp))) return 0
  
  /* This converts the named class defined in regcomp.h to its equivalent class
   * number defined in handy.h. */
@@ -5624,21 +5630,16 @@ Perl_re_printf( aTHX_  "LHS=%" UVuf " RHS=%" UVuf "\n",
                                                            (regnode_charclass *) scan);
                     break;
  
+                case NANYOFM:
                  case ANYOFM:
                    {
                      SV* cp_list = get_ANYOFM_contents(scan);
  
                      if (flags & SCF_DO_STCLASS_OR) {
-                        ssc_union(data->start_class,
-                                  cp_list,
-                                  FALSE /* don't invert */
-                                  );
+                        ssc_union(data->start_class, cp_list, invert);
                      }
                      else if (flags & SCF_DO_STCLASS_AND) {
-                        ssc_intersection(data->start_class,
-                                         cp_list,
-                                         FALSE /* don't invert */
-                                         );
+                        ssc_intersection(data->start_class, cp_list, invert);
                      }
  
                      SvREFCNT_dec_NN(cp_list);
@@ -7279,6 +7280,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
      RExC_close_parens = NULL;
      RExC_paren_names = NULL;
      RExC_size = 0;
+    RExC_seen_d_op = FALSE;
  #ifdef DEBUGGING
      RExC_paren_name_list = NULL;
  #endif
@@ -7532,9 +7534,6 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
          goto redo_parse;
      }
  
-    /* In a stable state, as here, this must be true */
-    assert(RExC_size = RExC_emit + 1);
-
      /* Here, we have successfully parsed and generated the pattern's program
       * for the regex engine.  We are ready to finish things up and look for
       * optimizations. */
@@ -12761,7 +12760,7 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state,
      SvREFCNT_dec_NN(substitute_parse);
  
      if (! *node_p) {
-        RETURN_X_ON_RESTART(FALSE, flags, flagp);
+        RETURN_FAIL_ON_RESTART(flags, flagp);
          FAIL2("panic: reg returned failure to grok_bslash_N, flags=%#" UVxf,
              (UV) flags);
      }
@@ -13146,10 +13145,9 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                         FALSE, /* don't silence non-portable warnings. */
                         (bool) RExC_strict,
                         TRUE, /* Allow an optimized regnode result */
-                       NULL,
                         NULL);
          if (ret == 0) {
-            RETURN_FAIL_ON_RESTART_FLAGP_OR_FLAGS(flagp, NEED_UTF8);
+            RETURN_FAIL_ON_RESTART_FLAGP(flagp);
              FAIL2("panic: regclass returned failure to regatom, flags=%#" UVxf,
                    (UV) *flagp);
          }
@@ -13268,7 +13266,10 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
              RExC_seen |= REG_LOOKBEHIND_SEEN;
             op = BOUND + charset;
  
-            if (op == BOUNDL) {
+            if (op == BOUND) {
+                RExC_seen_d_op = TRUE;
+            }
+            else if (op == BOUNDL) {
                  RExC_contains_locale = 1;
              }
  
@@ -13417,6 +13418,9 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
              else if (op == POSIXL) {
                  RExC_contains_locale = 1;
              }
+            else if (op == POSIXD) {
+                RExC_seen_d_op = TRUE;
+            }
  
            join_posix_op_known:
  
@@ -13452,7 +13456,6 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                                       non-portables */
                             (bool) RExC_strict,
                             TRUE, /* Allow an optimized regnode result */
-                           NULL,
                             NULL);
              RETURN_FAIL_ON_RESTART_FLAGP(flagp);
              /* regclass() can only return RESTART_PARSE and NEED_UTF8 if
@@ -13635,6 +13638,9 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                                         ? REFFL
                                         : REFF),
                                  num);
+                if (OP(REGNODE_p(ret)) == REFF) {
+                    RExC_seen_d_op = TRUE;
+                }
                  *flagp |= HASWIDTH;
  
                  /* override incorrect value set in reganode MJD */
@@ -14101,14 +14107,14 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                  if (! FOLD) {  /* The simple case, just append the literal */
  
                        not_fold_common:
-                        if (UTF && ! UVCHR_IS_INVARIANT(ender)) {
+                        if (UVCHR_IS_INVARIANT(ender) || ! UTF) {
+                            *(s++) = (char) ender;
+                        }
+                        else {
                              U8 * new_s = uvchr_to_utf8((U8*)s, ender);
                              added_len = (char *) new_s - s;
                              s = (char *) new_s;
                          }
-                        else {
-                            *(s++) = (char) ender;
-                        }
                  }
                  else if (LOC && is_PROBLEMATIC_LOCALE_FOLD_cp(ender)) {
  
@@ -14138,156 +14144,104 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                       * character, and wait until runtime to fold it */
                      goto not_fold_common;
                  }
-                else                /* A regular FOLD code point */
-                     if (! UTF)
+                else /* regular fold; see if actually is in a fold */
+                     if (   (ender < 256 && ! IS_IN_SOME_FOLD_L1(ender))
+                         || (ender > 255
+                            && ! _invlist_contains_cp(PL_utf8_foldable, ender)))
                  {
-                    /* Here, are folding and are not UTF-8 encoded; therefore
-                     * the character must be in the range 0-255, and is not /l.
-                     * (Not /l because we already handled these under /l in
-                     * is_PROBLEMATIC_LOCALE_FOLD_cp) */
-                    if (! IS_IN_SOME_FOLD_L1(ender)) {
-
-                        /* Start a new node for this non-folding character if
-                         * previous ones in the node were folded */
-                        if (len && node_type != EXACT) {
-                            p = oldp;
-                            goto loopdone;
-                        }
+                    /* Here, folding, but the character isn't in a fold.
+                     *
+                     * Start a new node if previous characters in the node were
+                     * folded */
+                    if (len && node_type != EXACT) {
+                        p = oldp;
+                        goto loopdone;
+                    }
+
+                    /* Here, continuing a node with non-folded characters.  Add
+                     * this one */
  
+                    if (UVCHR_IS_INVARIANT(ender) || ! UTF) {
                          *(s++) = (char) ender;
                      }
-                    else {  /* Here, does participate in some fold */
-
-                        /* if this is the first character in the node, change
-                         * its type to folding.  Otherwise, if this is the
-                         * first folding character in the node, close up the
-                         * existing node, so can start a new node with this
-                         * one.  */
-                        if (! len) {
-                            node_type = compute_EXACTish(pRExC_state);
+                    else {
+                        s = (char *) uvchr_to_utf8((U8 *) s, ender);
+                        added_len = UVCHR_SKIP(ender);
+                    }
+                }
+                else {  /* Here, does participate in some fold */
+
+                    /* If this is the first character in the node, change its
+                     * type to folding.  Otherwise, if this is the first
+                     * folding character in the node, close up the existing
+                     * node, so can start a new node with this one.  */
+                    if (! len) {
+                        node_type = compute_EXACTish(pRExC_state);
+                    }
+                    else if (node_type == EXACT) {
+                        p = oldp;
+                        goto loopdone;
+                    }
+
+                    if (UTF) {  /* For UTF-8, we add the folded value */
+                        if (UVCHR_IS_INVARIANT(ender)) {
+                            *(s)++ = (U8) toFOLD(ender);
                          }
-                        else if (node_type == EXACT) {
-                            p = oldp;
-                            goto loopdone;
+                        else {
+                            ender = _to_uni_fold_flags(
+                                    ender,
+                                    (U8 *) s,
+                                    &added_len,
+                                    FOLD_FLAGS_FULL | ((ASCII_FOLD_RESTRICTED)
+                                                    ? FOLD_FLAGS_NOMIX_ASCII
+                                                    : 0));
+                            s += added_len;
                          }
+                    }
+                    else {
  
-                        /* See if the character's fold differs between /d and
-                         * /u.  On non-ancient Unicode versions, this includes
-                         * the multi-char fold SHARP S to 'ss' */
+                        /* Here is non-UTF8; we don't normally store the folded
+                         * value.  First, see if the character's fold differs
+                         * between /d and /u. */
+                        if (PL_fold[ender] != PL_fold_latin1[ender]) {
+                            maybe_exactfu = FALSE;
+                        }
  
  #if    UNICODE_MAJOR_VERSION > 3 /* no multifolds in early Unicode */   \
     || (UNICODE_MAJOR_VERSION == 3 && (   UNICODE_DOT_VERSION > 0)       \
                                        || UNICODE_DOT_DOT_VERSION > 0)
  
-                        if (UNLIKELY(ender == LATIN_SMALL_LETTER_SHARP_S)) {
+                        /* On non-ancient Unicode versions, this includes the
+                         * multi-char fold SHARP S to 'ss' */
  
-                            /* See comments for join_exact() as to why we fold
-                             * this non-UTF at compile time */
-                            if (node_type == EXACTFU) {
-                                *(s++) = 's';
+                        else if (UNLIKELY(   ender == LATIN_SMALL_LETTER_SHARP_S
+                                          || (   len
+                                              && isALPHA_FOLD_EQ(ender, 's')
+                                              && isALPHA_FOLD_EQ(*(s-1), 's'))))
+                        {
  
-                                /* Let the code below add in the extra 's' */
-                                ender = 's';
-                                added_len = 2;
+                            if (node_type == EXACTFU) {
+                                /* See comments for join_exact() as to why we
+                                 * fold this non-UTF at compile time */
+                                if (UNLIKELY(ender == LATIN_SMALL_LETTER_SHARP_S)) {
+                                    *(s++) = 's';
+
+                                    /* Let the code below add in the extra 's' */
+                                    ender = 's';
+                                    added_len = 2;
+                                }
                              }
-                            else if (! RExC_uni_semantics) {
+                            else {
                                  maybe_exactfu = FALSE;
                              }
                          }
-                        else if (   len
-                                 && isALPHA_FOLD_EQ(ender, 's')
-                                 && isALPHA_FOLD_EQ(*(s-1), 's'))
-                        {
-                            maybe_exactfu = FALSE;
-                        }
-                        else
  #endif
  
-                        if (PL_fold[ender] != PL_fold_latin1[ender]) {
-                            maybe_exactfu = FALSE;
-                        }
-
                          /* Even when folding, we store just the input
                           * character, as we have an array that finds its fold
                           * quickly */
                          *(s++) = (char) ender;
                      }
-                }
-                else {  /* FOLD, and UTF */
-                    /* Unlike the non-fold case, we do actually have to
-                     * calculate the fold in pass 1.  This is for two reasons,
-                     * the folded length may be longer than the unfolded, and
-                     * we have to calculate how many EXACTish nodes it will
-                     * take; and we may run out of room in a node in the middle
-                     * of a potential multi-char fold, and have to back off
-                     * accordingly.  */
-
-                    if (isASCII_uni(ender)) {
-
-                        /* As above, we close up and start a new node if the
-                         * previous characters don't match the fold/non-fold
-                         * state of this one.  And if this is the first
-                         * character in the node, and it folds, we change the
-                         * node away from being EXACT */
-                        if (! IS_IN_SOME_FOLD_L1(ender)) {
-                            if (len && node_type != EXACT) {
-                                p = oldp;
-                                goto loopdone;
-                            }
-
-                            *(s)++ = (U8) ender;
-                        }
-                        else {  /* Is in a fold */
-
-                            if (! len) {
-                                node_type = compute_EXACTish(pRExC_state);
-                            }
-                            else if (node_type == EXACT) {
-                                p = oldp;
-                                goto loopdone;
-                            }
-
-                            *(s)++ = (U8) toFOLD(ender);
-                        }
-                    }
-                    else {  /* Not ASCII */
-                        STRLEN foldlen;
-
-                        /* As above, we close up and start a new node if the
-                         * previous characters don't match the fold/non-fold
-                         * state of this one.  And if this is the first
-                         * character in the node, and it folds, we change the
-                         * node away from being EXACT */
-                        if (! _invlist_contains_cp(PL_utf8_foldable, ender)) {
-                            if (len && node_type != EXACT) {
-                                p = oldp;
-                                goto loopdone;
-                            }
-
-                            s = (char *) uvchr_to_utf8((U8 *) s, ender);
-                            added_len = UVCHR_SKIP(ender);
-                        }
-                        else {
-
-                            if (! len) {
-                                node_type = compute_EXACTish(pRExC_state);
-                            }
-                            else if (node_type == EXACT) {
-                                p = oldp;
-                                goto loopdone;
-                            }
-
-                            ender = _to_uni_fold_flags(
-                                     ender,
-                                     (U8 *) s,
-                                     &foldlen,
-                                     FOLD_FLAGS_FULL | ((ASCII_FOLD_RESTRICTED)
-                                                        ? FOLD_FLAGS_NOMIX_ASCII
-                                                        : 0));
-                            s += foldlen;
-                            added_len = foldlen;
-                        }
-                    }
                 } /* End of adding current character to the node */
  
                  len += added_len;
@@ -14494,6 +14448,9 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                              OP(REGNODE_p(ret)) = EXACTFLU8;
                          }
                      }
+                    else if (node_type == EXACTF) {
+                        RExC_seen_d_op = TRUE;
+                    }
                  }
  
                  alloc_maybe_populate_EXACT(pRExC_state, ret, flagp, len, ender,
@@ -15715,8 +15672,7 @@ redo_curchar:
                                FALSE, /* don't silence non-portable warnings.  */
                                TRUE,  /* strict */
                                FALSE, /* Require return to be an ANYOF */
-                              &current,
-                              NULL))
+                              &current))
                  {
                      FAIL2("panic: regclass returned failure to handle_sets, "
                            "flags=%#" UVxf, (UV) *flagp);
@@ -15753,9 +15709,7 @@ redo_curchar:
                                  TRUE, /* silence non-portable warnings. */
                                  TRUE, /* strict */
                                  FALSE, /* Require return to be an ANYOF */
-                                &current,
-                                NULL
-                                ))
+                                &current))
                  {
                      FAIL2("panic: regclass returned failure to handle_sets, "
                            "flags=%#" UVxf, (UV) *flagp);
@@ -16105,7 +16059,6 @@ redo_curchar:
                               they're valid on this machine */
                      FALSE, /* similarly, no need for strict */
                      FALSE, /* Require return to be an ANYOF */
-                    NULL,
                      NULL
                  );
  
@@ -16300,47 +16253,33 @@ S_add_above_Latin1_folds(pTHX_ RExC_state_t *pRExC_state, const U8 cp, SV** invl
  }
  
  STATIC void
-S_output_or_return_posix_warnings(pTHX_ RExC_state_t *pRExC_state, AV* posix_warnings, AV** return_posix_warnings)
+S_output_posix_warnings(pTHX_ RExC_state_t *pRExC_state, AV* posix_warnings)
  {
-    /* If the final parameter is NULL, output the elements of the array given
-     * by '*posix_warnings' as REGEXP warnings.  Otherwise, the elements are
-     * pushed onto it, (creating if necessary) */
+    /* Output the elements of the array 'posix_warnings' as REGEXP warnings;
+     * returns early, leaving it untouched, unless TO_OUTPUT_WARNINGS. */
  
      SV * msg;
-    const bool first_is_fatal =  ! return_posix_warnings
-                                && ckDEAD(packWARN(WARN_REGEXP));
+    const bool first_is_fatal = ckDEAD(packWARN(WARN_REGEXP));
  
-    PERL_ARGS_ASSERT_OUTPUT_OR_RETURN_POSIX_WARNINGS;
+    PERL_ARGS_ASSERT_OUTPUT_POSIX_WARNINGS;
+
+    if (! TO_OUTPUT_WARNINGS(RExC_parse)) {
+        return;
+    }
  
      while ((msg = av_shift(posix_warnings)) != &PL_sv_undef) {
-        if (return_posix_warnings) {
-            if (! *return_posix_warnings) { /* mortalize to not leak if
-                                               warnings are fatal */
-                *return_posix_warnings = (AV *) sv_2mortal((SV *) newAV());
-            }
-            av_push(*return_posix_warnings, msg);
-        }
-        else {
-            if (first_is_fatal) {           /* Avoid leaking this */
-                av_undef(posix_warnings);   /* This isn't necessary if the
-                                               array is mortal, but is a
-                                               fail-safe */
-                (void) sv_2mortal(msg);
-                if (ckDEAD(packWARN(WARN_REGEXP))) {
-                    PREPARE_TO_DIE;
-                }
-            }
-            if (TO_OUTPUT_WARNINGS(RExC_parse)) {
-                Perl_warner(aTHX_ packWARN(WARN_REGEXP), "%s",
-                                           SvPVX(msg));
-            }
-            SvREFCNT_dec_NN(msg);
+        if (first_is_fatal) {           /* Avoid leaking this */
+            av_undef(posix_warnings);   /* This isn't necessary if the
+                                            array is mortal, but is a
+                                            fail-safe */
+            (void) sv_2mortal(msg);
+            PREPARE_TO_DIE;
          }
+        Perl_warner(aTHX_ packWARN(WARN_REGEXP), "%s", SvPVX(msg));
+        SvREFCNT_dec_NN(msg);
      }
  
-    if (! return_posix_warnings) {
-        UPDATE_WARNINGS_LOC(RExC_parse);
-    }
+    UPDATE_WARNINGS_LOC(RExC_parse);
  }
  
  STATIC AV *
@@ -16423,8 +16362,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                   const bool strict,
                   bool optimizable,                  /* ? Allow a non-ANYOF return
                                                         node */
-                 SV** ret_invlist, /* Return an inversion list, not a node */
-                 AV** return_posix_warnings
+                 SV** ret_invlist  /* Return an inversion list, not a node */
            )
  {
      /* parse a bracketed class specification.  Most of these will produce an
@@ -16537,8 +16475,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
  
      bool warn_super = ALWAYS_WARN_SUPER;
  
-    const regnode_offset orig_emit = RExC_emit; /* Save the original RExC_emit in
-        case we need to change the emitted regop to an EXACT. */
      const char * orig_parse = RExC_parse;
      bool posixl_matches_all = FALSE; /* Does /l class have both e.g. \W,\w ? */
  
@@ -16550,7 +16486,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
      char *not_posix_region_end = RExC_parse - 1;
  
      AV* posix_warnings = NULL;
-    const bool do_posix_warnings = return_posix_warnings || ckWARN(WARN_REGEXP);
+    const bool do_posix_warnings = ckWARN(WARN_REGEXP);
      U8 op = END;    /* The returned node-type, initialized to an impossible
                         one.  */
      U8 anyof_flags = 0;   /* flag bits if the node is an ANYOF-type */
@@ -16564,6 +16500,13 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
      PERL_UNUSED_ARG(depth);
  #endif
  
+
+    /* If the caller wants an inversion list returned, we can't optimize to
+     * something else. */
+    if (ret_invlist) {
+        optimizable = FALSE;
+    }
+
      DEBUG_PARSE("clas");
  
  #if UNICODE_MAJOR_VERSION < 3 /* no multifolds in early Unicode */      \
@@ -16626,12 +16569,10 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
          {
              /* Warnings about posix class issues are considered tentative until
               * we are far enough along in the parse that we can no longer
-             * change our mind, at which point we either output them or add
-             * them, if it has so specified, to what gets returned to the
-             * caller.  This is done each time through the loop so that a later
-             * class won't zap them before they have been dealt with. */
-            output_or_return_posix_warnings(pRExC_state, posix_warnings,
-                                            return_posix_warnings);
+             * change our mind, at which point we output them.  This is done
+             * each time through the loop so that a later class won't zap them
+             * before they have been dealt with. */
+            output_posix_warnings(pRExC_state, posix_warnings);
          }
  
          if  (RExC_parse >= stop_ptr) {
@@ -17661,10 +17602,8 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
         range = 0; /* this range (if it was one) is done now */
      } /* End of loop through all the text within the brackets */
  
-
      if (   posix_warnings && av_tindex_skip_len_mg(posix_warnings) >= 0) {
-        output_or_return_posix_warnings(pRExC_state, posix_warnings,
-                                        return_posix_warnings);
+        output_posix_warnings(pRExC_state, posix_warnings);
      }
  
      /* If anything in the class expands to more than one character, we have to
@@ -18120,10 +18059,10 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                   || (anyof_flags & ANYOF_SHARED_d_MATCHES_ALL_NON_UTF8_NON_ASCII_non_d_WARN_SUPER)))
      {
          use_anyofd = TRUE;
+        RExC_seen_d_op = TRUE;
          optimizable = FALSE;
      }
  
-
      /* Optimize inverted simple patterns (e.g. [^a-z]) when everything is known
       * at compile time.  Besides not inverting folded locale now, we can't
       * invert if there are things such as \w, which aren't known until runtime
@@ -18159,7 +18098,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
  
      if (optimizable) {
          int posix_class = -1;   /* Illegal value */
-        const char * cur_parse= RExC_parse;
          U8 ANYOFM_mask = 0xFF;
          U32 anode_arg = 0;
          UV start, end;
@@ -18331,26 +18269,30 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                   * usage, is optimizable into ANYOFM, and can benefit from the
                   * speed up.  We can only do this on UTF-8 invariant bytes,
                   * because the variance would throw this off.  */
-                if (   op == END
-                    && invlist_highest(cp_list) <=
+                if (op == END) {
+                    PERL_UINT_FAST8_T inverted = 0;
  #ifdef EBCDIC
-                                                   0xFF
+                    const PERL_UINT_FAST8_T max_permissible = 0xFF;
  #else
-                                                   0x7F
+                    const PERL_UINT_FAST8_T max_permissible = 0x7F;
  #endif
-                ) {
+                    if (invlist_highest(cp_list) > max_permissible) {
+                        _invlist_invert(cp_list);
+                        inverted = 1;
+                    }
+
+                    if (invlist_highest(cp_list) <= max_permissible) {
                      Size_t cp_count = 0;
                      bool first_time = TRUE;
                      unsigned int lowest_cp = 0xFF;
                      U8 bits_differing = 0;
  
-                    /* Only needed on EBCDIC, as there, variants and non- are
-                     * mixed together.  Could #ifdef it out on ASCII, but
-                     * probably the compiler will optimize it out */
+                    /* Only needed on EBCDIC, as there, variants and non- are mixed
+                     * together.  Could #ifdef it out on ASCII, but probably the
+                     * compiler will optimize it out */
                      bool has_variant = FALSE;
  
-                    /* Go through the bytes and find the bit positions that
-                     * differ */
+                    /* Go through the bytes and find the bit positions that differ */
                      invlist_iterinit(cp_list);
                      while (invlist_iternext(cp_list, &start, &end)) {
                          unsigned int i = start;
@@ -18401,8 +18343,8 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                      if ( ! has_variant
                          && cp_count == 1U << PL_bitcount[bits_differing])
                      {
-                        assert(cp_count > 1);
-                        op = ANYOFM;
+                        assert(inverted || cp_count > 1);
+                        op = ANYOFM + inverted;;
  
                          /* We need to make the bits that differ be 0's */
                          ANYOFM_mask = ~ bits_differing; /* This goes into FLAGS
@@ -18413,20 +18355,21 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                          *flagp |= HASWIDTH|SIMPLE;
                      }
                  }
+                if (inverted) {
+                    _invlist_invert(cp_list);
+                }
+            }
              }
          }
  
          if (op != END) {
-            RExC_parse = (char *)orig_parse;
-            RExC_emit = orig_emit;
-
              if (regarglen[op]) {
                  ret = reganode(pRExC_state, op, anode_arg);
              } else {
                  ret = reg_node(pRExC_state, op);
              }
-
-            RExC_parse = (char *)cur_parse;
+            Set_Node_Offset_Length(REGNODE_p(ret), orig_parse - RExC_start,
+                                                   RExC_parse - orig_parse);;
  
              if (PL_regkind[op] == EXACT) {
                  alloc_maybe_populate_EXACT(pRExC_state, ret, flagp, 0, value,
@@ -19326,8 +19269,8 @@ S_regtail_study(pTHX_ RExC_state_t *pRExC_state, regnode_offset p,
  STATIC SV*
  S_get_ANYOFM_contents(pTHX_ const regnode * n) {
  
-    /* Returns an inversion list of all the code points matched by the ANYOFM
-     * node 'n' */
+    /* Returns an inversion list of all the code points matched by the
+     * ANYOFM/NANYOFM node 'n' */
  
      SV * cp_list = _new_invlist(-1);
      const U8 lowest = (U8) ARG(n);
@@ -19350,6 +19293,9 @@ S_get_ANYOFM_contents(pTHX_ const regnode * n) {
          }
      }
  
+    if (OP(n) == NANYOFM) {
+        _invlist_invert(cp_list);
+    }
      return cp_list;
  }
  
@@ -19883,6 +19829,10 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
          SV * cp_list = get_ANYOFM_contents(o);
  
         Perl_sv_catpvf(aTHX_ sv, "[%s", PL_colors[0]);
+        if (OP(o) == NANYOFM) {
+            _invlist_invert(cp_list);
+        }
+
          put_charclass_bitmap_innards(sv, NULL, cp_list, NULL, NULL, TRUE);
         Perl_sv_catpvf(aTHX_ sv, "%s]", PL_colors[1]);