PATCH: [perl #126141]: qr/]]]]][\\/ fails to raise error

[perl5.git] / regcomp.c
diff --git a/regcomp.c b/regcomp.c

index f748e79..68c599f 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
@@ -105,6 +105,10 @@ EXTERN_C const struct regexp_engine my_reg_engine;
  #define MIN(a,b) ((a) < (b) ? (a) : (b))
  #endif
  
  #define MIN(a,b) ((a) < (b) ? (a) : (b))
  #endif
  
+#ifndef MAX
+#define MAX(a,b) ((a) > (b) ? (a) : (b))
+#endif
+
  /* this is a chain of data about sub patterns we are processing that
     need to be handled separately/specially in study_chunk. Its so
     we can simulate recursion without losing state.  */
  /* this is a chain of data about sub patterns we are processing that
     need to be handled separately/specially in study_chunk. Its so
     we can simulate recursion without losing state.  */
@@ -131,6 +135,7 @@ struct RExC_state_t {
      U32                flags;                  /* RXf_* are we folding, multilining? */
      U32                pm_flags;               /* PMf_* stuff from the calling PMOP */
      char       *precomp;               /* uncompiled string. */
      U32                flags;                  /* RXf_* are we folding, multilining? */
      U32                pm_flags;               /* PMf_* stuff from the calling PMOP */
      char       *precomp;               /* uncompiled string. */
+    char       *precomp_end;           /* pointer to end of uncompiled string. */
      REGEXP     *rx_sv;                 /* The SV that is the regexp. */
      regexp     *rx;                    /* perl core regexp structure */
      regexp_internal    *rxi;           /* internal data for regexp object
      REGEXP     *rx_sv;                 /* The SV that is the regexp. */
      regexp     *rx;                    /* perl core regexp structure */
      regexp_internal    *rxi;           /* internal data for regexp object
@@ -138,6 +143,8 @@ struct RExC_state_t {
      char       *start;                 /* Start of input for compile */
      char       *end;                   /* End of input for compile */
      char       *parse;                 /* Input-scan pointer. */
      char       *start;                 /* Start of input for compile */
      char       *end;                   /* End of input for compile */
      char       *parse;                 /* Input-scan pointer. */
+    char        *adjusted_start;        /* 'start', adjusted.  See code use */
+    STRLEN      precomp_adj;            /* an offset beyond precomp.  See code use */
      SSize_t    whilem_seen;            /* number of WHILEM in this expr */
      regnode    *emit_start;            /* Start of emitted-code area */
      regnode    *emit_bound;            /* First regnode outside of the
      SSize_t    whilem_seen;            /* number of WHILEM in this expr */
      regnode    *emit_start;            /* Start of emitted-code area */
      regnode    *emit_bound;            /* First regnode outside of the
@@ -192,7 +199,6 @@ struct RExC_state_t {
      scan_frame *frame_head;
      scan_frame *frame_last;
      U32         frame_count;
      scan_frame *frame_head;
      scan_frame *frame_last;
      U32         frame_count;
-    U32         strict;
  #ifdef ADD_TO_REGEXEC
      char       *starttry;              /* -Dr: where regtry was called. */
  #define RExC_starttry  (pRExC_state->starttry)
  #ifdef ADD_TO_REGEXEC
      char       *starttry;              /* -Dr: where regtry was called. */
  #define RExC_starttry  (pRExC_state->starttry)
@@ -215,11 +221,15 @@ struct RExC_state_t {
  
  #endif
      bool        seen_unfolded_sharp_s;
  
  #endif
      bool        seen_unfolded_sharp_s;
+    bool        strict;
  };
  
  #define RExC_flags     (pRExC_state->flags)
  #define RExC_pm_flags  (pRExC_state->pm_flags)
  #define RExC_precomp   (pRExC_state->precomp)
  };
  
  #define RExC_flags     (pRExC_state->flags)
  #define RExC_pm_flags  (pRExC_state->pm_flags)
  #define RExC_precomp   (pRExC_state->precomp)
+#define RExC_precomp_adj (pRExC_state->precomp_adj)
+#define RExC_adjusted_start  (pRExC_state->adjusted_start)
+#define RExC_precomp_end (pRExC_state->precomp_end)
  #define RExC_rx_sv     (pRExC_state->rx_sv)
  #define RExC_rx                (pRExC_state->rx)
  #define RExC_rxi       (pRExC_state->rxi)
  #define RExC_rx_sv     (pRExC_state->rx_sv)
  #define RExC_rx                (pRExC_state->rx)
  #define RExC_rxi       (pRExC_state->rxi)
@@ -554,9 +564,67 @@ static const scan_data_t zero_scan_data =
  #define REPORT_LOCATION " in regex; marked by " MARKER1    \
                          " in m/%"UTF8f MARKER2 "%"UTF8f"/"
  
  #define REPORT_LOCATION " in regex; marked by " MARKER1    \
                          " in m/%"UTF8f MARKER2 "%"UTF8f"/"
  
-#define REPORT_LOCATION_ARGS(offset)            \
-                UTF8fARG(UTF, offset, RExC_precomp), \
-                UTF8fARG(UTF, RExC_end - RExC_precomp - offset, RExC_precomp + offset)
+/* The code in this file in places uses one level of recursion with parsing
+ * rebased to an alternate string constructed by us in memory.  This can take
+ * the form of something that is completely different from the input, or
+ * something that uses the input as part of the alternate.  In the first case,
+ * there should be no possibility of an error, as we are in complete control of
+ * the alternate string.  But in the second case we don't control the input
+ * portion, so there may be errors in that.  Here's an example:
+ *      /[abc\x{DF}def]/ui
+ * is handled specially because \x{df} folds to a sequence of more than one
+ * character, 'ss'.  What is done is to create and parse an alternate string,
+ * which looks like this:
+ *      /(?:\x{DF}|[abc\x{DF}def])/ui
+ * where it uses the input unchanged in the middle of something it constructs,
+ * which is a branch for the DF outside the character class, and clustering
+ * parens around the whole thing. (It knows enough to skip the DF inside the
+ * class while in this substitute parse.) 'abc' and 'def' may have errors that
+ * need to be reported.  The general situation looks like this:
+ *
+ *              sI                       tI               xI       eI
+ * Input:       ----------------------------------------------------
+ * Constructed:         ---------------------------------------------------
+ *                      sC               tC               xC       eC     EC
+ *
+ * The input string sI..eI is the input pattern.  The string sC..EC is the
+ * constructed substitute parse string.  The portions sC..tC and eC..EC are
+ * constructed by us.  The portion tC..eC is an exact duplicate of the input
+ * pattern tI..eI.  In the diagram, these are vertically aligned.  Suppose that
+ * while parsing, we find an error at xC.  We want to display a message showing
+ * the real input string.  Thus we need to find the point xI in it which
+ * corresponds to xC.  xC >= tC, since the portion of the string sC..tC has
+ * been constructed by us, and so shouldn't have errors.  We get:
+ *
+ *      xI = sI + (tI - sI) + (xC - tC)
+ *
+ * and, the offset into sI is:
+ *
+ *      (xI - sI) = (tI - sI) + (xC - tC)
+ *
+ * When the substitute is constructed, we save (tI -sI) as RExC_precomp_adj,
+ * and we save tC as RExC_adjusted_start.
+ *
+ * During normal processing of the input pattern, everything points to that,
+ * with RExC_precomp_adj set to 0, and RExC_adjusted_start set to sI.
+ */
+
+#define tI_sI           RExC_precomp_adj
+#define tC              RExC_adjusted_start
+#define sC              RExC_precomp
+#define xI_offset(xC)   ((IV) (tI_sI + (xC - tC)))
+#define xI(xC)          (sC + xI_offset(xC))
+#define eC              RExC_precomp_end
+
+#define REPORT_LOCATION_ARGS(xC)                                            \
+    UTF8fARG(UTF,                                                           \
+             (xI(xC) > eC) /* Don't run off end */                          \
+              ? eC - sC   /* Length before the <--HERE */                   \
+              : xI_offset(xC),                                              \
+             sC),         /* The input pattern printed up to the <--HERE */ \
+    UTF8fARG(UTF,                                                           \
+             (xI(xC) > eC) ? 0 : eC - xI(xC), /* Length after <--HERE */    \
+             (xI(xC) > eC) ? eC : xI(xC))     /* pattern after <--HERE */
  
  /* Used to point after bad bytes for an error message, but avoid skipping
   * past a nul byte. */
  
  /* Used to point after bad bytes for an error message, but avoid skipping
   * past a nul byte. */
@@ -569,7 +637,7 @@ static const scan_data_t zero_scan_data =
   */
  #define _FAIL(code) STMT_START {                                       \
      const char *ellipses = "";                                         \
   */
  #define _FAIL(code) STMT_START {                                       \
      const char *ellipses = "";                                         \
-    IV len = RExC_end - RExC_precomp;                                  \
+    IV len = RExC_precomp_end - RExC_precomp;                                  \
                                                                         \
      if (!SIZE_ONLY)                                                    \
         SAVEFREESV(RExC_rx_sv);                                         \
                                                                         \
      if (!SIZE_ONLY)                                                    \
         SAVEFREESV(RExC_rx_sv);                                         \
@@ -593,10 +661,8 @@ static const scan_data_t zero_scan_data =
   * Simple_vFAIL -- like FAIL, but marks the current location in the scan
   */
  #define        Simple_vFAIL(m) STMT_START {                                    \
   * Simple_vFAIL -- like FAIL, but marks the current location in the scan
   */
  #define        Simple_vFAIL(m) STMT_START {                                    \
-    const IV offset =                                                   \
-        (RExC_parse > RExC_end ? RExC_end : RExC_parse) - RExC_precomp; \
      Perl_croak(aTHX_ "%s" REPORT_LOCATION,                             \
      Perl_croak(aTHX_ "%s" REPORT_LOCATION,                             \
-           m, REPORT_LOCATION_ARGS(offset));   \
+           m, REPORT_LOCATION_ARGS(RExC_parse));                       \
  } STMT_END
  
  /*
  } STMT_END
  
  /*
@@ -612,9 +678,8 @@ static const scan_data_t zero_scan_data =
   * Like Simple_vFAIL(), but accepts two arguments.
   */
  #define        Simple_vFAIL2(m,a1) STMT_START {                        \
   * Like Simple_vFAIL(), but accepts two arguments.
   */
  #define        Simple_vFAIL2(m,a1) STMT_START {                        \
-    const IV offset = RExC_parse - RExC_precomp;                       \
-    S_re_croak2(aTHX_ UTF, m, REPORT_LOCATION, a1,                     \
-                      REPORT_LOCATION_ARGS(offset));   \
+    S_re_croak2(aTHX_ UTF, m, REPORT_LOCATION, a1,             \
+                      REPORT_LOCATION_ARGS(RExC_parse));       \
  } STMT_END
  
  /*
  } STMT_END
  
  /*
@@ -631,9 +696,8 @@ static const scan_data_t zero_scan_data =
   * Like Simple_vFAIL(), but accepts three arguments.
   */
  #define        Simple_vFAIL3(m, a1, a2) STMT_START {                   \
   * Like Simple_vFAIL(), but accepts three arguments.
   */
  #define        Simple_vFAIL3(m, a1, a2) STMT_START {                   \
-    const IV offset = RExC_parse - RExC_precomp;               \
      S_re_croak2(aTHX_ UTF, m, REPORT_LOCATION, a1, a2,         \
      S_re_croak2(aTHX_ UTF, m, REPORT_LOCATION, a1, a2,         \
-           REPORT_LOCATION_ARGS(offset));      \
+           REPORT_LOCATION_ARGS(RExC_parse));                  \
  } STMT_END
  
  /*
  } STMT_END
  
  /*
@@ -649,9 +713,8 @@ static const scan_data_t zero_scan_data =
   * Like Simple_vFAIL(), but accepts four arguments.
   */
  #define        Simple_vFAIL4(m, a1, a2, a3) STMT_START {               \
   * Like Simple_vFAIL(), but accepts four arguments.
   */
  #define        Simple_vFAIL4(m, a1, a2, a3) STMT_START {               \
-    const IV offset = RExC_parse - RExC_precomp;               \
-    S_re_croak2(aTHX_ UTF, m, REPORT_LOCATION, a1, a2, a3,             \
-           REPORT_LOCATION_ARGS(offset));      \
+    S_re_croak2(aTHX_ UTF, m, REPORT_LOCATION, a1, a2, a3,     \
+           REPORT_LOCATION_ARGS(RExC_parse));                  \
  } STMT_END
  
  #define        vFAIL4(m,a1,a2,a3) STMT_START {                 \
  } STMT_END
  
  #define        vFAIL4(m,a1,a2,a3) STMT_START {                 \
@@ -661,20 +724,18 @@ static const scan_data_t zero_scan_data =
  } STMT_END
  
  /* A specialized version of vFAIL2 that works with UTF8f */
  } STMT_END
  
  /* A specialized version of vFAIL2 that works with UTF8f */
-#define vFAIL2utf8f(m, a1) STMT_START {            \
-    const IV offset = RExC_parse - RExC_precomp;   \
-    if (!SIZE_ONLY)                                \
-        SAVEFREESV(RExC_rx_sv);                    \
-    S_re_croak2(aTHX_ UTF, m, REPORT_LOCATION, a1, \
-            REPORT_LOCATION_ARGS(offset));         \
+#define vFAIL2utf8f(m, a1) STMT_START {             \
+    if (!SIZE_ONLY)                                 \
+        SAVEFREESV(RExC_rx_sv);                     \
+    S_re_croak2(aTHX_ UTF, m, REPORT_LOCATION, a1,  \
+            REPORT_LOCATION_ARGS(RExC_parse));      \
  } STMT_END
  
  #define vFAIL3utf8f(m, a1, a2) STMT_START {             \
  } STMT_END
  
  #define vFAIL3utf8f(m, a1, a2) STMT_START {             \
-    const IV offset = RExC_parse - RExC_precomp;        \
      if (!SIZE_ONLY)                                     \
          SAVEFREESV(RExC_rx_sv);                         \
      S_re_croak2(aTHX_ UTF, m, REPORT_LOCATION, a1, a2,  \
      if (!SIZE_ONLY)                                     \
          SAVEFREESV(RExC_rx_sv);                         \
      S_re_croak2(aTHX_ UTF, m, REPORT_LOCATION, a1, a2,  \
-            REPORT_LOCATION_ARGS(offset));              \
+            REPORT_LOCATION_ARGS(RExC_parse));          \
  } STMT_END
  
  /* These have asserts in them because of [perl #122671] Many warnings in
  } STMT_END
  
  /* These have asserts in them because of [perl #122671] Many warnings in
@@ -685,84 +746,86 @@ static const scan_data_t zero_scan_data =
  
  /* m is not necessarily a "literal string", in this macro */
  #define reg_warn_non_literal_string(loc, m) STMT_START {                \
  
  /* m is not necessarily a "literal string", in this macro */
  #define reg_warn_non_literal_string(loc, m) STMT_START {                \
-    const IV offset = loc - RExC_precomp;                               \
-    __ASSERT_(PASS2) Perl_warner(aTHX_ packWARN(WARN_REGEXP), "%s" REPORT_LOCATION,      \
-            m, REPORT_LOCATION_ARGS(offset));       \
+    __ASSERT_(PASS2) Perl_warner(aTHX_ packWARN(WARN_REGEXP),           \
+                                       "%s" REPORT_LOCATION,            \
+                                  m, REPORT_LOCATION_ARGS(loc));        \
  } STMT_END
  
  #define        ckWARNreg(loc,m) STMT_START {                                   \
  } STMT_END
  
  #define        ckWARNreg(loc,m) STMT_START {                                   \
-    const IV offset = loc - RExC_precomp;                              \
-    __ASSERT_(PASS2) Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), m REPORT_LOCATION,    \
-           REPORT_LOCATION_ARGS(offset));              \
+    __ASSERT_(PASS2) Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),        \
+                                          m REPORT_LOCATION,           \
+                                         REPORT_LOCATION_ARGS(loc));   \
  } STMT_END
  
  #define        vWARN(loc, m) STMT_START {                                      \
  } STMT_END
  
  #define        vWARN(loc, m) STMT_START {                                      \
-    const IV offset = loc - RExC_precomp;                              \
-    __ASSERT_(PASS2) Perl_warner(aTHX_ packWARN(WARN_REGEXP), m REPORT_LOCATION,       \
-           REPORT_LOCATION_ARGS(offset));              \
+    __ASSERT_(PASS2) Perl_warner(aTHX_ packWARN(WARN_REGEXP),           \
+                                       m REPORT_LOCATION,               \
+                                       REPORT_LOCATION_ARGS(loc));      \
  } STMT_END
  
  #define        vWARN_dep(loc, m) STMT_START {                                  \
  } STMT_END
  
  #define        vWARN_dep(loc, m) STMT_START {                                  \
-    const IV offset = loc - RExC_precomp;                              \
-    __ASSERT_(PASS2) Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), m REPORT_LOCATION,   \
-           REPORT_LOCATION_ARGS(offset));              \
+    __ASSERT_(PASS2) Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),       \
+                                       m REPORT_LOCATION,               \
+                                      REPORT_LOCATION_ARGS(loc));      \
  } STMT_END
  
  #define        ckWARNdep(loc,m) STMT_START {                                   \
  } STMT_END
  
  #define        ckWARNdep(loc,m) STMT_START {                                   \
-    const IV offset = loc - RExC_precomp;                              \
-    __ASSERT_(PASS2) Perl_ck_warner_d(aTHX_ packWARN(WARN_DEPRECATED),                 \
-           m REPORT_LOCATION,                                          \
-           REPORT_LOCATION_ARGS(offset));              \
+    __ASSERT_(PASS2) Perl_ck_warner_d(aTHX_ packWARN(WARN_DEPRECATED),  \
+                                           m REPORT_LOCATION,          \
+                                           REPORT_LOCATION_ARGS(loc)); \
  } STMT_END
  
  } STMT_END
  
-#define        ckWARNregdep(loc,m) STMT_START {                                \
-    const IV offset = loc - RExC_precomp;                              \
-    __ASSERT_(PASS2) Perl_ck_warner_d(aTHX_ packWARN2(WARN_DEPRECATED, WARN_REGEXP),   \
-           m REPORT_LOCATION,                                          \
-           REPORT_LOCATION_ARGS(offset));              \
+#define        ckWARNregdep(loc,m) STMT_START {                                    \
+    __ASSERT_(PASS2) Perl_ck_warner_d(aTHX_ packWARN2(WARN_DEPRECATED,      \
+                                                      WARN_REGEXP),         \
+                                            m REPORT_LOCATION,             \
+                                            REPORT_LOCATION_ARGS(loc));    \
  } STMT_END
  
  } STMT_END
  
-#define        ckWARN2reg_d(loc,m, a1) STMT_START {                            \
-    const IV offset = loc - RExC_precomp;                              \
-    __ASSERT_(PASS2) Perl_ck_warner_d(aTHX_ packWARN(WARN_REGEXP),                     \
-           m REPORT_LOCATION,                                          \
-           a1, REPORT_LOCATION_ARGS(offset));  \
+#define        ckWARN2reg_d(loc,m, a1) STMT_START {                                \
+    __ASSERT_(PASS2) Perl_ck_warner_d(aTHX_ packWARN(WARN_REGEXP),          \
+                                           m REPORT_LOCATION,              \
+                                           a1, REPORT_LOCATION_ARGS(loc)); \
  } STMT_END
  
  } STMT_END
  
-#define        ckWARN2reg(loc, m, a1) STMT_START {                             \
-    const IV offset = loc - RExC_precomp;                              \
-    __ASSERT_(PASS2) Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), m REPORT_LOCATION,    \
-           a1, REPORT_LOCATION_ARGS(offset));  \
+#define        ckWARN2reg(loc, m, a1) STMT_START {                                 \
+    __ASSERT_(PASS2) Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),            \
+                                          m REPORT_LOCATION,               \
+                                          a1, REPORT_LOCATION_ARGS(loc));   \
  } STMT_END
  
  } STMT_END
  
-#define        vWARN3(loc, m, a1, a2) STMT_START {                             \
-    const IV offset = loc - RExC_precomp;                              \
-    __ASSERT_(PASS2) Perl_warner(aTHX_ packWARN(WARN_REGEXP), m REPORT_LOCATION,               \
-           a1, a2, REPORT_LOCATION_ARGS(offset));      \
+#define        vWARN3(loc, m, a1, a2) STMT_START {                                 \
+    __ASSERT_(PASS2) Perl_warner(aTHX_ packWARN(WARN_REGEXP),               \
+                                       m REPORT_LOCATION,                   \
+                                      a1, a2, REPORT_LOCATION_ARGS(loc));  \
  } STMT_END
  
  } STMT_END
  
-#define        ckWARN3reg(loc, m, a1, a2) STMT_START {                         \
-    const IV offset = loc - RExC_precomp;                              \
-    __ASSERT_(PASS2) Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), m REPORT_LOCATION,    \
-           a1, a2, REPORT_LOCATION_ARGS(offset));      \
+#define        ckWARN3reg(loc, m, a1, a2) STMT_START {                             \
+    __ASSERT_(PASS2) Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),            \
+                                          m REPORT_LOCATION,                \
+                                         a1, a2,                           \
+                                          REPORT_LOCATION_ARGS(loc));       \
  } STMT_END
  
  #define        vWARN4(loc, m, a1, a2, a3) STMT_START {                         \
  } STMT_END
  
  #define        vWARN4(loc, m, a1, a2, a3) STMT_START {                         \
-    const IV offset = loc - RExC_precomp;                              \
-    __ASSERT_(PASS2) Perl_warner(aTHX_ packWARN(WARN_REGEXP), m REPORT_LOCATION,               \
-           a1, a2, a3, REPORT_LOCATION_ARGS(offset)); \
+    __ASSERT_(PASS2) Perl_warner(aTHX_ packWARN(WARN_REGEXP),           \
+                                       m REPORT_LOCATION,               \
+                                      a1, a2, a3,                      \
+                                       REPORT_LOCATION_ARGS(loc));      \
  } STMT_END
  
  #define        ckWARN4reg(loc, m, a1, a2, a3) STMT_START {                     \
  } STMT_END
  
  #define        ckWARN4reg(loc, m, a1, a2, a3) STMT_START {                     \
-    const IV offset = loc - RExC_precomp;                              \
-    __ASSERT_(PASS2) Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), m REPORT_LOCATION,    \
-           a1, a2, a3, REPORT_LOCATION_ARGS(offset)); \
+    __ASSERT_(PASS2) Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),        \
+                                          m REPORT_LOCATION,            \
+                                         a1, a2, a3,                   \
+                                          REPORT_LOCATION_ARGS(loc));   \
  } STMT_END
  
  #define        vWARN5(loc, m, a1, a2, a3, a4) STMT_START {                     \
  } STMT_END
  
  #define        vWARN5(loc, m, a1, a2, a3, a4) STMT_START {                     \
-    const IV offset = loc - RExC_precomp;                              \
-    __ASSERT_(PASS2) Perl_warner(aTHX_ packWARN(WARN_REGEXP), m REPORT_LOCATION,               \
-           a1, a2, a3, a4, REPORT_LOCATION_ARGS(offset)); \
+    __ASSERT_(PASS2) Perl_warner(aTHX_ packWARN(WARN_REGEXP),           \
+                                       m REPORT_LOCATION,              \
+                                      a1, a2, a3, a4,                  \
+                                       REPORT_LOCATION_ARGS(loc));      \
  } STMT_END
  
  /* Macros for recording node offsets.   20001227 mjd@plover.com
  } STMT_END
  
  /* Macros for recording node offsets.   20001227 mjd@plover.com
@@ -938,6 +1001,151 @@ DEBUG_OPTIMISE_MORE_r(if(data){                                      \
      PerlIO_printf(Perl_debug_log,"\n");                              \
  });
  
      PerlIO_printf(Perl_debug_log,"\n");                              \
  });
  
+/* =========================================================
+ * BEGIN edit_distance stuff.
+ *
+ * This calculates how many single character changes of any type are needed to
+ * transform a string into another one.  It is taken from version 3.1 of
+ *
+ * https://metacpan.org/pod/Text::Levenshtein::Damerau::XS
+ */
+
+/* Our unsorted dictionary linked list.   */
+/* Note we use UVs, not chars. */
+
+struct dictionary{
+  UV key;
+  UV value;
+  struct dictionary* next;
+};
+typedef struct dictionary item;
+
+
+PERL_STATIC_INLINE item*
+push(UV key,item* curr)
+{
+    item* head;
+    Newxz(head, 1, item);
+    head->key = key;
+    head->value = 0;
+    head->next = curr;
+    return head;
+}
+
+
+PERL_STATIC_INLINE item*
+find(item* head, UV key)
+{
+    item* iterator = head;
+    while (iterator){
+        if (iterator->key == key){
+            return iterator;
+        }
+        iterator = iterator->next;
+    }
+
+    return NULL;
+}
+
+PERL_STATIC_INLINE item*
+uniquePush(item* head,UV key)
+{
+    item* iterator = head;
+
+    while (iterator){
+        if (iterator->key == key) {
+            return head;
+        }
+        iterator = iterator->next;
+    }
+
+    return push(key,head);
+}
+
+PERL_STATIC_INLINE void
+dict_free(item* head)
+{
+    item* iterator = head;
+
+    while (iterator) {
+        item* temp = iterator;
+        iterator = iterator->next;
+        Safefree(temp);
+    }
+
+    head = NULL;
+}
+
+/* End of Dictionary Stuff */
+
+/* All calculations/work are done here */
+STATIC int
+S_edit_distance(const UV* src,
+                const UV* tgt,
+                const STRLEN x,             /* length of src[] */
+                const STRLEN y,             /* length of tgt[] */
+                const SSize_t maxDistance
+)
+{
+    item *head = NULL;
+    UV swapCount,swapScore,targetCharCount,i,j;
+    UV *scores;
+    UV score_ceil = x + y;
+
+    PERL_ARGS_ASSERT_EDIT_DISTANCE;
+
+    /* intialize matrix start values */
+    Newxz(scores, ( (x + 2) * (y + 2)), UV);
+    scores[0] = score_ceil;
+    scores[1 * (y + 2) + 0] = score_ceil;
+    scores[0 * (y + 2) + 1] = score_ceil;
+    scores[1 * (y + 2) + 1] = 0;
+    head = uniquePush(uniquePush(head,src[0]),tgt[0]);
+
+    /* work loops    */
+    /* i = src index */
+    /* j = tgt index */
+    for (i=1;i<=x;i++) {
+        if (i < x)
+            head = uniquePush(head,src[i]);
+        scores[(i+1) * (y + 2) + 1] = i;
+        scores[(i+1) * (y + 2) + 0] = score_ceil;
+        swapCount = 0;
+
+        for (j=1;j<=y;j++) {
+            if (i == 1) {
+                if(j < y)
+                head = uniquePush(head,tgt[j]);
+                scores[1 * (y + 2) + (j + 1)] = j;
+                scores[0 * (y + 2) + (j + 1)] = score_ceil;
+            }
+
+            targetCharCount = find(head,tgt[j-1])->value;
+            swapScore = scores[targetCharCount * (y + 2) + swapCount] + i - targetCharCount - 1 + j - swapCount;
+
+            if (src[i-1] != tgt[j-1]){
+                scores[(i+1) * (y + 2) + (j + 1)] = MIN(swapScore,(MIN(scores[i * (y + 2) + j], MIN(scores[(i+1) * (y + 2) + j], scores[i * (y + 2) + (j + 1)])) + 1));
+            }
+            else {
+                swapCount = j;
+                scores[(i+1) * (y + 2) + (j + 1)] = MIN(scores[i * (y + 2) + j], swapScore);
+            }
+        }
+
+        find(head,src[i-1])->value = i;
+    }
+
+    {
+        IV score = scores[(x+1) * (y + 2) + (y + 1)];
+        dict_free(head);
+        Safefree(scores);
+        return (maxDistance != 0 && maxDistance < score)?(-1):score;
+    }
+}
+
+/* END of edit_distance() stuff
+ * ========================================================= */
+
  /* is c a control character for which we have a mnemonic? */
  #define isMNEMONIC_CNTRL(c) _IS_MNEMONIC_CNTRL_ONLY_FOR_USE_BY_REGCOMP_DOT_C(c)
  
  /* is c a control character for which we have a mnemonic? */
  #define isMNEMONIC_CNTRL(c) _IS_MNEMONIC_CNTRL_ONLY_FOR_USE_BY_REGCOMP_DOT_C(c)
  
@@ -1152,7 +1360,7 @@ S_get_ANYOF_cp_list_for_ssc(pTHX_ const RExC_state_t *pRExC_state,
       * returned list must, and will, contain every code point that is a
       * possibility. */
  
       * returned list must, and will, contain every code point that is a
       * possibility. */
  
-    SV* invlist = sv_2mortal(_new_invlist(0));
+    SV* invlist = NULL;
      SV* only_utf8_locale_invlist = NULL;
      unsigned int i;
      const U32 n = ARG(node);
      SV* only_utf8_locale_invlist = NULL;
      unsigned int i;
      const U32 n = ARG(node);
@@ -1174,6 +1382,7 @@ S_get_ANYOF_cp_list_for_ssc(pTHX_ const RExC_state_t *pRExC_state,
  
              /* Here, no compile-time swash, and there are things that won't be
               * known until runtime -- we have to assume it could be anything */
  
              /* Here, no compile-time swash, and there are things that won't be
               * known until runtime -- we have to assume it could be anything */
+            invlist = sv_2mortal(_new_invlist(1));
              return _add_range_to_invlist(invlist, 0, UV_MAX);
          }
          else if (ary[3] && ary[3] != &PL_sv_undef) {
              return _add_range_to_invlist(invlist, 0, UV_MAX);
          }
          else if (ary[3] && ary[3] != &PL_sv_undef) {
@@ -1184,13 +1393,17 @@ S_get_ANYOF_cp_list_for_ssc(pTHX_ const RExC_state_t *pRExC_state,
          }
  
          /* Get the code points valid only under UTF-8 locales */
          }
  
          /* Get the code points valid only under UTF-8 locales */
-        if ((ANYOF_FLAGS(node) & ANYOF_LOC_FOLD)
+        if ((ANYOF_FLAGS(node) & ANYOFL_FOLD)
              && ary[2] && ary[2] != &PL_sv_undef)
          {
              only_utf8_locale_invlist = ary[2];
          }
      }
  
              && ary[2] && ary[2] != &PL_sv_undef)
          {
              only_utf8_locale_invlist = ary[2];
          }
      }
  
+    if (! invlist) {
+        invlist = sv_2mortal(_new_invlist(0));
+    }
+
      /* An ANYOF node contains a bitmap for the first NUM_ANYOF_CODE_POINTS
       * code points, and an inversion list for the others, but if there are code
       * points that should match only conditionally on the target string being
      /* An ANYOF node contains a bitmap for the first NUM_ANYOF_CODE_POINTS
       * code points, and an inversion list for the others, but if there are code
       * points that should match only conditionally on the target string being
@@ -1210,14 +1423,21 @@ S_get_ANYOF_cp_list_for_ssc(pTHX_ const RExC_state_t *pRExC_state,
      /* Add in the points from the bit map */
      for (i = 0; i < NUM_ANYOF_CODE_POINTS; i++) {
          if (ANYOF_BITMAP_TEST(node, i)) {
      /* Add in the points from the bit map */
      for (i = 0; i < NUM_ANYOF_CODE_POINTS; i++) {
          if (ANYOF_BITMAP_TEST(node, i)) {
-            invlist = add_cp_to_invlist(invlist, i);
+            unsigned int start = i++;
+
+            for (; i < NUM_ANYOF_CODE_POINTS && ANYOF_BITMAP_TEST(node, i); ++i) {
+                /* empty */
+            }
+            invlist = _add_range_to_invlist(invlist, start, i-1);
              new_node_has_latin1 = TRUE;
          }
      }
  
      /* If this can match all upper Latin1 code points, have to add them
              new_node_has_latin1 = TRUE;
          }
      }
  
      /* If this can match all upper Latin1 code points, have to add them
-     * as well */
-    if (OP(node) == ANYOFD
+     * as well.  But don't add them if inverting, as when that gets done below,
+     * it would exclude all these characters, including the ones it shouldn't
+     * that were added just above */
+    if (! (ANYOF_FLAGS(node) & ANYOF_INVERT) && OP(node) == ANYOFD
          && (ANYOF_FLAGS(node) & ANYOF_SHARED_d_MATCHES_ALL_NON_UTF8_NON_ASCII_non_d_WARN_SUPER))
      {
          _invlist_union(invlist, PL_UpperLatin1, &invlist);
          && (ANYOF_FLAGS(node) & ANYOF_SHARED_d_MATCHES_ALL_NON_UTF8_NON_ASCII_non_d_WARN_SUPER))
      {
          _invlist_union(invlist, PL_UpperLatin1, &invlist);
@@ -1231,7 +1451,7 @@ S_get_ANYOF_cp_list_for_ssc(pTHX_ const RExC_state_t *pRExC_state,
      if (ANYOF_FLAGS(node) & ANYOF_INVERT) {
          _invlist_invert(invlist);
      }
      if (ANYOF_FLAGS(node) & ANYOF_INVERT) {
          _invlist_invert(invlist);
      }
-    else if (new_node_has_latin1 && ANYOF_FLAGS(node) & ANYOF_LOC_FOLD) {
+    else if (new_node_has_latin1 && ANYOF_FLAGS(node) & ANYOFL_FOLD) {
  
          /* Under /li, any 0-255 could fold to any other 0-255, depending on the
           * locale.  We can skip this if there are no 0-255 at all. */
  
          /* Under /li, any 0-255 could fold to any other 0-255, depending on the
           * locale.  We can skip this if there are no 0-255 at all. */
@@ -1308,7 +1528,12 @@ S_ssc_and(pTHX_ const RExC_state_t *pRExC_state, regnode_ssc *ssc,
          else {
              anded_flags = ANYOF_FLAGS(and_with)
              &( ANYOF_COMMON_FLAGS
          else {
              anded_flags = ANYOF_FLAGS(and_with)
              &( ANYOF_COMMON_FLAGS
-              |ANYOF_SHARED_d_MATCHES_ALL_NON_UTF8_NON_ASCII_non_d_WARN_SUPER);
+              |ANYOF_SHARED_d_MATCHES_ALL_NON_UTF8_NON_ASCII_non_d_WARN_SUPER
+              |ANYOF_SHARED_d_UPPER_LATIN1_UTF8_STRING_MATCHES_non_d_RUNTIME_USER_PROP);
+            if (ANYOFL_UTF8_LOCALE_REQD(ANYOF_FLAGS(and_with))) {
+                anded_flags &=
+                    ANYOFL_SHARED_UTF8_LOCALE_fold_HAS_MATCHES_nonfold_REQD;
+            }
          }
      }
  
          }
      }
  
@@ -1463,7 +1688,12 @@ S_ssc_or(pTHX_ const RExC_state_t *pRExC_state, regnode_ssc *ssc,
          if (OP(or_with) != ANYOFD) {
              ored_flags
              |= ANYOF_FLAGS(or_with)
          if (OP(or_with) != ANYOFD) {
              ored_flags
              |= ANYOF_FLAGS(or_with)
-             & ANYOF_SHARED_d_MATCHES_ALL_NON_UTF8_NON_ASCII_non_d_WARN_SUPER;
+             & ( ANYOF_SHARED_d_MATCHES_ALL_NON_UTF8_NON_ASCII_non_d_WARN_SUPER
+                |ANYOF_SHARED_d_UPPER_LATIN1_UTF8_STRING_MATCHES_non_d_RUNTIME_USER_PROP);
+            if (ANYOFL_UTF8_LOCALE_REQD(ANYOF_FLAGS(or_with))) {
+                ored_flags |=
+                    ANYOFL_SHARED_UTF8_LOCALE_fold_HAS_MATCHES_nonfold_REQD;
+            }
          }
      }
  
          }
      }
  
@@ -1608,35 +1838,28 @@ S_is_ssc_worth_it(const RExC_state_t * pRExC_state, const regnode_ssc * ssc)
       *      (unassigned, private use, surrogates, controls and formats).  This
       *      is a much large number. */
  
       *      (unassigned, private use, surrogates, controls and formats).  This
       *      is a much large number. */
  
-    const U32 max_match = (LOC)
-                          ? 127
-                          : (! UNI_SEMANTICS)
-                            ? 63
-                            : (invlist_highest(ssc->invlist) < 256)
-                              ? 127
-                              : ((NON_OTHER_COUNT + 1) / 2) - 1;
      U32 count = 0;      /* Running total of number of code points matched by
                             'ssc' */
      UV start, end;      /* Start and end points of current range in inversion
                             list */
      U32 count = 0;      /* Running total of number of code points matched by
                             'ssc' */
      UV start, end;      /* Start and end points of current range in inversion
                             list */
+    const U32 max_code_points = (LOC)
+                                ?  256
+                                : ((   ! UNI_SEMANTICS
+                                     || invlist_highest(ssc->invlist) < 256)
+                                  ? 128
+                                  : NON_OTHER_COUNT);
+    const U32 max_match = max_code_points / 2;
  
      PERL_ARGS_ASSERT_IS_SSC_WORTH_IT;
  
      invlist_iterinit(ssc->invlist);
      while (invlist_iternext(ssc->invlist, &start, &end)) {
  
      PERL_ARGS_ASSERT_IS_SSC_WORTH_IT;
  
      invlist_iterinit(ssc->invlist);
      while (invlist_iternext(ssc->invlist, &start, &end)) {
-
-        /* /u is the only thing that we expect to match above 255; so if not /u
-         * and even if there are matches above 255, ignore them.  This catches
-         * things like \d under /d which does match the digits above 255, but
-         * since the pattern is /d, it is not likely to be expecting them */
-        if (! UNI_SEMANTICS) {
-            if (start > 255) {
-                break;
-            }
-            end = MIN(end, 255);
+        if (start >= max_code_points) {
+            break;
          }
          }
+        end = MIN(end, max_code_points - 1);
          count += end - start + 1;
          count += end - start + 1;
-        if (count > max_match) {
+        if (count >= max_match) {
              invlist_iterfinish(ssc->invlist);
              return FALSE;
          }
              invlist_iterfinish(ssc->invlist);
              return FALSE;
          }
@@ -1665,7 +1888,8 @@ S_ssc_finalize(pTHX_ RExC_state_t *pRExC_state, regnode_ssc *ssc)
       * by the time we reach here */
      assert(! (ANYOF_FLAGS(ssc)
          & ~( ANYOF_COMMON_FLAGS
       * by the time we reach here */
      assert(! (ANYOF_FLAGS(ssc)
          & ~( ANYOF_COMMON_FLAGS
-            |ANYOF_SHARED_d_MATCHES_ALL_NON_UTF8_NON_ASCII_non_d_WARN_SUPER)));
+            |ANYOF_SHARED_d_MATCHES_ALL_NON_UTF8_NON_ASCII_non_d_WARN_SUPER
+            |ANYOF_SHARED_d_UPPER_LATIN1_UTF8_STRING_MATCHES_non_d_RUNTIME_USER_PROP)));
  
      populate_ANYOF_from_invlist( (regnode *) ssc, &invlist);
  
  
      populate_ANYOF_from_invlist( (regnode *) ssc, &invlist);
  
@@ -3364,6 +3588,14 @@ S_construct_ahocorasick_from_trie(pTHX_ RExC_state_t *pRExC_state, regnode *sour
   * The adjacent nodes actually may be separated by NOTHING-kind nodes, and
   * these get optimized out
   *
   * The adjacent nodes actually may be separated by NOTHING-kind nodes, and
   * these get optimized out
   *
+ * XXX khw thinks this should be enhanced to fill EXACT (at least) nodes as full
+ * as possible, even if that means splitting an existing node so that its first
+ * part is moved to the preceeding node.  This would maximise the efficiency of
+ * memEQ during matching.  Elsewhere in this file, khw proposes splitting
+ * EXACTFish nodes into portions that don't change under folding vs those that
+ * do.  Those portions that don't change may be the only things in the pattern that
+ * could be used to find fixed and floating strings.
+ *
   * If a node is to match under /i (folded), the number of characters it matches
   * can be different than its character length if it contains a multi-character
   * fold.  *min_subtract is set to the total delta number of characters of the
   * If a node is to match under /i (folded), the number of characters it matches
   * can be different than its character length if it contains a multi-character
   * fold.  *min_subtract is set to the total delta number of characters of the
@@ -4802,7 +5034,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                     Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
                         "Quantifier unexpected on zero-length expression "
                         "in regex m/%"UTF8f"/",
                     Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
                         "Quantifier unexpected on zero-length expression "
                         "in regex m/%"UTF8f"/",
-                        UTF8fARG(UTF, RExC_end - RExC_precomp,
+                        UTF8fARG(UTF, RExC_precomp_end - RExC_precomp,
                                   RExC_precomp));
                     (void)ReREFCNT_inc(RExC_rx_sv);
                 }
                                   RExC_precomp));
                     (void)ReREFCNT_inc(RExC_rx_sv);
                 }
@@ -6482,6 +6714,10 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
      /* Initialize these here instead of as-needed, as is quick and avoids
       * having to test them each time otherwise */
      if (! PL_AboveLatin1) {
      /* Initialize these here instead of as-needed, as is quick and avoids
       * having to test them each time otherwise */
      if (! PL_AboveLatin1) {
+#ifdef DEBUGGING
+        char * dump_len_string;
+#endif
+
         PL_AboveLatin1 = _new_invlist_C_array(AboveLatin1_invlist);
         PL_Latin1 = _new_invlist_C_array(Latin1_invlist);
         PL_UpperLatin1 = _new_invlist_C_array(UpperLatin1_invlist);
         PL_AboveLatin1 = _new_invlist_C_array(AboveLatin1_invlist);
         PL_Latin1 = _new_invlist_C_array(Latin1_invlist);
         PL_UpperLatin1 = _new_invlist_C_array(UpperLatin1_invlist);
@@ -6495,6 +6731,14 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
         PL_InBitmap = _new_invlist(2);
         PL_InBitmap = _add_range_to_invlist(PL_InBitmap, 0,
                                                      NUM_ANYOF_CODE_POINTS - 1);
         PL_InBitmap = _new_invlist(2);
         PL_InBitmap = _add_range_to_invlist(PL_InBitmap, 0,
                                                      NUM_ANYOF_CODE_POINTS - 1);
+#ifdef DEBUGGING
+        dump_len_string = PerlEnv_getenv("PERL_DUMP_RE_MAX_LEN");
+        if (   ! dump_len_string
+            || ! grok_atoUV(dump_len_string, (UV *)&PL_dump_re_max_len, NULL))
+        {
+            PL_dump_re_max_len = 0;
+        }
+#endif
      }
  
      pRExC_state->code_blocks = NULL;
      }
  
      pRExC_state->code_blocks = NULL;
@@ -6675,11 +6919,13 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
      }
  
      RExC_precomp = exp;
      }
  
      RExC_precomp = exp;
+    RExC_precomp_adj = 0;
      RExC_flags = rx_flags;
      RExC_pm_flags = pm_flags;
  
      if (runtime_code) {
      RExC_flags = rx_flags;
      RExC_pm_flags = pm_flags;
  
      if (runtime_code) {
-       if (TAINTING_get && TAINT_get)
+        assert(TAINTING_get || !TAINT_get);
+       if (TAINT_get)
             Perl_croak(aTHX_ "Eval-group in insecure regular expression");
  
         if (!S_compile_runtime_code(aTHX_ pRExC_state, exp, plen)) {
             Perl_croak(aTHX_ "Eval-group in insecure regular expression");
  
         if (!S_compile_runtime_code(aTHX_ pRExC_state, exp, plen)) {
@@ -6707,8 +6953,9 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
  
      /* First pass: determine size, legality. */
      RExC_parse = exp;
  
      /* First pass: determine size, legality. */
      RExC_parse = exp;
-    RExC_start = exp;
+    RExC_start = RExC_adjusted_start = exp;
      RExC_end = exp + plen;
      RExC_end = exp + plen;
+    RExC_precomp_end = RExC_end;
      RExC_naughty = 0;
      RExC_npar = 1;
      RExC_nestroot = 0;
      RExC_naughty = 0;
      RExC_npar = 1;
      RExC_nestroot = 0;
@@ -6728,6 +6975,15 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
      RExC_recurse_count = 0;
      pRExC_state->code_index = 0;
  
      RExC_recurse_count = 0;
      pRExC_state->code_index = 0;
  
+    /* This NUL is guaranteed because the pattern comes from an SV*, and the sv
+     * code makes sure the final byte is an uncounted NUL.  But should this
+     * ever not be the case, lots of things could read beyond the end of the
+     * buffer: loops like
+     *      while(isFOO(*RExC_parse)) RExC_parse++;
+     *      strchr(RExC_parse, "foo");
+     * etc.  So it is worth noting. */
+    assert(*RExC_end == '\0');
+
      DEBUG_PARSE_r(
         PerlIO_printf(Perl_debug_log, "Starting first pass (sizing)\n");
          RExC_lastnum=0;
      DEBUG_PARSE_r(
         PerlIO_printf(Perl_debug_log, "Starting first pass (sizing)\n");
          RExC_lastnum=0;
@@ -7929,7 +8185,8 @@ S_reg_scan_name(pTHX_ RExC_state_t *pRExC_state, U32 flags)
      assert (RExC_parse <= RExC_end);
      if (RExC_parse == RExC_end) NOOP;
      else if (isIDFIRST_lazy_if(RExC_parse, UTF)) {
      assert (RExC_parse <= RExC_end);
      if (RExC_parse == RExC_end) NOOP;
      else if (isIDFIRST_lazy_if(RExC_parse, UTF)) {
-        /* skip IDFIRST by using do...while */
+         /* Note that the code here assumes well-formed UTF-8.  Skip IDFIRST by
+          * using do...while */
         if (UTF)
             do {
                 RExC_parse += UTF8SKIP(RExC_parse);
         if (UTF)
             do {
                 RExC_parse += UTF8SKIP(RExC_parse);
@@ -8104,6 +8361,52 @@ S_invlist_set_len(pTHX_ SV* const invlist, const UV len, const bool offset)
  
  #ifndef PERL_IN_XSUB_RE
  
  
  #ifndef PERL_IN_XSUB_RE
  
+STATIC void
+S_invlist_replace_list_destroys_src(pTHX_ SV * dest, SV * src)
+{
+    /* Replaces the inversion list in 'src' with the one in 'dest'.  It steals
+     * the list from 'src', so 'src' is made to have a NULL list.  This is
+     * similar to what SvSetMagicSV() would do, if it were implemented on
+     * inversion lists, though this routine avoids a copy */
+
+    const UV src_len          = _invlist_len(src);
+    const bool src_offset     = *get_invlist_offset_addr(src);
+    const STRLEN src_byte_len = SvLEN(src);
+    char * array              = SvPVX(src);
+
+    const int oldtainted = TAINT_get;
+
+    PERL_ARGS_ASSERT_INVLIST_REPLACE_LIST_DESTROYS_SRC;
+
+    assert(SvTYPE(src) == SVt_INVLIST);
+    assert(SvTYPE(dest) == SVt_INVLIST);
+    assert(! invlist_is_iterating(src));
+    assert(SvCUR(src) == 0 || SvCUR(src) < SvLEN(src));
+
+    /* Make sure it ends in the right place with a NUL, as our inversion list
+     * manipulations aren't careful to keep this true, but sv_usepvn_flags()
+     * asserts it */
+    array[src_byte_len - 1] = '\0';
+
+    TAINT_NOT;      /* Otherwise it breaks */
+    sv_usepvn_flags(dest,
+                    (char *) array,
+                    src_byte_len - 1,
+
+                    /* This flag is documented to cause a copy to be avoided */
+                    SV_HAS_TRAILING_NUL);
+    TAINT_set(oldtainted);
+    SvPV_set(src, 0);
+    SvLEN_set(src, 0);
+    SvCUR_set(src, 0);
+
+    /* Finish up copying over the other fields in an inversion list */
+    *get_invlist_offset_addr(dest) = src_offset;
+    invlist_set_len(dest, src_len, src_offset);
+    *get_invlist_previous_index_addr(dest) = 0;
+    invlist_iterfinish(dest);
+}
+
  PERL_STATIC_INLINE IV*
  S_get_invlist_previous_index_addr(SV* invlist)
  {
  PERL_STATIC_INLINE IV*
  S_get_invlist_previous_index_addr(SV* invlist)
  {
@@ -8139,17 +8442,34 @@ S_invlist_set_previous_index(SV* const invlist, const IV index)
  }
  
  PERL_STATIC_INLINE void
  }
  
  PERL_STATIC_INLINE void
-S_invlist_trim(SV* const invlist)
+S_invlist_trim(SV* invlist)
  {
  {
+    /* Free the not currently-being-used space in an inversion list */
+
+    /* But don't free up the space needed for the 0 UV that is always at the
+     * beginning of the list, nor the trailing NUL */
+    const UV min_size = TO_INTERNAL_SIZE(1) + 1;
+
      PERL_ARGS_ASSERT_INVLIST_TRIM;
  
      assert(SvTYPE(invlist) == SVt_INVLIST);
  
      PERL_ARGS_ASSERT_INVLIST_TRIM;
  
      assert(SvTYPE(invlist) == SVt_INVLIST);
  
-    /* Change the length of the inversion list to how many entries it currently
-     * has */
-    SvPV_shrink_to_cur((SV *) invlist);
+    SvPV_renew(invlist, MAX(min_size, SvCUR(invlist) + 1));
  }
  
  }
  
+PERL_STATIC_INLINE void
+S_invlist_clear(pTHX_ SV* invlist)    /* Empty the inversion list */
+{
+    PERL_ARGS_ASSERT_INVLIST_CLEAR;
+
+    assert(SvTYPE(invlist) == SVt_INVLIST);
+
+    invlist_set_len(invlist, 0, 0);
+    invlist_trim(invlist);
+}
+
+#endif /* ifndef PERL_IN_XSUB_RE */
+
  PERL_STATIC_INLINE bool
  S_invlist_is_iterating(SV* const invlist)
  {
  PERL_STATIC_INLINE bool
  S_invlist_is_iterating(SV* const invlist)
  {
@@ -8158,8 +8478,6 @@ S_invlist_is_iterating(SV* const invlist)
      return *(get_invlist_iter_addr(invlist)) < (STRLEN) UV_MAX;
  }
  
      return *(get_invlist_iter_addr(invlist)) < (STRLEN) UV_MAX;
  }
  
-#endif /* ifndef PERL_IN_XSUB_RE */
-
  PERL_STATIC_INLINE UV
  S_invlist_max(SV* const invlist)
  {
  PERL_STATIC_INLINE UV
  S_invlist_max(SV* const invlist)
  {
@@ -8372,7 +8690,9 @@ Perl__invlist_search(SV* const invlist, const UV cp)
      /* Searches the inversion list for the entry that contains the input code
       * point <cp>.  If <cp> is not in the list, -1 is returned.  Otherwise, the
       * return value is the index into the list's array of the range that
      /* Searches the inversion list for the entry that contains the input code
       * point <cp>.  If <cp> is not in the list, -1 is returned.  Otherwise, the
       * return value is the index into the list's array of the range that
-     * contains <cp> */
+     * contains <cp>, that is, 'i' such that
+     * array[i] <= cp < array[i+1]
+     */
  
      IV low = 0;
      IV mid;
  
      IV low = 0;
      IV mid;
@@ -8391,7 +8711,10 @@ Perl__invlist_search(SV* const invlist, const UV cp)
      array = invlist_array(invlist);
  
      mid = invlist_previous_index(invlist);
      array = invlist_array(invlist);
  
      mid = invlist_previous_index(invlist);
-    assert(mid >=0 && mid <= highest_element);
+    assert(mid >=0);
+    if (mid > highest_element) {
+        mid = highest_element;
+    }
  
      /* <mid> contains the cache of the result of the previous call to this
       * function (0 the first time).  See if this call is for the same result,
  
      /* <mid> contains the cache of the result of the previous call to this
       * function (0 the first time).  See if this call is for the same result,
@@ -8555,10 +8878,10 @@ Perl__invlist_union_maybe_complement_2nd(pTHX_ SV* const a, SV* const b,
      /* Take the union of two inversion lists and point <output> to it.  *output
       * SHOULD BE DEFINED upon input, and if it points to one of the two lists,
       * the reference count to that list will be decremented if not already a
      /* Take the union of two inversion lists and point <output> to it.  *output
       * SHOULD BE DEFINED upon input, and if it points to one of the two lists,
       * the reference count to that list will be decremented if not already a
-     * temporary (mortal); otherwise *output will be made correspondingly
-     * mortal.  The first list, <a>, may be NULL, in which case a copy of the
-     * second list is returned.  If <complement_b> is TRUE, the union is taken
-     * of the complement (inversion) of <b> instead of b itself.
+     * temporary (mortal); otherwise just its contents will be modified to be
+     * the union.  The first list, <a>, may be NULL, in which case a copy of
+     * the second list is returned.  If <complement_b> is TRUE, the union is
+     * taken of the complement (inversion) of <b> instead of b itself.
       *
       * The basis for this comes from "Unicode Demystified" Chapter 13 by
       * Richard Gillam, published by Addison-Wesley, and explained at some
       *
       * The basis for this comes from "Unicode Demystified" Chapter 13 by
       * Richard Gillam, published by Addison-Wesley, and explained at some
@@ -8597,56 +8920,103 @@ Perl__invlist_union_maybe_complement_2nd(pTHX_ SV* const a, SV* const b,
      PERL_ARGS_ASSERT__INVLIST_UNION_MAYBE_COMPLEMENT_2ND;
      assert(a != b);
  
      PERL_ARGS_ASSERT__INVLIST_UNION_MAYBE_COMPLEMENT_2ND;
      assert(a != b);
  
-    /* If either one is empty, the union is the other one */
-    if (a == NULL || ((len_a = _invlist_len(a)) == 0)) {
-        bool make_temp = FALSE; /* Should we mortalize the result? */
+    len_b = _invlist_len(b);
+    if (len_b == 0) {
  
  
-       if (*output == a) {
-            if (a != NULL) {
-                if (! (make_temp = cBOOL(SvTEMP(a)))) {
-                    SvREFCNT_dec_NN(a);
-                }
+        /* Here, 'b' is empty.  If the output is the complement of 'b', the
+         * union is all possible code points, and we need not even look at 'a'.
+         * It's easiest to create a new inversion list that matches everything.
+         * */
+        if (complement_b) {
+            SV* everything = _new_invlist(1);
+            _append_range_to_invlist(everything, 0, UV_MAX);
+
+            /* If the output didn't exist, just point it at the new list */
+            if (*output == NULL) {
+                *output = everything;
+                return;
              }
              }
-       }
-       if (*output != b) {
-           *output = invlist_clone(b);
-            if (complement_b) {
-                _invlist_invert(*output);
+
+            /* Otherwise, replace its contents with the new list */
+            invlist_replace_list_destroys_src(*output, everything);
+            SvREFCNT_dec_NN(everything);
+            return;
+        }
+
+        /* Here, we don't want the complement of 'b', and since it is empty,
+         * the union will come entirely from 'a'.  If 'a' is NULL or empty, the
+         * output will be empty */
+
+        if (a == NULL) {
+            *output = _new_invlist(0);
+            return;
+        }
+
+        if (_invlist_len(a) == 0) {
+            invlist_clear(*output);
+            return;
+        }
+
+        /* Here, 'a' is not empty, and entirely determines the union.  If the
+         * output is not to overwrite 'b', we can just return 'a'. */
+        if (*output != b) {
+
+            /* If the output is to overwrite 'a', we have a no-op, as it's
+             * already in 'a' */
+            if (*output == a) {
+                return;
              }
              }
-       } /* else *output already = b; */
  
  
-        if (make_temp) {
-            sv_2mortal(*output);
+            /* But otherwise we have to copy 'a' to the output */
+            *output = invlist_clone(a);
+            return;
          }
          }
+
+        /* Here, 'b' is to be overwritten by the output, which will be 'a' */
+        u = invlist_clone(a);
+        invlist_replace_list_destroys_src(*output, u);
+        SvREFCNT_dec_NN(u);
+
         return;
      }
         return;
      }
-    else if ((len_b = _invlist_len(b)) == 0) {
-        bool make_temp = FALSE;
-       if (*output == b) {
-            if (! (make_temp = cBOOL(SvTEMP(b)))) {
-                SvREFCNT_dec_NN(b);
+
+    if (a == NULL || ((len_a = _invlist_len(a)) == 0)) {
+
+        /* Here, 'a' is empty (and b is not).  That means the union will come
+         * entirely from 'b'.  If the output is not to overwrite 'a', we can
+         * just return what's in 'b'.  */
+        if (*output != a) {
+
+            /* If the output is to overwrite 'b', it's already in 'b', but
+             * otherwise we have to copy 'b' to the output */
+            if (*output != b) {
+                *output = invlist_clone(b);
              }
              }
-       }
  
  
-        /* The complement of an empty list is a list that has everything in it,
-         * so the union with <a> includes everything too */
-        if (complement_b) {
-            if (a == *output) {
-                if (! (make_temp = cBOOL(SvTEMP(a)))) {
-                    SvREFCNT_dec_NN(a);
-                }
+            /* And if the output is to be the inversion of 'b', do that */
+            if (complement_b) {
+                _invlist_invert(*output);
              }
              }
-            *output = _new_invlist(1);
-            _append_range_to_invlist(*output, 0, UV_MAX);
+
+            return;
          }
          }
-        else if (*output != a) {
-            *output = invlist_clone(a);
+
+        /* Here, 'a', which is empty or even NULL, is to be overwritten by the
+         * output, which will either be 'b' or the complement of 'b' */
+
+        if (a == NULL) {
+            *output = invlist_clone(b);
          }
          }
-        /* else *output already = a; */
+        else {
+            u = invlist_clone(b);
+            invlist_replace_list_destroys_src(*output, u);
+            SvREFCNT_dec_NN(u);
+       }
  
  
-        if (make_temp) {
-            sv_2mortal(*output);
+        if (complement_b) {
+            _invlist_invert(*output);
          }
          }
+
         return;
      }
  
         return;
      }
  
@@ -8712,7 +9082,7 @@ Perl__invlist_union_maybe_complement_2nd(pTHX_ SV* const a, SV* const b,
  
         /* Here, have chosen which of the two inputs to look at.  Only output
          * if the running count changes to/from 0, which marks the
  
         /* Here, have chosen which of the two inputs to look at.  Only output
          * if the running count changes to/from 0, which marks the
-        * beginning/end of a range in that's in the set */
+        * beginning/end of a range that's in the set */
         if (cp_in_set) {
             if (count == 0) {
                 array_u[i_u++] = cp;
         if (cp_in_set) {
             if (count == 0) {
                 array_u[i_u++] = cp;
@@ -8746,7 +9116,7 @@ Perl__invlist_union_maybe_complement_2nd(pTHX_ SV* const a, SV* const b,
       * 4) the exhausted wasn't in its set, non-exhausted is, count is 1;
       *    decrementing to 0 insures that we look at the remainder of the
       *    non-exhausted set */
       * 4) the exhausted wasn't in its set, non-exhausted is, count is 1;
       *    decrementing to 0 insures that we look at the remainder of the
       *    non-exhausted set */
-    if ((i_a != len_a && PREV_RANGE_MATCHES_INVLIST(i_a))
+    if (   (i_a != len_a && PREV_RANGE_MATCHES_INVLIST(i_a))
         || (i_b != len_b && PREV_RANGE_MATCHES_INVLIST(i_b)))
      {
         count--;
         || (i_b != len_b && PREV_RANGE_MATCHES_INVLIST(i_b)))
      {
         count--;
@@ -8762,8 +9132,9 @@ Perl__invlist_union_maybe_complement_2nd(pTHX_ SV* const a, SV* const b,
         len_u += (len_a - i_a) + (len_b - i_b);
      }
  
         len_u += (len_a - i_a) + (len_b - i_b);
      }
  
-    /* Set result to final length, which can change the pointer to array_u, so
-     * re-find it */
+    /* Set the result to the final length, which can change the pointer to
+     * array_u, so re-find it.  (Note that it is unlikely that this will
+     * change, as we are shrinking the space, not enlarging it) */
      if (len_u != _invlist_len(u)) {
         invlist_set_len(u, len_u, *get_invlist_offset_addr(u));
         invlist_trim(u);
      if (len_u != _invlist_len(u)) {
         invlist_set_len(u, len_u, *get_invlist_offset_addr(u));
         invlist_trim(u);
@@ -8773,7 +9144,7 @@ Perl__invlist_union_maybe_complement_2nd(pTHX_ SV* const a, SV* const b,
      /* When 'count' is 0, the list that was exhausted (if one was shorter than
       * the other) ended with everything above it not in its set.  That means
       * that the remaining part of the union is precisely the same as the
      /* When 'count' is 0, the list that was exhausted (if one was shorter than
       * the other) ended with everything above it not in its set.  That means
       * that the remaining part of the union is precisely the same as the
-     * non-exhausted list, so can just copy it unchanged.  (If both list were
+     * non-exhausted list, so can just copy it unchanged.  (If both lists were
       * exhausted at the same time, then the operations below will be both 0.)
       */
      if (count == 0) {
       * exhausted at the same time, then the operations below will be both 0.)
       */
      if (count == 0) {
@@ -8786,21 +9157,32 @@ Perl__invlist_union_maybe_complement_2nd(pTHX_ SV* const a, SV* const b,
         }
      }
  
         }
      }
  
-    /*  We may be removing a reference to one of the inputs.  If so, the output
-     *  is made mortal if the input was.  (Mortal SVs shouldn't have their ref
-     *  count decremented) */
-    if (a == *output || b == *output) {
+    /* If the output is not to overwrite either of the inputs, just return the
+     * calculated union */
+    if (a != *output && b != *output) {
+        *output = u;
+    }
+    else {
+        /*  Here, the output is to be the same as one of the input scalars,
+         *  hence replacing it.  The simple thing to do is to free the input
+         *  scalar, making it instead be the output one.  But experience has
+         *  shown [perl #127392] that if the input is a mortal, we can get a
+         *  huge build-up of these during regex compilation before they get
+         *  freed.  So for that case, replace just the input's interior with
+         *  the output's, and then free the output */
+
          assert(! invlist_is_iterating(*output));
          assert(! invlist_is_iterating(*output));
-        if ((SvTEMP(*output))) {
-            sv_2mortal(u);
+
+        if (! SvTEMP(*output)) {
+            SvREFCNT_dec_NN(*output);
+            *output = u;
          }
          else {
          }
          else {
-            SvREFCNT_dec_NN(*output);
+            invlist_replace_list_destroys_src(*output, u);
+            SvREFCNT_dec_NN(u);
          }
      }
  
          }
      }
  
-    *output = u;
-
      return;
  }
  
      return;
  }
  
@@ -8811,11 +9193,11 @@ Perl__invlist_intersection_maybe_complement_2nd(pTHX_ SV* const a, SV* const b,
      /* Take the intersection of two inversion lists and point <i> to it.  *i
       * SHOULD BE DEFINED upon input, and if it points to one of the two lists,
       * the reference count to that list will be decremented if not already a
      /* Take the intersection of two inversion lists and point <i> to it.  *i
       * SHOULD BE DEFINED upon input, and if it points to one of the two lists,
       * the reference count to that list will be decremented if not already a
-     * temporary (mortal); otherwise *i will be made correspondingly mortal.
-     * The first list, <a>, may be NULL, in which case an empty list is
-     * returned.  If <complement_b> is TRUE, the result will be the
-     * intersection of <a> and the complement (or inversion) of <b> instead of
-     * <b> directly.
+     * temporary (mortal); otherwise just its contents will be modified to be
+     * the intersection.  The first list, <a>, may be NULL, in which case an
+     * empty list is returned.  If <complement_b> is TRUE, the result will be
+     * the intersection of <a> and the complement (or inversion) of <b> instead
+     * of <b> directly.
       *
       * The basis for this comes from "Unicode Demystified" Chapter 13 by
       * Richard Gillam, published by Addison-Wesley, and explained at some
       *
       * The basis for this comes from "Unicode Demystified" Chapter 13 by
       * Richard Gillam, published by Addison-Wesley, and explained at some
@@ -8853,48 +9235,38 @@ Perl__invlist_intersection_maybe_complement_2nd(pTHX_ SV* const a, SV* const b,
      /* Special case if either one is empty */
      len_a = (a == NULL) ? 0 : _invlist_len(a);
      if ((len_a == 0) || ((len_b = _invlist_len(b)) == 0)) {
      /* Special case if either one is empty */
      len_a = (a == NULL) ? 0 : _invlist_len(a);
      if ((len_a == 0) || ((len_b = _invlist_len(b)) == 0)) {
-        bool make_temp = FALSE;
-
          if (len_a != 0 && complement_b) {
  
          if (len_a != 0 && complement_b) {
  
-            /* Here, 'a' is not empty, therefore from the above 'if', 'b' must
-             * be empty.  Here, also we are using 'b's complement, which hence
-             * must be every possible code point.  Thus the intersection is
-             * simply 'a'. */
-            if (*i != a) {
-                if (*i == b) {
-                    if (! (make_temp = cBOOL(SvTEMP(b)))) {
-                        SvREFCNT_dec_NN(b);
-                    }
-                }
+            /* Here, 'a' is not empty, therefore from the enclosing 'if', 'b'
+             * must be empty.  Here, also we are using 'b's complement, which
+             * hence must be every possible code point.  Thus the intersection
+             * is simply 'a'. */
  
  
-                *i = invlist_clone(a);
+            if (*i == a) {  /* No-op */
+                return;
              }
              }
-            /* else *i is already 'a' */
  
  
-            if (make_temp) {
-                sv_2mortal(*i);
+            /* If not overwriting either input, just make a copy of 'a' */
+            if (*i != b) {
+                *i = invlist_clone(a);
+                return;
              }
              }
+
+            /* Here we are overwriting 'b' with 'a's contents */
+            r = invlist_clone(a);
+            invlist_replace_list_destroys_src(*i, r);
+            SvREFCNT_dec_NN(r);
              return;
          }
  
          /* Here, 'a' or 'b' is empty and not using the complement of 'b'.  The
           * intersection must be empty */
              return;
          }
  
          /* Here, 'a' or 'b' is empty and not using the complement of 'b'.  The
           * intersection must be empty */
-       if (*i == a) {
-            if (! (make_temp = cBOOL(SvTEMP(a)))) {
-                SvREFCNT_dec_NN(a);
-            }
-       }
-       else if (*i == b) {
-            if (! (make_temp = cBOOL(SvTEMP(b)))) {
-                SvREFCNT_dec_NN(b);
-            }
-       }
-       *i = _new_invlist(0);
-        if (make_temp) {
-            sv_2mortal(*i);
+        if (*i == NULL) {
+            *i = _new_invlist(0);
+            return;
          }
  
          }
  
+        invlist_clear(*i);
         return;
      }
  
         return;
      }
  
@@ -8992,7 +9364,7 @@ Perl__invlist_intersection_maybe_complement_2nd(pTHX_ SV* const a, SV* const b,
       *    everything that remains in the non-exhausted set.
       * 4) the exhausted wasn't in its set, non-exhausted is, count is 1, and
       *    remains 1.  And the intersection has nothing more. */
       *    everything that remains in the non-exhausted set.
       * 4) the exhausted wasn't in its set, non-exhausted is, count is 1, and
       *    remains 1.  And the intersection has nothing more. */
-    if ((i_a == len_a && PREV_RANGE_MATCHES_INVLIST(i_a))
+    if (   (i_a == len_a && PREV_RANGE_MATCHES_INVLIST(i_a))
         || (i_b == len_b && PREV_RANGE_MATCHES_INVLIST(i_b)))
      {
         count++;
         || (i_b == len_b && PREV_RANGE_MATCHES_INVLIST(i_b)))
      {
         count++;
@@ -9006,8 +9378,9 @@ Perl__invlist_intersection_maybe_complement_2nd(pTHX_ SV* const a, SV* const b,
         len_r += (len_a - i_a) + (len_b - i_b);
      }
  
         len_r += (len_a - i_a) + (len_b - i_b);
      }
  
-    /* Set result to final length, which can change the pointer to array_r, so
-     * re-find it */
+    /* Set the result to the final length, which can change the pointer to
+     * array_r, so re-find it.  (Note that it is unlikely that this will
+     * change, as we are shrinking the space, not enlarging it) */
      if (len_r != _invlist_len(r)) {
         invlist_set_len(r, len_r, *get_invlist_offset_addr(r));
         invlist_trim(r);
      if (len_r != _invlist_len(r)) {
         invlist_set_len(r, len_r, *get_invlist_offset_addr(r));
         invlist_trim(r);
@@ -9025,21 +9398,38 @@ Perl__invlist_intersection_maybe_complement_2nd(pTHX_ SV* const a, SV* const b,
         }
      }
  
         }
      }
  
-    /*  We may be removing a reference to one of the inputs.  If so, the output
-     *  is made mortal if the input was.  (Mortal SVs shouldn't have their ref
-     *  count decremented) */
-    if (a == *i || b == *i) {
+    /* If the output is not to overwrite either of the inputs, just return the
+     * calculated intersection */
+    if (a != *i && b != *i) {
+        *i = r;
+    }
+    else {
+        /*  Here, the output is to be the same as one of the input scalars,
+         *  hence replacing it.  The simple thing to do is to free the input
+         *  scalar, making it instead be the output one.  But experience has
+         *  shown [perl #127392] that if the input is a mortal, we can get a
+         *  huge build-up of these during regex compilation before they get
+         *  freed.  So for that case, replace just the input's interior with
+         *  the output's, and then free the output.  A short-cut in this case
+         *  is if the output is empty, we can just set the input to be empty */
+
          assert(! invlist_is_iterating(*i));
          assert(! invlist_is_iterating(*i));
-        if (SvTEMP(*i)) {
-            sv_2mortal(r);
+
+        if (! SvTEMP(*i)) {
+            SvREFCNT_dec_NN(*i);
+            *i = r;
          }
          else {
          }
          else {
-            SvREFCNT_dec_NN(*i);
+            if (len_r) {
+                invlist_replace_list_destroys_src(*i, r);
+            }
+            else {
+                invlist_clear(*i);
+            }
+            SvREFCNT_dec_NN(r);
          }
      }
  
          }
      }
  
-    *i = r;
-
      return;
  }
  
      return;
  }
  
@@ -9275,38 +9665,56 @@ S_invlist_highest(SV* const invlist)
             : array[len - 1] - 1;
  }
  
             : array[len - 1] - 1;
  }
  
-#ifndef PERL_IN_XSUB_RE
-SV *
-Perl__invlist_contents(pTHX_ SV* const invlist)
+STATIC SV *
+S_invlist_contents(pTHX_ SV* const invlist, const bool traditional_style)
  {
      /* Get the contents of an inversion list into a string SV so that they can
  {
      /* Get the contents of an inversion list into a string SV so that they can
-     * be printed out.  It uses the format traditionally done for debug tracing
-     */
+     * be printed out.  If 'traditional_style' is TRUE, it uses the format
+     * traditionally done for debug tracing; otherwise it uses a format
+     * suitable for just copying to the output, with blanks between ranges and
+     * a dash between range components */
  
      UV start, end;
  
      UV start, end;
-    SV* output = newSVpvs("\n");
+    SV* output;
+    const char intra_range_delimiter = (traditional_style ? '\t' : '-');
+    const char inter_range_delimiter = (traditional_style ? '\n' : ' ');
+
+    if (traditional_style) {
+        output = newSVpvs("\n");
+    }
+    else {
+        output = newSVpvs("");
+    }
  
  
-    PERL_ARGS_ASSERT__INVLIST_CONTENTS;
+    PERL_ARGS_ASSERT_INVLIST_CONTENTS;
  
      assert(! invlist_is_iterating(invlist));
  
      invlist_iterinit(invlist);
      while (invlist_iternext(invlist, &start, &end)) {
         if (end == UV_MAX) {
  
      assert(! invlist_is_iterating(invlist));
  
      invlist_iterinit(invlist);
      while (invlist_iternext(invlist, &start, &end)) {
         if (end == UV_MAX) {
-           Perl_sv_catpvf(aTHX_ output, "%04"UVXf"\tINFINITY\n", start);
+           Perl_sv_catpvf(aTHX_ output, "%04"UVXf"%cINFINITY%c",
+                                          start, intra_range_delimiter,
+                                                 inter_range_delimiter);
         }
         else if (end != start) {
         }
         else if (end != start) {
-           Perl_sv_catpvf(aTHX_ output, "%04"UVXf"\t%04"UVXf"\n",
-                   start,       end);
+           Perl_sv_catpvf(aTHX_ output, "%04"UVXf"%c%04"UVXf"%c",
+                                         start,
+                                                   intra_range_delimiter,
+                                                  end, inter_range_delimiter);
         }
         else {
         }
         else {
-           Perl_sv_catpvf(aTHX_ output, "%04"UVXf"\n", start);
+           Perl_sv_catpvf(aTHX_ output, "%04"UVXf"%c",
+                                          start, inter_range_delimiter);
         }
      }
  
         }
      }
  
+    if (SvCUR(output) && ! traditional_style) {/* Get rid of trailing blank */
+        SvCUR_set(output, SvCUR(output) - 1);
+    }
+
      return output;
  }
      return output;
  }
-#endif
  
  #ifndef PERL_IN_XSUB_RE
  void
  
  #ifndef PERL_IN_XSUB_RE
  void
@@ -9377,9 +9785,9 @@ Perl__load_PL_utf8_foldclosures (pTHX)
  }
  #endif
  
  }
  #endif
  
-#ifdef PERL_ARGS_ASSERT__INVLISTEQ
+#if defined(PERL_ARGS_ASSERT__INVLISTEQ) && !defined(PERL_IN_XSUB_RE)
  bool
  bool
-S__invlistEQ(pTHX_ SV* const a, SV* const b, const bool complement_b)
+Perl__invlistEQ(pTHX_ SV* const a, SV* const b, const bool complement_b)
  {
      /* Return a boolean as to if the two passed in inversion lists are
       * identical.  The final argument, if TRUE, says to take the complement of
  {
      /* Return a boolean as to if the two passed in inversion lists are
       * identical.  The final argument, if TRUE, says to take the complement of
@@ -9636,7 +10044,7 @@ S_parse_lparen_question_flags(pTHX_ RExC_state_t *pRExC_state)
          cs = REGEX_UNICODE_CHARSET;
      }
  
          cs = REGEX_UNICODE_CHARSET;
      }
  
-    while (*RExC_parse) {
+    while (RExC_parse < RExC_end) {
          /* && strchr("iogcmsx", *RExC_parse) */
          /* (?g), (?gc) and (?o) are useless here
             and must be globally applied -- japhy */
          /* && strchr("iogcmsx", *RExC_parse) */
          /* (?g), (?gc) and (?o) are useless here
             and must be globally applied -- japhy */
@@ -9794,7 +10202,7 @@ S_parse_lparen_question_flags(pTHX_ RExC_state_t *pRExC_state)
                  NOT_REACHED; /*NOTREACHED*/
          }
  
                  NOT_REACHED; /*NOTREACHED*/
          }
  
-        ++RExC_parse;
+        RExC_parse += UTF ? UTF8SKIP(RExC_parse) : 1;
      }
  
      vFAIL("Sequence (?... not terminated");
      }
  
      vFAIL("Sequence (?... not terminated");
@@ -9816,6 +10224,54 @@ S_parse_lparen_question_flags(pTHX_ RExC_state_t *pRExC_state)
  #define REGTAIL_STUDY(x,y,z) regtail((x),(y),(z),depth+1)
  #endif
  
  #define REGTAIL_STUDY(x,y,z) regtail((x),(y),(z),depth+1)
  #endif
  
+PERL_STATIC_INLINE regnode *
+S_handle_named_backref(pTHX_ RExC_state_t *pRExC_state,
+                             I32 *flagp,
+                             char * parse_start,
+                             char ch
+                      )
+{
+    regnode *ret;
+    char* name_start = RExC_parse;
+    U32 num = 0;
+    SV *sv_dat = reg_scan_name(pRExC_state, SIZE_ONLY
+                                            ? REG_RSN_RETURN_NULL
+                                            : REG_RSN_RETURN_DATA);
+    GET_RE_DEBUG_FLAGS_DECL;
+
+    PERL_ARGS_ASSERT_HANDLE_NAMED_BACKREF;
+
+    if (RExC_parse == name_start || *RExC_parse != ch) {
+        /* diag_listed_as: Sequence \%s... not terminated in regex; marked by <-- HERE in m/%s/ */
+        vFAIL2("Sequence %.3s... not terminated",parse_start);
+    }
+
+    if (!SIZE_ONLY) {
+        num = add_data( pRExC_state, STR_WITH_LEN("S"));
+        RExC_rxi->data->data[num]=(void*)sv_dat;
+        SvREFCNT_inc_simple_void(sv_dat);
+    }
+    RExC_sawback = 1;
+    ret = reganode(pRExC_state,
+                   ((! FOLD)
+                     ? NREF
+                     : (ASCII_FOLD_RESTRICTED)
+                       ? NREFFA
+                       : (AT_LEAST_UNI_SEMANTICS)
+                         ? NREFFU
+                         : (LOC)
+                           ? NREFFL
+                           : NREFF),
+                    num);
+    *flagp |= HASWIDTH;
+
+    Set_Node_Offset(ret, parse_start+1);
+    Set_Node_Cur_Length(ret, parse_start);
+
+    nextchar(pRExC_state);
+    return ret;
+}
+
  /* Returns NULL, setting *flagp to TRYAGAIN at the end of (?) that only sets
     flags. Returns NULL, setting *flagp to RESTART_PASS1 if the sizing scan
     needs to be restarted, or'd with NEED_UTF8 if the pattern needs to be
  /* Returns NULL, setting *flagp to TRYAGAIN at the end of (?) that only sets
     flags. Returns NULL, setting *flagp to RESTART_PASS1 if the sizing scan
     needs to be restarted, or'd with NEED_UTF8 if the pattern needs to be
@@ -9851,6 +10307,13 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
  
      *flagp = 0;                                /* Tentatively. */
  
  
      *flagp = 0;                                /* Tentatively. */
  
+    /* Having this true makes it feasible to have a lot fewer tests for the
+     * parse pointer being in scope.  For example, we can write
+     *      while(isFOO(*RExC_parse)) RExC_parse++;
+     * instead of
+     *      while(RExC_parse < RExC_end && isFOO(*RExC_parse)) RExC_parse++;
+     */
+    assert(*RExC_end == '\0');
  
      /* Make an OPEN node, if parenthesized. */
      if (paren) {
  
      /* Make an OPEN node, if parenthesized. */
      if (paren) {
@@ -9861,40 +10324,49 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
           * indivisible */
          bool has_intervening_patws = paren == 2 && *(RExC_parse - 1) != '(';
  
           * indivisible */
          bool has_intervening_patws = paren == 2 && *(RExC_parse - 1) != '(';
  
+        if (RExC_parse >= RExC_end) {
+           vFAIL("Unmatched (");
+        }
+
          if ( *RExC_parse == '*') { /* (*VERB:ARG) */
          if ( *RExC_parse == '*') { /* (*VERB:ARG) */
-           char *start_verb = RExC_parse;
-           STRLEN verb_len = 0;
+           char *start_verb = RExC_parse + 1;
+           STRLEN verb_len;
             char *start_arg = NULL;
             unsigned char op = 0;
              int arg_required = 0;
              int internal_argval = -1; /* if >-1 we are not allowed an argument*/
  
              if (has_intervening_patws) {
             char *start_arg = NULL;
             unsigned char op = 0;
              int arg_required = 0;
              int internal_argval = -1; /* if >-1 we are not allowed an argument*/
  
              if (has_intervening_patws) {
-                RExC_parse++;
+                RExC_parse++;   /* past the '*' */
                  vFAIL("In '(*VERB...)', the '(' and '*' must be adjacent");
              }
                  vFAIL("In '(*VERB...)', the '(' and '*' must be adjacent");
              }
-           while ( *RExC_parse && *RExC_parse != ')' ) {
+           while (RExC_parse < RExC_end && *RExC_parse != ')' ) {
                 if ( *RExC_parse == ':' ) {
                     start_arg = RExC_parse + 1;
                     break;
                 }
                 if ( *RExC_parse == ':' ) {
                     start_arg = RExC_parse + 1;
                     break;
                 }
-               RExC_parse++;
+               RExC_parse += UTF ? UTF8SKIP(RExC_parse) : 1;
             }
             }
-           ++start_verb;
             verb_len = RExC_parse - start_verb;
             if ( start_arg ) {
             verb_len = RExC_parse - start_verb;
             if ( start_arg ) {
-               RExC_parse++;
-               while ( *RExC_parse && *RExC_parse != ')' )
-                   RExC_parse++;
-               if ( *RExC_parse != ')' )
+                if (RExC_parse >= RExC_end) {
+                    goto unterminated_verb_pattern;
+                }
+               RExC_parse += UTF ? UTF8SKIP(RExC_parse) : 1;
+               while ( RExC_parse < RExC_end && *RExC_parse != ')' )
+                    RExC_parse += UTF ? UTF8SKIP(RExC_parse) : 1;
+               if ( RExC_parse >= RExC_end || *RExC_parse != ')' )
+                  unterminated_verb_pattern:
                     vFAIL("Unterminated verb pattern argument");
                 if ( RExC_parse == start_arg )
                     start_arg = NULL;
             } else {
                     vFAIL("Unterminated verb pattern argument");
                 if ( RExC_parse == start_arg )
                     start_arg = NULL;
             } else {
-               if ( *RExC_parse != ')' )
+               if ( RExC_parse >= RExC_end || *RExC_parse != ')' )
                     vFAIL("Unterminated verb pattern");
             }
  
                     vFAIL("Unterminated verb pattern");
             }
  
+            /* Here, we know that RExC_parse < RExC_end */
+
             switch ( *start_verb ) {
              case 'A':  /* (*ACCEPT) */
                  if ( memEQs(start_verb,verb_len,"ACCEPT") ) {
             switch ( *start_verb ) {
              case 'A':  /* (*ACCEPT) */
                  if ( memEQs(start_verb,verb_len,"ACCEPT") ) {
@@ -9976,56 +10448,37 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                  vFAIL("In '(?...)', the '(' and '?' must be adjacent");
              }
  
                  vFAIL("In '(?...)', the '(' and '?' must be adjacent");
              }
  
-           RExC_parse++;
-           paren = *RExC_parse++;
+           RExC_parse++;           /* past the '?' */
+            paren = *RExC_parse;    /* might be a trailing NUL, if not
+                                       well-formed */
+            RExC_parse += UTF ? UTF8SKIP(RExC_parse) : 1;
+            if (RExC_parse > RExC_end) {
+                paren = '\0';
+            }
             ret = NULL;                 /* For look-ahead/behind. */
             switch (paren) {
  
             case 'P':   /* (?P...) variants for those used to PCRE/Python */
             ret = NULL;                 /* For look-ahead/behind. */
             switch (paren) {
  
             case 'P':   /* (?P...) variants for those used to PCRE/Python */
-               paren = *RExC_parse++;
-               if ( paren == '<')         /* (?P<...>) named capture */
+               paren = *RExC_parse;
+               if ( paren == '<') {    /* (?P<...>) named capture */
+                    RExC_parse++;
+                    if (RExC_parse >= RExC_end) {
+                        vFAIL("Sequence (?P<... not terminated");
+                    }
                     goto named_capture;
                     goto named_capture;
+                }
                  else if (paren == '>') {   /* (?P>name) named recursion */
                  else if (paren == '>') {   /* (?P>name) named recursion */
+                    RExC_parse++;
+                    if (RExC_parse >= RExC_end) {
+                        vFAIL("Sequence (?P>... not terminated");
+                    }
                      goto named_recursion;
                  }
                  else if (paren == '=') {   /* (?P=...)  named backref */
                      goto named_recursion;
                  }
                  else if (paren == '=') {   /* (?P=...)  named backref */
-                    /* this pretty much dupes the code for \k<NAME> in
-                     * regatom(), if you change this make sure you change that
-                     * */
-                    char* name_start = RExC_parse;
-                   U32 num = 0;
-                    SV *sv_dat = reg_scan_name(pRExC_state,
-                        SIZE_ONLY ? REG_RSN_RETURN_NULL : REG_RSN_RETURN_DATA);
-                    if (RExC_parse == name_start || *RExC_parse != ')')
-                        /* diag_listed_as: Sequence ?P=... not terminated in regex; marked by <-- HERE in m/%s/ */
-                        vFAIL2("Sequence %.3s... not terminated",parse_start);
-
-                    if (!SIZE_ONLY) {
-                        num = add_data( pRExC_state, STR_WITH_LEN("S"));
-                        RExC_rxi->data->data[num]=(void*)sv_dat;
-                        SvREFCNT_inc_simple_void(sv_dat);
-                    }
-                    RExC_sawback = 1;
-                   ret = reganode(pRExC_state,
-                                  ((! FOLD)
-                                    ? NREF
-                                    : (ASCII_FOLD_RESTRICTED)
-                                      ? NREFFA
-                                       : (AT_LEAST_UNI_SEMANTICS)
-                                         ? NREFFU
-                                         : (LOC)
-                                           ? NREFFL
-                                           : NREFF),
-                                   num);
-                    *flagp |= HASWIDTH;
-
-                    Set_Node_Offset(ret, parse_start+1);
-                    Set_Node_Cur_Length(ret, parse_start);
-
-                    nextchar(pRExC_state);
-                    return ret;
+                    RExC_parse++;
+                    return handle_named_backref(pRExC_state, flagp,
+                                                parse_start, ')');
                  }
                  }
-                --RExC_parse;
                  RExC_parse += SKIP_IF_CHAR(RExC_parse);
                  /* diag_listed_as: Sequence (?%s...) not recognized in regex; marked by <-- HERE in m/%s/ */
                 vFAIL3("Sequence (%.*s...) not recognized",
                  RExC_parse += SKIP_IF_CHAR(RExC_parse);
                  /* diag_listed_as: Sequence (?%s...) not recognized in regex; marked by <-- HERE in m/%s/ */
                 vFAIL3("Sequence (%.*s...) not recognized",
@@ -10040,15 +10493,20 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                     char *name_start;
                     SV *svname;
                     paren= '>';
                     char *name_start;
                     SV *svname;
                     paren= '>';
+                /* FALLTHROUGH */
              case '\'':          /* (?'...') */
              case '\'':          /* (?'...') */
-                   name_start= RExC_parse;
-                   svname = reg_scan_name(pRExC_state,
+                    name_start = RExC_parse;
+                    svname = reg_scan_name(pRExC_state,
                          SIZE_ONLY    /* reverse test from the others */
                          ? REG_RSN_RETURN_NAME
                          : REG_RSN_RETURN_NULL);
                          SIZE_ONLY    /* reverse test from the others */
                          ? REG_RSN_RETURN_NAME
                          : REG_RSN_RETURN_NULL);
-                   if (RExC_parse == name_start || *RExC_parse != paren)
+                   if (   RExC_parse == name_start
+                        || RExC_parse >= RExC_end
+                        || *RExC_parse != paren)
+                    {
                         vFAIL2("Sequence (?%c... not terminated",
                             paren=='>' ? '<' : paren);
                         vFAIL2("Sequence (?%c... not terminated",
                             paren=='>' ? '<' : paren);
+                    }
                     if (SIZE_ONLY) {
                         HE *he_str;
                         SV *sv_dat = NULL;
                     if (SIZE_ONLY) {
                         HE *he_str;
                         SV *sv_dat = NULL;
@@ -10116,6 +10574,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                  RExC_seen |= REG_LOOKBEHIND_SEEN;
                 RExC_in_lookbehind++;
                 RExC_parse++;
                  RExC_seen |= REG_LOOKBEHIND_SEEN;
                 RExC_in_lookbehind++;
                 RExC_parse++;
+                assert(RExC_parse < RExC_end);
                  /* FALLTHROUGH */
             case '=':           /* (?=...) */
                 RExC_seen_zerolen++;
                  /* FALLTHROUGH */
             case '=':           /* (?=...) */
                 RExC_seen_zerolen++;
@@ -10163,7 +10622,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                         SIZE_ONLY ? REG_RSN_RETURN_NULL : REG_RSN_RETURN_DATA);
                      num = sv_dat ? *((I32 *)SvPVX(sv_dat)) : 0;
                  }
                         SIZE_ONLY ? REG_RSN_RETURN_NULL : REG_RSN_RETURN_DATA);
                      num = sv_dat ? *((I32 *)SvPVX(sv_dat)) : 0;
                  }
-                if (RExC_parse == RExC_end || *RExC_parse != ')')
+                if (RExC_parse >= RExC_end || *RExC_parse != ')')
                      vFAIL("Sequence (?&... not terminated");
                  goto gen_recurse_regop;
                  /* NOTREACHED */
                      vFAIL("Sequence (?&... not terminated");
                  goto gen_recurse_regop;
                  /* NOTREACHED */
@@ -10182,7 +10641,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                  /* FALLTHROUGH */
              case '1': case '2': case '3': case '4': /* (?1) */
             case '5': case '6': case '7': case '8': case '9':
                  /* FALLTHROUGH */
              case '1': case '2': case '3': case '4': /* (?1) */
             case '5': case '6': case '7': case '8': case '9':
-               RExC_parse--;
+               RExC_parse = (char *) seqstart + 1;  /* Point to the digit */
                parse_recursion:
                  {
                      bool is_neg = FALSE;
                parse_recursion:
                  {
                      bool is_neg = FALSE;
@@ -10262,7 +10721,8 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                     NOT_REACHED; /*NOTREACHED*/
                 }
                 *flagp |= POSTPONED;
                     NOT_REACHED; /*NOTREACHED*/
                 }
                 *flagp |= POSTPONED;
-               paren = *RExC_parse++;
+               paren = '{';
+                RExC_parse++;
                 /* FALLTHROUGH */
             case '{':           /* (?{...}) */
             {
                 /* FALLTHROUGH */
             case '{':           /* (?{...}) */
             {
@@ -10329,11 +10789,11 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                 int is_define= 0;
                  const int DEFINE_len = sizeof("DEFINE") - 1;
                 if (RExC_parse[0] == '?') {        /* (?(?...)) */
                 int is_define= 0;
                  const int DEFINE_len = sizeof("DEFINE") - 1;
                 if (RExC_parse[0] == '?') {        /* (?(?...)) */
-                    if (
-                        RExC_parse[1] == '=' ||
-                        RExC_parse[1] == '!' ||
-                        RExC_parse[1] == '<' ||
-                        RExC_parse[1] == '{'
+                    if (   RExC_parse < RExC_end - 1
+                        && (   RExC_parse[1] == '='
+                            || RExC_parse[1] == '!'
+                            || RExC_parse[1] == '<'
+                            || RExC_parse[1] == '{')
                      ) { /* Lookahead or eval. */
                         I32 flag;
                          regnode *tail;
                      ) { /* Lookahead or eval. */
                         I32 flag;
                          regnode *tail;
@@ -10361,9 +10821,13 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                     U32 num = 0;
                     SV *sv_dat=reg_scan_name(pRExC_state,
                         SIZE_ONLY ? REG_RSN_RETURN_NULL : REG_RSN_RETURN_DATA);
                     U32 num = 0;
                     SV *sv_dat=reg_scan_name(pRExC_state,
                         SIZE_ONLY ? REG_RSN_RETURN_NULL : REG_RSN_RETURN_DATA);
-                   if (RExC_parse == name_start || *RExC_parse != ch)
+                   if (   RExC_parse == name_start
+                        || RExC_parse >= RExC_end
+                        || *RExC_parse != ch)
+                    {
                          vFAIL2("Sequence (?(%c... not terminated",
                              (ch == '>' ? '<' : ch));
                          vFAIL2("Sequence (?(%c... not terminated",
                              (ch == '>' ? '<' : ch));
+                    }
                      RExC_parse++;
                     if (!SIZE_ONLY) {
                          num = add_data( pRExC_state, STR_WITH_LEN("S"));
                      RExC_parse++;
                     if (!SIZE_ONLY) {
                          num = add_data( pRExC_state, STR_WITH_LEN("S"));
@@ -10467,7 +10931,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                     else
                         lastbr = NULL;
                      if (c != ')') {
                     else
                         lastbr = NULL;
                      if (c != ')') {
-                        if (RExC_parse>RExC_end)
+                        if (RExC_parse >= RExC_end)
                              vFAIL("Switch (?(condition)... not terminated");
                          else
                              vFAIL("Switch (?(condition)... contains too many branches");
                              vFAIL("Switch (?(condition)... not terminated");
                          else
                              vFAIL("Switch (?(condition)... contains too many branches");
@@ -10491,16 +10955,16 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
             case '[':           /* (?[ ... ]) */
                  return handle_regex_sets(pRExC_state, NULL, flagp, depth,
                                           oregcomp_parse);
             case '[':           /* (?[ ... ]) */
                  return handle_regex_sets(pRExC_state, NULL, flagp, depth,
                                           oregcomp_parse);
-            case 0:
+            case 0: /* A NUL */
                 RExC_parse--; /* for vFAIL to print correctly */
                  vFAIL("Sequence (? incomplete");
                  break;
             default: /* e.g., (?i) */
                 RExC_parse--; /* for vFAIL to print correctly */
                  vFAIL("Sequence (? incomplete");
                  break;
             default: /* e.g., (?i) */
-               --RExC_parse;
+               RExC_parse = (char *) seqstart + 1;
                parse_flags:
                 parse_lparen_question_flags(pRExC_state);
                  if (UCHARAT(RExC_parse) != ':') {
                parse_flags:
                 parse_lparen_question_flags(pRExC_state);
                  if (UCHARAT(RExC_parse) != ':') {
-                    if (*RExC_parse)
+                    if (RExC_parse < RExC_end)
                          nextchar(pRExC_state);
                      *flagp = TRYAGAIN;
                      return NULL;
                          nextchar(pRExC_state);
                      *flagp = TRYAGAIN;
                      return NULL;
@@ -10936,7 +11400,7 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
  
                      /* We can't back off the size because we have to reserve
                       * enough space for all the things we are about to throw
  
                      /* We can't back off the size because we have to reserve
                       * enough space for all the things we are about to throw
-                     * away, but we can shrink it by the ammount we are about
+                     * away, but we can shrink it by the amount we are about
                       * to re-use here */
                      RExC_size += PREVOPER(RExC_size) - regarglen[(U8)OPFAIL];
                  }
                       * to re-use here */
                      RExC_size += PREVOPER(RExC_size) - regarglen[(U8)OPFAIL];
                  }
@@ -10947,15 +11411,13 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                  ret = reganode(pRExC_state, OPFAIL, 0);
                  return ret;
              }
                  ret = reganode(pRExC_state, OPFAIL, 0);
                  return ret;
              }
-            else if (min == max && RExC_parse < RExC_end && *RExC_parse == '?')
+            else if (min == max && *RExC_parse == '?')
              {
                  if (PASS2) {
                      ckWARN2reg(RExC_parse + 1,
                                 "Useless use of greediness modifier '%c'",
                                 *RExC_parse);
                  }
              {
                  if (PASS2) {
                      ckWARN2reg(RExC_parse + 1,
                                 "Useless use of greediness modifier '%c'",
                                 *RExC_parse);
                  }
-                /* Absorb the modifier, so later code doesn't see nor use it */
-                nextchar(pRExC_state);
              }
  
           do_curly:
              }
  
           do_curly:
@@ -11071,13 +11533,12 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
         (void)ReREFCNT_inc(RExC_rx_sv);
      }
  
         (void)ReREFCNT_inc(RExC_rx_sv);
      }
  
-    if (RExC_parse < RExC_end && *RExC_parse == '?') {
+    if (*RExC_parse == '?') {
         nextchar(pRExC_state);
         reginsert(pRExC_state, MINMOD, ret, depth+1);
          REGTAIL(pRExC_state, ret, ret + NODE_STEP_REGNODE);
      }
         nextchar(pRExC_state);
         reginsert(pRExC_state, MINMOD, ret, depth+1);
          REGTAIL(pRExC_state, ret, ret + NODE_STEP_REGNODE);
      }
-    else
-    if (RExC_parse < RExC_end && *RExC_parse == '+') {
+    else if (*RExC_parse == '+') {
          regnode *ender;
          nextchar(pRExC_state);
          ender = reg_node(pRExC_state, SUCCEED);
          regnode *ender;
          nextchar(pRExC_state);
          ender = reg_node(pRExC_state, SUCCEED);
@@ -11088,7 +11549,7 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
          REGTAIL(pRExC_state, ret, ender);
      }
  
          REGTAIL(pRExC_state, ret, ender);
      }
  
-    if (RExC_parse < RExC_end && ISMULT2(RExC_parse)) {
+    if (ISMULT2(RExC_parse)) {
         RExC_parse++;
         vFAIL("Nested quantifiers");
      }
         RExC_parse++;
         vFAIL("Nested quantifiers");
      }
@@ -11102,6 +11563,7 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state,
                  UV * code_point_p,
                  int * cp_count,
                  I32 * flagp,
                  UV * code_point_p,
                  int * cp_count,
                  I32 * flagp,
+                const bool strict,
                  const U32 depth
      )
  {
                  const U32 depth
      )
  {
@@ -11251,6 +11713,10 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state,
                                          semantics */
  
      if (endbrace == RExC_parse) {   /* empty: \N{} */
                                          semantics */
  
      if (endbrace == RExC_parse) {   /* empty: \N{} */
+        if (strict) {
+            RExC_parse++;   /* Position after the "}" */
+            vFAIL("Zero length \\N{}");
+        }
          if (cp_count) {
              *cp_count = 0;
          }
          if (cp_count) {
              *cp_count = 0;
          }
@@ -11265,6 +11731,8 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state,
  
      RExC_parse += 2;   /* Skip past the 'U+' */
  
  
      RExC_parse += 2;   /* Skip past the 'U+' */
  
+    /* Because toke.c has generated a special construct for us guaranteed not
+     * to have NULs, we can use a str function */
      endchar = RExC_parse + strcspn(RExC_parse, ".}");
  
      /* Code points are separated by dots.  If none, there is only one code
      endchar = RExC_parse + strcspn(RExC_parse, ".}");
  
      /* Code points are separated by dots.  If none, there is only one code
@@ -11324,6 +11792,7 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state,
         SV * substitute_parse;
         STRLEN len;
         char *orig_end = RExC_end;
         SV * substitute_parse;
         STRLEN len;
         char *orig_end = RExC_end;
+       char *save_start = RExC_start;
          I32 flags;
  
          /* Count the code points, if desired, in the sequence */
          I32 flags;
  
          /* Count the code points, if desired, in the sequence */
@@ -11369,7 +11838,8 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state,
         }
          sv_catpv(substitute_parse, ")");
  
         }
          sv_catpv(substitute_parse, ")");
  
-       RExC_parse = SvPV(substitute_parse, len);
+        RExC_parse = RExC_start = RExC_adjusted_start = SvPV(substitute_parse,
+                                                             len);
  
         /* Don't allow empty number */
         if (len < (STRLEN) 8) {
  
         /* Don't allow empty number */
         if (len < (STRLEN) 8) {
@@ -11399,6 +11869,7 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state,
          }
  
          /* Restore the saved values */
          }
  
          /* Restore the saved values */
+       RExC_start = RExC_adjusted_start = save_start;
         RExC_parse = endbrace;
         RExC_end = orig_end;
         RExC_override_recoding = 0;
         RExC_parse = endbrace;
         RExC_end = orig_end;
         RExC_override_recoding = 0;
@@ -11425,10 +11896,10 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state,
   * it returns U+FFFD (Replacement character) and sets *encp to NULL.
   */
  STATIC UV
   * it returns U+FFFD (Replacement character) and sets *encp to NULL.
   */
  STATIC UV
-S_reg_recode(pTHX_ const char value, SV **encp)
+S_reg_recode(pTHX_ const U8 value, SV **encp)
  {
      STRLEN numlen = 1;
  {
      STRLEN numlen = 1;
-    SV * const sv = newSVpvn_flags(&value, numlen, SVs_TEMP);
+    SV * const sv = newSVpvn_flags((const char *) &value, numlen, SVs_TEMP);
      const char * const s = *encp ? sv_recode_to_utf8(sv, *encp) : SvPVX(sv);
      const STRLEN newlen = SvCUR(sv);
      UV uv = UNICODE_REPLACEMENT;
      const char * const s = *encp ? sv_recode_to_utf8(sv, *encp) : SvPVX(sv);
      const STRLEN newlen = SvCUR(sv);
      UV uv = UNICODE_REPLACEMENT;
@@ -11743,6 +12214,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
  
    tryagain:
      parse_start = RExC_parse;
  
    tryagain:
      parse_start = RExC_parse;
+    assert(RExC_parse < RExC_end);
      switch ((U8)*RExC_parse) {
      case '^':
         RExC_seen_zerolen++;
      switch ((U8)*RExC_parse) {
      case '^':
         RExC_seen_zerolen++;
@@ -11782,6 +12254,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                         FALSE, /* don't silence non-portable warnings. */
                         (bool) RExC_strict,
                         TRUE, /* Allow an optimized regnode result */
                         FALSE, /* don't silence non-portable warnings. */
                         (bool) RExC_strict,
                         TRUE, /* Allow an optimized regnode result */
+                       NULL,
                         NULL);
          if (ret == NULL) {
              if (*flagp & (RESTART_PASS1|NEED_UTF8))
                         NULL);
          if (ret == NULL) {
              if (*flagp & (RESTART_PASS1|NEED_UTF8))
@@ -11802,7 +12275,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
          ret = reg(pRExC_state, 2, &flags,depth+1);
         if (ret == NULL) {
                 if (flags & TRYAGAIN) {
          ret = reg(pRExC_state, 2, &flags,depth+1);
         if (ret == NULL) {
                 if (flags & TRYAGAIN) {
-                   if (RExC_parse == RExC_end) {
+                   if (RExC_parse >= RExC_end) {
                          /* Make parent create an empty node if needed. */
                         *flagp |= TRYAGAIN;
                         return(NULL);
                          /* Make parent create an empty node if needed. */
                         *flagp |= TRYAGAIN;
                         return(NULL);
@@ -11846,7 +12319,8 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
            required, as the default for this switch is to jump to the
            literal text handling code.
         */
            required, as the default for this switch is to jump to the
            literal text handling code.
         */
-       switch ((U8)*++RExC_parse) {
+       RExC_parse++;
+       switch ((U8)*RExC_parse) {
         /* Special Escapes */
         case 'A':
             RExC_seen_zerolen++;
         /* Special Escapes */
         case 'A':
             RExC_seen_zerolen++;
@@ -11914,7 +12388,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
  
             ret = reg_node(pRExC_state, op);
             *flagp |= SIMPLE;
  
             ret = reg_node(pRExC_state, op);
             *flagp |= SIMPLE;
-           if (*(RExC_parse + 1) != '{') {
+           if (RExC_parse >= RExC_end || *(RExC_parse + 1) != '{') {
                  FLAGS(ret) = TRADITIONAL_BOUND;
                  if (PASS2 && op > BOUNDA) {  /* /aa is same as /a */
                      OP(ret) = BOUNDA;
                  FLAGS(ret) = TRADITIONAL_BOUND;
                  if (PASS2 && op > BOUNDA) {  /* /aa is same as /a */
                      OP(ret) = BOUNDA;
@@ -11953,6 +12427,12 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                          }
                          FLAGS(ret) = GCB_BOUND;
                          break;
                          }
                          FLAGS(ret) = GCB_BOUND;
                          break;
+                    case 'l':
+                        if (length != 2 || *(RExC_parse + 1) != 'b') {
+                            goto bad_bound_type;
+                        }
+                        FLAGS(ret) = LB_BOUND;
+                        break;
                      case 's':
                          if (length != 2 || *(RExC_parse + 1) != 'b') {
                              goto bad_bound_type;
                      case 's':
                          if (length != 2 || *(RExC_parse + 1) != 'b') {
                              goto bad_bound_type;
@@ -12080,6 +12560,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                                       non-portables */
                             (bool) RExC_strict,
                             TRUE, /* Allow an optimized regnode result */
                                       non-portables */
                             (bool) RExC_strict,
                             TRUE, /* Allow an optimized regnode result */
+                           NULL,
                             NULL);
              if (*flagp & RESTART_PASS1)
                  return NULL;
                             NULL);
              if (*flagp & RESTART_PASS1)
                  return NULL;
@@ -12115,6 +12596,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                                NULL,     /* Don't need a count of how many code
                                             points */
                                flagp,
                                NULL,     /* Don't need a count of how many code
                                             points */
                                flagp,
+                              RExC_strict,
                                depth)
              ) {
                  break;
                                depth)
              ) {
                  break;
@@ -12130,48 +12612,25 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
         case 'k':    /* Handle \k<NAME> and \k'NAME' */
        parse_named_seq:
          {
         case 'k':    /* Handle \k<NAME> and \k'NAME' */
        parse_named_seq:
          {
-            char ch= RExC_parse[1];
-           if (ch != '<' && ch != '\'' && ch != '{') {
+            char ch;
+            if (   RExC_parse >= RExC_end - 1
+                || ((   ch = RExC_parse[1]) != '<'
+                                      && ch != '\''
+                                      && ch != '{'))
+            {
                 RExC_parse++;
                 /* diag_listed_as: Sequence \%s... not terminated in regex; marked by <-- HERE in m/%s/ */
                 vFAIL2("Sequence %.2s... not terminated",parse_start);
             } else {
                 RExC_parse++;
                 /* diag_listed_as: Sequence \%s... not terminated in regex; marked by <-- HERE in m/%s/ */
                 vFAIL2("Sequence %.2s... not terminated",parse_start);
             } else {
-               /* this pretty much dupes the code for (?P=...) in reg(), if
-                   you change this make sure you change that */
-               char* name_start = (RExC_parse += 2);
-               U32 num = 0;
-                SV *sv_dat = reg_scan_name(pRExC_state,
-                    SIZE_ONLY ? REG_RSN_RETURN_NULL : REG_RSN_RETURN_DATA);
-                ch= (ch == '<') ? '>' : (ch == '{') ? '}' : '\'';
-                if (RExC_parse == name_start || *RExC_parse != ch)
-                    /* diag_listed_as: Sequence \%s... not terminated in regex; marked by <-- HERE in m/%s/ */
-                    vFAIL2("Sequence %.3s... not terminated",parse_start);
-
-                if (!SIZE_ONLY) {
-                    num = add_data( pRExC_state, STR_WITH_LEN("S"));
-                    RExC_rxi->data->data[num]=(void*)sv_dat;
-                    SvREFCNT_inc_simple_void(sv_dat);
-                }
-
-                RExC_sawback = 1;
-                ret = reganode(pRExC_state,
-                               ((! FOLD)
-                                 ? NREF
-                                : (ASCII_FOLD_RESTRICTED)
-                                  ? NREFFA
-                                   : (AT_LEAST_UNI_SEMANTICS)
-                                     ? NREFFU
-                                     : (LOC)
-                                       ? NREFFL
-                                       : NREFF),
-                                num);
-                *flagp |= HASWIDTH;
-
-                /* override incorrect value set in reganode MJD */
-                Set_Node_Offset(ret, parse_start+1);
-                Set_Node_Cur_Length(ret, parse_start);
-                nextchar(pRExC_state);
-
+               RExC_parse += 2;
+                ret = handle_named_backref(pRExC_state,
+                                           flagp,
+                                           parse_start,
+                                           (ch == '<')
+                                           ? '>'
+                                           : (ch == '{')
+                                             ? '}'
+                                             : '\'');
              }
              break;
         }
              }
              break;
         }
@@ -12200,6 +12659,9 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                         goto parse_named_seq;
                      }
  
                         goto parse_named_seq;
                      }
  
+                    if (RExC_parse >= RExC_end) {
+                        goto unterminated_g;
+                    }
                      num = S_backref_value(RExC_parse);
                      if (num == 0)
                          vFAIL("Reference to invalid group 0");
                      num = S_backref_value(RExC_parse);
                      if (num == 0)
                          vFAIL("Reference to invalid group 0");
@@ -12207,6 +12669,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                           if (isDIGIT(*RExC_parse))
                             vFAIL("Reference to nonexistent group");
                          else
                           if (isDIGIT(*RExC_parse))
                             vFAIL("Reference to nonexistent group");
                          else
+                          unterminated_g:
                              vFAIL("Unterminated \\g... pattern");
                      }
  
                              vFAIL("Unterminated \\g... pattern");
                      }
  
@@ -12358,7 +12821,12 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
           reparse:
  
              /* We look for the EXACTFish to EXACT node optimizaton only if
           reparse:
  
              /* We look for the EXACTFish to EXACT node optimizaton only if
-             * folding.  (And we don't need to figure this out until pass 2) */
+             * folding.  (And we don't need to figure this out until pass 2).
+             * XXX It might actually make sense to split the node into portions
+             * that are exact and ones that aren't, so that we could later use
+             * the exact ones to find the longest fixed and floating strings.
+             * One would want to join them back into a larger node.  One could
+             * use a pseudo regnode like 'EXACT_ORIG_FOLD' */
              maybe_exact = FOLD && PASS2;
  
             /* XXX The node can hold up to 255 bytes, yet this only goes to
              maybe_exact = FOLD && PASS2;
  
             /* XXX The node can hold up to 255 bytes, yet this only goes to
@@ -12455,6 +12923,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                                              NULL,   /* Don't need a count of
                                                         how many code points */
                                              flagp,
                                              NULL,   /* Don't need a count of
                                                         how many code points */
                                              flagp,
+                                            RExC_strict,
                                              depth)
                          ) {
                              if (*flagp & NEED_UTF8)
                                              depth)
                          ) {
                              if (*flagp & NEED_UTF8)
@@ -12605,7 +13074,6 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                             p += numlen;
                              if (PASS2   /* like \08, \178 */
                                  && numlen < 3
                             p += numlen;
                              if (PASS2   /* like \08, \178 */
                                  && numlen < 3
-                                && p < RExC_end
                                  && isDIGIT(*p) && ckWARN(WARN_REGEXP))
                              {
                                 reg_warn_non_literal_string(
                                  && isDIGIT(*p) && ckWARN(WARN_REGEXP))
                              {
                                 reg_warn_non_literal_string(
@@ -12619,7 +13087,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                       recode_encoding:
                         if (! RExC_override_recoding) {
                             SV* enc = _get_encoding();
                       recode_encoding:
                         if (! RExC_override_recoding) {
                             SV* enc = _get_encoding();
-                           ender = reg_recode((const char)(U8)ender, &enc);
+                           ender = reg_recode((U8)ender, &enc);
                             if (!enc && PASS2)
                                 ckWARNreg(p, "Invalid escape in the specified encoding");
                             REQUIRE_UTF8(flagp);
                             if (!enc && PASS2)
                                 ckWARNreg(p, "Invalid escape in the specified encoding");
                             REQUIRE_UTF8(flagp);
@@ -12747,14 +13215,14 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                      goto not_fold_common;
                  }
                  else /* A regular FOLD code point */
                      goto not_fold_common;
                  }
                  else /* A regular FOLD code point */
-                    if (! ( UTF
+                    if (! (   UTF
  #if    UNICODE_MAJOR_VERSION > 3 /* no multifolds in early Unicode */   \
     || (UNICODE_MAJOR_VERSION == 3 && (   UNICODE_DOT_VERSION > 0)       \
                                        || UNICODE_DOT_DOT_VERSION > 0)
  #if    UNICODE_MAJOR_VERSION > 3 /* no multifolds in early Unicode */   \
     || (UNICODE_MAJOR_VERSION == 3 && (   UNICODE_DOT_VERSION > 0)       \
                                        || UNICODE_DOT_DOT_VERSION > 0)
-                        /* See comments for join_exact() as to why we fold this
-                         * non-UTF at compile time */
-                        || (node_type == EXACTFU
-                            && ender == LATIN_SMALL_LETTER_SHARP_S)
+                            /* See comments for join_exact() as to why we fold
+                             * this non-UTF at compile time */
+                            || (   node_type == EXACTFU
+                                && ender == LATIN_SMALL_LETTER_SHARP_S)
  #endif
                  )) {
                      /* Here, are folding and are not UTF-8 encoded; therefore
  #endif
                  )) {
                      /* Here, are folding and are not UTF-8 encoded; therefore
@@ -13097,9 +13565,6 @@ S_populate_ANYOF_from_invlist(pTHX_ regnode *node, SV** invlist_ptr)
              if (end == UV_MAX && start <= NUM_ANYOF_CODE_POINTS) {
                  ANYOF_FLAGS(node) |= ANYOF_MATCHES_ALL_ABOVE_BITMAP;
              }
              if (end == UV_MAX && start <= NUM_ANYOF_CODE_POINTS) {
                  ANYOF_FLAGS(node) |= ANYOF_MATCHES_ALL_ABOVE_BITMAP;
              }
-            else if (end >= NUM_ANYOF_CODE_POINTS) {
-                ANYOF_FLAGS(node) |= ANYOF_HAS_UTF8_NONBITMAP_MATCHES;
-            }
  
             /* Quit if are above what we should change */
             if (start >= NUM_ANYOF_CODE_POINTS) {
  
             /* Quit if are above what we should change */
             if (start >= NUM_ANYOF_CODE_POINTS) {
@@ -13147,202 +13612,816 @@ S_populate_ANYOF_from_invlist(pTHX_ regnode *node, SV** invlist_ptr)
  #define POSIXCC_DONE(c)   ((c) == ':')
  #define POSIXCC_NOTYET(c) ((c) == '=' || (c) == '.')
  #define POSIXCC(c) (POSIXCC_DONE(c) || POSIXCC_NOTYET(c))
  #define POSIXCC_DONE(c)   ((c) == ':')
  #define POSIXCC_NOTYET(c) ((c) == '=' || (c) == '.')
  #define POSIXCC(c) (POSIXCC_DONE(c) || POSIXCC_NOTYET(c))
+#define MAYBE_POSIXCC(c) (POSIXCC(c) || (c) == '^' || (c) == ';')
+
+#define WARNING_PREFIX              "Assuming NOT a POSIX class since "
+#define NO_BLANKS_POSIX_WARNING     "no blanks are allowed in one"
+#define SEMI_COLON_POSIX_WARNING    "a semi-colon was found instead of a colon"
+
+#define NOT_MEANT_TO_BE_A_POSIX_CLASS (OOB_NAMEDCLASS - 1)
+
+/* 'posix_warnings' and 'warn_text' are names of variables in the following
+ * routine. q.v. */
+#define ADD_POSIX_WARNING(p, text)  STMT_START {                            \
+        if (posix_warnings) {                                               \
+            if (! warn_text) warn_text = newAV();                           \
+            av_push(warn_text, Perl_newSVpvf(aTHX_                          \
+                                             WARNING_PREFIX                 \
+                                             text                           \
+                                             REPORT_LOCATION,               \
+                                             REPORT_LOCATION_ARGS(p)));     \
+        }                                                                   \
+    } STMT_END
  
  
-PERL_STATIC_INLINE I32
-S_regpposixcc(pTHX_ RExC_state_t *pRExC_state, I32 value, const bool strict)
+STATIC int
+S_handle_possible_posix(pTHX_ RExC_state_t *pRExC_state,
+
+    const char * const s,      /* Where the putative posix class begins.
+                                  Normally, this is one past the '['.  This
+                                  parameter exists so it can be somewhere
+                                  besides RExC_parse. */
+    char ** updated_parse_ptr, /* Where to set the updated parse pointer, or
+                                  NULL */
+    AV ** posix_warnings,      /* Where to place any generated warnings, or
+                                  NULL */
+    const bool check_only      /* Don't die if error */
+)
  {
  {
-    I32 namedclass = OOB_NAMEDCLASS;
+    /* This parses what the caller thinks may be one of the three POSIX
+     * constructs:
+     *  1) a character class, like [:blank:]
+     *  2) a collating symbol, like [. .]
+     *  3) an equivalence class, like [= =]
+     * In the latter two cases, it croaks if it finds a syntactically legal
+     * one, as these are not handled by Perl.
+     *
+     * The main purpose is to look for a POSIX character class.  It returns:
+     *  a) the class number
+     *      if it is a completely syntactically and semantically legal class.
+     *      'updated_parse_ptr', if not NULL, is set to point to just after the
+     *      closing ']' of the class
+     *  b) OOB_NAMEDCLASS
+     *      if it appears that one of the three POSIX constructs was meant, but
+     *      its specification was somehow defective.  'updated_parse_ptr', if
+     *      not NULL, is set to point to the character just after the end
+     *      character of the class.  See below for handling of warnings.
+     *  c) NOT_MEANT_TO_BE_A_POSIX_CLASS
+     *      if it  doesn't appear that a POSIX construct was intended.
+     *      'updated_parse_ptr' is not changed.  No warnings nor errors are
+     *      raised.
+     *
+     * In b) there may be errors or warnings generated.  If 'check_only' is
+     * TRUE, then any errors are discarded.  Warnings are returned to the
+     * caller via an AV* created into '*posix_warnings' if it is not NULL.  If
+     * instead it is NULL, warnings are suppressed.  This is done in all
+     * passes.  The reason for this is that the rest of the parsing is heavily
+     * dependent on whether this routine found a valid posix class or not.  If
+     * it did, the closing ']' is absorbed as part of the class.  If no class,
+     * or an invalid one is found, any ']' will be considered the terminator of
+     * the outer bracketed character class, leading to very different results.
+     * In particular, a '(?[ ])' construct will likely have a syntax error if
+     * the class is parsed other than intended, and this will happen in pass1,
+     * before the warnings would normally be output.  This mechanism allows the
+     * caller to output those warnings in pass1 just before dieing, giving a
+     * much better clue as to what is wrong.
+     *
+     * The reason for this function, and its complexity is that a bracketed
+     * character class can contain just about anything.  But it's easy to
+     * mistype the very specific posix class syntax but yielding a valid
+     * regular bracketed class, so it silently gets compiled into something
+     * quite unintended.
+     *
+     * The solution adopted here maintains backward compatibility except that
+     * it adds a warning if it looks like a posix class was intended but
+     * improperly specified.  The warning is not raised unless what is input
+     * very closely resembles one of the 14 legal posix classes.  To do this,
+     * it uses fuzzy parsing.  It calculates how many single-character edits it
+     * would take to transform what was input into a legal posix class.  Only
+     * if that number is quite small does it think that the intention was a
+     * posix class.  Obviously these are heuristics, and there will be cases
+     * where it errs on one side or another, and they can be tweaked as
+     * experience informs.
+     *
+     * The syntax for a legal posix class is:
+     *
+     * qr/(?xa: \[ : \^? [:lower:]{4,6} : \] )/
+     *
+     * What this routine considers syntactically to be an intended posix class
+     * is this (the comments indicate some restrictions that the pattern
+     * doesn't show):
+     *
+     *  qr/(?x: \[?                         # The left bracket, possibly
+     *                                      # omitted
+     *          \h*                         # possibly followed by blanks
+     *          (?: \^ \h* )?               # possibly a misplaced caret
+     *          [:;]?                       # The opening class character,
+     *                                      # possibly omitted.  A typo
+     *                                      # semi-colon can also be used.
+     *          \h*
+     *          \^?                         # possibly a correctly placed
+     *                                      # caret, but not if there was also
+     *                                      # a misplaced one
+     *          \h*
+     *          .{3,15}                     # The class name.  If there are
+     *                                      # deviations from the legal syntax,
+     *                                      # its edit distance must be close
+     *                                      # to a real class name in order
+     *                                      # for it to be considered to be
+     *                                      # an intended posix class.
+     *          \h*
+     *          [:punct:]?                  # The closing class character,
+     *                                      # possibly omitted.  If not a colon
+     *                                      # nor semi colon, the class name
+     *                                      # must be even closer to a valid
+     *                                      # one
+     *          \h*
+     *          \]?                         # The right bracket, possibly
+     *                                      # omitted.
+     *     )/
+     *
+     * In the above, \h must be ASCII-only.
+     *
+     * These are heuristics, and can be tweaked as field experience dictates.
+     * There will be cases when someone didn't intend to specify a posix class
+     * that this warns as being so.  The goal is to minimize these, while
+     * maximizing the catching of things intended to be a posix class that
+     * aren't parsed as such.
+     */
+
+    const char* p             = s;
+    const char * const e      = RExC_end;
+    unsigned complement       = 0;      /* If to complement the class */
+    bool found_problem        = FALSE;  /* Assume OK until proven otherwise */
+    bool has_opening_bracket  = FALSE;
+    bool has_opening_colon    = FALSE;
+    int class_number          = OOB_NAMEDCLASS; /* Out-of-bounds until find
+                                                   valid class */
+    AV* warn_text             = NULL;   /* any warning messages */
+    const char * possible_end = NULL;   /* used for a 2nd parse pass */
+    const char* name_start;             /* ptr to class name first char */
+
+    /* If the number of single-character typos the input name is away from a
+     * legal name is no more than this number, it is considered to have meant
+     * the legal name */
+    int max_distance          = 2;
+
+    /* to store the name.  The size determines the maximum length before we
+     * decide that no posix class was intended.  Should be at least
+     * sizeof("alphanumeric") */
+    UV input_text[15];
+
+    PERL_ARGS_ASSERT_HANDLE_POSSIBLE_POSIX;
+
+    if (p >= e) {
+        return NOT_MEANT_TO_BE_A_POSIX_CLASS;
+    }
+
+    if (*(p - 1) != '[') {
+        ADD_POSIX_WARNING(p, "it doesn't start with a '['");
+        found_problem = TRUE;
+    }
+    else {
+        has_opening_bracket = TRUE;
+    }
+
+    /* They could be confused and think you can put spaces between the
+     * components */
+    if (isBLANK(*p)) {
+        found_problem = TRUE;
+
+        do {
+            p++;
+        } while (p < e && isBLANK(*p));
  
  
-    PERL_ARGS_ASSERT_REGPPOSIXCC;
+        ADD_POSIX_WARNING(p, NO_BLANKS_POSIX_WARNING);
+    }
  
  
-    if (value == '[' && RExC_parse + 1 < RExC_end &&
-       /* I smell either [: or [= or [. -- POSIX has been here, right? */
-       POSIXCC(UCHARAT(RExC_parse)))
+    /* For [. .] and [= =].  These are quite different internally from [: :],
+     * so they are handled separately.  */
+    if (POSIXCC_NOTYET(*p) && p < e - 3) /* 1 for the close, and 1 for the ']'
+                                            and 1 for at least one char in it
+                                          */
      {
      {
-       const char c = UCHARAT(RExC_parse);
-       char* const s = RExC_parse++;
+        const char open_char  = *p;
+        const char * temp_ptr = p + 1;
+
+        /* These two constructs are not handled by perl, and if we find a
+         * syntactically valid one, we croak.  khw, who wrote this code, finds
+         * this explanation of them very unclear:
+         * http://pubs.opengroup.org/onlinepubs/009696899/basedefs/xbd_chap09.html
+         * And searching the rest of the internet wasn't very helpful either.
+         * It looks like just about any byte can be in these constructs,
+         * depending on the locale.  But unless the pattern is being compiled
+         * under /l, which is very rare, Perl runs under the C or POSIX locale.
+         * In that case, it looks like [= =] isn't allowed at all, and that
+         * [. .] could be any single code point, but for longer strings the
+         * constituent characters would have to be the ASCII alphabetics plus
+         * the minus-hyphen.  Any sensible locale definition would limit itself
+         * to these.  And any portable one definitely should.  Trying to parse
+         * the general case is a nightmare (see [perl #127604]).  So, this code
+         * looks only for interiors of these constructs that match:
+         *      qr/.|[-\w]{2,}/
+         * Using \w relaxes the apparent rules a little, without adding much
+         * danger of mistaking something else for one of these constructs.
+         *
+         * [. .] in some implementations described on the internet is usable to
+         * escape a character that otherwise is special in bracketed character
+         * classes.  For example [.].] means a literal right bracket instead of
+         * the ending of the class
+         *
+         * [= =] can legitimately contain a [. .] construct, but we don't
+         * handle this case, as that [. .] construct will later get parsed
+         * itself and croak then.  And [= =] is checked for even when not under
+         * /l, as Perl has long done so.
+         *
+         * The code below relies on there being a trailing NUL, so it doesn't
+         * have to keep checking if the parse ptr < e.
+         */
+        if (temp_ptr[1] == open_char) {
+            temp_ptr++;
+        }
+        else while (    temp_ptr < e
+                    && (isWORDCHAR(*temp_ptr) || *temp_ptr == '-'))
+        {
+            temp_ptr++;
+        }
  
  
-       while (RExC_parse < RExC_end && UCHARAT(RExC_parse) != c)
-           RExC_parse++;
-       if (RExC_parse == RExC_end) {
-            if (strict) {
+        if (*temp_ptr == open_char) {
+            temp_ptr++;
+            if (*temp_ptr == ']') {
+                temp_ptr++;
+                if (! found_problem && ! check_only) {
+                    RExC_parse = (char *) temp_ptr;
+                    vFAIL3("POSIX syntax [%c %c] is reserved for future "
+                            "extensions", open_char, open_char);
+                }
  
  
-                /* Try to give a better location for the error (than the end of
-                 * the string) by looking for the matching ']' */
-                RExC_parse = s;
-                while (RExC_parse < RExC_end && UCHARAT(RExC_parse) != ']') {
-                    RExC_parse++;
+                /* Here, the syntax wasn't completely valid, or else the call
+                 * is to check-only */
+                if (updated_parse_ptr) {
+                    *updated_parse_ptr = (char *) temp_ptr;
                  }
                  }
-                vFAIL2("Unmatched '%c' in POSIX class", c);
+
+                return OOB_NAMEDCLASS;
              }
              }
-           /* Grandfather lone [:, [=, [. */
-           RExC_parse = s;
          }
          }
-       else {
-           const char* const t = RExC_parse++; /* skip over the c */
-           assert(*t == c);
-
-           if (UCHARAT(RExC_parse) == ']') {
-               const char *posixcc = s + 1;
-               RExC_parse++; /* skip over the ending ] */
-
-               if (*s == ':') {
-                   const I32 complement = *posixcc == '^' ? *posixcc++ : 0;
-                   const I32 skip = t - posixcc;
-
-                   /* Initially switch on the length of the name.  */
-                   switch (skip) {
-                   case 4:
-                        if (memEQ(posixcc, "word", 4)) /* this is not POSIX,
-                                                          this is the Perl \w
-                                                        */
-                           namedclass = ANYOF_WORDCHAR;
-                       break;
-                   case 5:
-                       /* Names all of length 5.  */
-                       /* alnum alpha ascii blank cntrl digit graph lower
-                          print punct space upper  */
-                       /* Offset 4 gives the best switch position.  */
-                       switch (posixcc[4]) {
-                       case 'a':
-                           if (memEQ(posixcc, "alph", 4)) /* alpha */
-                               namedclass = ANYOF_ALPHA;
-                           break;
-                       case 'e':
-                           if (memEQ(posixcc, "spac", 4)) /* space */
-                               namedclass = ANYOF_SPACE;
-                           break;
-                       case 'h':
-                           if (memEQ(posixcc, "grap", 4)) /* graph */
-                               namedclass = ANYOF_GRAPH;
-                           break;
-                       case 'i':
-                           if (memEQ(posixcc, "asci", 4)) /* ascii */
-                               namedclass = ANYOF_ASCII;
-                           break;
-                       case 'k':
-                           if (memEQ(posixcc, "blan", 4)) /* blank */
-                               namedclass = ANYOF_BLANK;
-                           break;
-                       case 'l':
-                           if (memEQ(posixcc, "cntr", 4)) /* cntrl */
-                               namedclass = ANYOF_CNTRL;
-                           break;
-                       case 'm':
-                           if (memEQ(posixcc, "alnu", 4)) /* alnum */
-                               namedclass = ANYOF_ALPHANUMERIC;
-                           break;
-                       case 'r':
-                           if (memEQ(posixcc, "lowe", 4)) /* lower */
-                               namedclass = (FOLD) ? ANYOF_CASED : ANYOF_LOWER;
-                           else if (memEQ(posixcc, "uppe", 4)) /* upper */
-                               namedclass = (FOLD) ? ANYOF_CASED : ANYOF_UPPER;
-                           break;
-                       case 't':
-                           if (memEQ(posixcc, "digi", 4)) /* digit */
-                               namedclass = ANYOF_DIGIT;
-                           else if (memEQ(posixcc, "prin", 4)) /* print */
-                               namedclass = ANYOF_PRINT;
-                           else if (memEQ(posixcc, "punc", 4)) /* punct */
-                               namedclass = ANYOF_PUNCT;
-                           break;
-                       }
-                       break;
-                   case 6:
-                       if (memEQ(posixcc, "xdigit", 6))
-                           namedclass = ANYOF_XDIGIT;
-                       break;
-                   }
  
  
-                   if (namedclass == OOB_NAMEDCLASS)
-                       vFAIL2utf8f(
-                            "POSIX class [:%"UTF8f":] unknown",
-                           UTF8fARG(UTF, t - s - 1, s + 1));
+        /* If we find something that started out to look like one of these
+         * constructs, but isn't, we continue below so that it can be checked
+         * for being a class name with a typo of '.' or '=' instead of a colon.
+         * */
+    }
  
  
-                    /* The #defines are structured so each complement is +1 to
-                     * the normal one */
-                    if (complement) {
-                        namedclass++;
-                    }
-                   assert (posixcc[skip] == ':');
-                   assert (posixcc[skip+1] == ']');
-               } else if (!SIZE_ONLY) {
-                   /* [[=foo=]] and [[.foo.]] are still future. */
-
-                   /* adjust RExC_parse so the warning shows after
-                      the class closes */
-                   while (UCHARAT(RExC_parse) && UCHARAT(RExC_parse) != ']')
-                       RExC_parse++;
-                   vFAIL3("POSIX syntax [%c %c] is reserved for future extensions", c, c);
-               }
-           } else {
-               /* Maternal grandfather:
-                * "[:" ending in ":" but not in ":]" */
-                if (strict) {
-                    vFAIL("Unmatched '[' in POSIX class");
-                }
+    /* Here, we think there is a possibility that a [: :] class was meant, and
+     * we have the first real character.  It could be they think the '^' comes
+     * first */
+    if (*p == '^') {
+        found_problem = TRUE;
+        ADD_POSIX_WARNING(p + 1, "the '^' must come after the colon");
+        complement = 1;
+        p++;
  
  
-                /* Grandfather lone [:, [=, [. */
-               RExC_parse = s;
-           }
-       }
+        if (isBLANK(*p)) {
+            found_problem = TRUE;
+
+            do {
+                p++;
+            } while (p < e && isBLANK(*p));
+
+            ADD_POSIX_WARNING(p, NO_BLANKS_POSIX_WARNING);
+        }
      }
  
      }
  
-    return namedclass;
-}
+    /* But the first character should be a colon, which they could have easily
+     * mistyped on a qwerty keyboard as a semi-colon (and which may be hard to
+     * distinguish from a colon, so treat that as a colon).  */
+    if (*p == ':') {
+        p++;
+        has_opening_colon = TRUE;
+    }
+    else if (*p == ';') {
+        found_problem = TRUE;
+        p++;
+        ADD_POSIX_WARNING(p, SEMI_COLON_POSIX_WARNING);
+        has_opening_colon = TRUE;
+    }
+    else {
+        found_problem = TRUE;
+        ADD_POSIX_WARNING(p, "there must be a starting ':'");
  
  
-STATIC bool
-S_could_it_be_a_POSIX_class(RExC_state_t *pRExC_state)
-{
-    /* This applies some heuristics at the current parse position (which should
-     * be at a '[') to see if what follows might be intended to be a [:posix:]
-     * class.  It returns true if it really is a posix class, of course, but it
-     * also can return true if it thinks that what was intended was a posix
-     * class that didn't quite make it.
-     *
-     * It will return true for
-     *      [:alphanumerics:
-     *      [:alphanumerics]  (as long as the ] isn't followed immediately by a
-     *                         ')' indicating the end of the (?[
-     *      [:any garbage including %^&$ punctuation:]
-     *
-     * This is designed to be called only from S_handle_regex_sets; it could be
-     * easily adapted to be called from the spot at the beginning of regclass()
-     * that checks to see in a normal bracketed class if the surrounding []
-     * have been omitted ([:word:] instead of [[:word:]]).  But doing so would
-     * change long-standing behavior, so I (khw) didn't do that */
-    char* p = RExC_parse + 1;
-    char first_char = *p;
+        /* Consider an initial punctuation (not one of the recognized ones) to
+         * be a left terminator */
+        if (*p != '^' && *p != ']' && isPUNCT(*p)) {
+            p++;
+        }
+    }
  
  
-    PERL_ARGS_ASSERT_COULD_IT_BE_A_POSIX_CLASS;
+    /* They may think that you can put spaces between the components */
+    if (isBLANK(*p)) {
+        found_problem = TRUE;
  
  
-    assert(*(p - 1) == '[');
+        do {
+            p++;
+        } while (p < e && isBLANK(*p));
  
  
-    if (! POSIXCC(first_char)) {
-        return FALSE;
+        ADD_POSIX_WARNING(p, NO_BLANKS_POSIX_WARNING);
      }
  
      }
  
-    p++;
-    while (p < RExC_end && isWORDCHAR(*p)) p++;
+    if (*p == '^') {
  
  
-    if (p >= RExC_end) {
-        return FALSE;
+        /* We consider something like [^:^alnum:]] to not have been intended to
+         * be a posix class, but XXX maybe we should */
+        if (complement) {
+            return NOT_MEANT_TO_BE_A_POSIX_CLASS;
+        }
+
+        complement = 1;
+        p++;
      }
  
      }
  
-    if (p - RExC_parse > 2    /* Got at least 1 word character */
-        && (*p == first_char
-            || (*p == ']' && p + 1 < RExC_end && *(p + 1) != ')')))
-    {
-        return TRUE;
+    /* Again, they may think that you can put spaces between the components */
+    if (isBLANK(*p)) {
+        found_problem = TRUE;
+
+        do {
+            p++;
+        } while (p < e && isBLANK(*p));
+
+        ADD_POSIX_WARNING(p, NO_BLANKS_POSIX_WARNING);
      }
  
      }
  
-    p = (char *) memchr(RExC_parse, ']', RExC_end - RExC_parse);
+    if (*p == ']') {
+
+        /* XXX This ']' may be a typo, and something else was meant.  But
+         * treating it as such creates enough complications, that that
+         * possibility isn't currently considered here.  So we assume that the
+         * ']' is what is intended, and if we've already found an initial '[',
+         * this leaves this construct looking like [:] or [:^], which almost
+         * certainly weren't intended to be posix classes */
+        if (has_opening_bracket) {
+            return NOT_MEANT_TO_BE_A_POSIX_CLASS;
+        }
+
+        /* But this function can be called when we parse the colon for
+         * something like qr/[alpha:]]/, so we back up to look for the
+         * beginning */
+        p--;
+
+        if (*p == ';') {
+            found_problem = TRUE;
+            ADD_POSIX_WARNING(p, SEMI_COLON_POSIX_WARNING);
+        }
+        else if (*p != ':') {
+
+            /* XXX We are currently very restrictive here, so this code doesn't
+             * consider the possibility that, say, /[alpha.]]/ was intended to
+             * be a posix class. */
+            return NOT_MEANT_TO_BE_A_POSIX_CLASS;
+        }
+
+        /* Here we have something like 'foo:]'.  There was no initial colon,
+         * and we back up over 'foo.  XXX Unlike the going forward case, we
+         * don't handle typos of non-word chars in the middle */
+        has_opening_colon = FALSE;
+        p--;
+
+        while (p > RExC_start && isWORDCHAR(*p)) {
+            p--;
+        }
+        p++;
+
+        /* Here, we have positioned ourselves to where we think the first
+         * character in the potential class is */
+    }
+
+    /* Now the interior really starts.  There are certain key characters that
+     * can end the interior, or these could just be typos.  To catch both
+     * cases, we may have to do two passes.  In the first pass, we keep on
+     * going unless we come to a sequence that matches
+     *      qr/ [[:punct:]] [[:blank:]]* \] /xa
+     * This means it takes a sequence to end the pass, so two typos in a row if
+     * that wasn't what was intended.  If the class is perfectly formed, just
+     * this one pass is needed.  We also stop if there are too many characters
+     * being accumulated, but this number is deliberately set higher than any
+     * real class.  It is set high enough so that someone who thinks that
+     * 'alphanumeric' is a correct name would get warned that it wasn't.
+     * While doing the pass, we keep track of where the key characters were in
+     * it.  If we don't find an end to the class, and one of the key characters
+     * was found, we redo the pass, but stop when we get to that character.
+     * Thus the key character was considered a typo in the first pass, but a
+     * terminator in the second.  If two key characters are found, we stop at
+     * the second one in the first pass.  Again this can miss two typos, but
+     * catches a single one
+     *
+     * In the first pass, 'possible_end' starts as NULL, and then gets set to
+     * point to the first key character.  For the second pass, it starts as -1.
+     * */
+
+    name_start = p;
+  parse_name:
+    {
+        bool has_blank               = FALSE;
+        bool has_upper               = FALSE;
+        bool has_terminating_colon   = FALSE;
+        bool has_terminating_bracket = FALSE;
+        bool has_semi_colon          = FALSE;
+        unsigned int name_len        = 0;
+        int punct_count              = 0;
+
+        while (p < e) {
+
+            /* Squeeze out blanks when looking up the class name below */
+            if (isBLANK(*p) ) {
+                has_blank = TRUE;
+                found_problem = TRUE;
+                p++;
+                continue;
+            }
+
+            /* The name will end with a punctuation */
+            if (isPUNCT(*p)) {
+                const char * peek = p + 1;
+
+                /* Treat any non-']' punctuation followed by a ']' (possibly
+                 * with intervening blanks) as trying to terminate the class.
+                 * ']]' is very likely to mean a class was intended (but
+                 * missing the colon), but the warning message that gets
+                 * generated shows the error position better if we exit the
+                 * loop at the bottom (eventually), so skip it here. */
+                if (*p != ']') {
+                    if (peek < e && isBLANK(*peek)) {
+                        has_blank = TRUE;
+                        found_problem = TRUE;
+                        do {
+                            peek++;
+                        } while (peek < e && isBLANK(*peek));
+                    }
+
+                    if (peek < e && *peek == ']') {
+                        has_terminating_bracket = TRUE;
+                        if (*p == ':') {
+                            has_terminating_colon = TRUE;
+                        }
+                        else if (*p == ';') {
+                            has_semi_colon = TRUE;
+                            has_terminating_colon = TRUE;
+                        }
+                        else {
+                            found_problem = TRUE;
+                        }
+                        p = peek + 1;
+                        goto try_posix;
+                    }
+                }
+
+                /* Here we have punctuation we thought didn't end the class.
+                 * Keep track of the position of the key characters that are
+                 * more likely to have been class-enders */
+                if (*p == ']' || *p == '[' || *p == ':' || *p == ';') {
+
+                    /* Allow just one such possible class-ender not actually
+                     * ending the class. */
+                    if (possible_end) {
+                        break;
+                    }
+                    possible_end = p;
+                }
+
+                /* If we have too many punctuation characters, no use in
+                 * keeping going */
+                if (++punct_count > max_distance) {
+                    break;
+                }
+
+                /* Treat the punctuation as a typo. */
+                input_text[name_len++] = *p;
+                p++;
+            }
+            else if (isUPPER(*p)) { /* Use lowercase for lookup */
+                input_text[name_len++] = toLOWER(*p);
+                has_upper = TRUE;
+                found_problem = TRUE;
+                p++;
+            } else if (! UTF || UTF8_IS_INVARIANT(*p)) {
+                input_text[name_len++] = *p;
+                p++;
+            }
+            else {
+                input_text[name_len++] = utf8_to_uvchr_buf((U8 *) p, e, NULL);
+                p+= UTF8SKIP(p);
+            }
+
+            /* The declaration of 'input_text' is how long we allow a potential
+             * class name to be, before saying they didn't mean a class name at
+             * all */
+            if (name_len >= C_ARRAY_LENGTH(input_text)) {
+                break;
+            }
+        }
+
+        /* We get to here when the possible class name hasn't been properly
+         * terminated before:
+         *   1) we ran off the end of the pattern; or
+         *   2) found two characters, each of which might have been intended to
+         *      be the name's terminator
+         *   3) found so many punctuation characters in the purported name,
+         *      that the edit distance to a valid one is exceeded
+         *   4) we decided it was more characters than anyone could have
+         *      intended to be one. */
+
+        found_problem = TRUE;
+
+        /* In the final two cases, we know that looking up what we've
+         * accumulated won't lead to a match, even a fuzzy one. */
+        if (   name_len >= C_ARRAY_LENGTH(input_text)
+            || punct_count > max_distance)
+        {
+            /* If there was an intermediate key character that could have been
+             * an intended end, redo the parse, but stop there */
+            if (possible_end && possible_end != (char *) -1) {
+                possible_end = (char *) -1; /* Special signal value to say
+                                               we've done a first pass */
+                p = name_start;
+                goto parse_name;
+            }
+
+            /* Otherwise, it can't have meant to have been a class */
+            return NOT_MEANT_TO_BE_A_POSIX_CLASS;
+        }
+
+        /* If we ran off the end, and the final character was a punctuation
+         * one, back up one, to look at that final one just below.  Later, we
+         * will restore the parse pointer if appropriate */
+        if (name_len && p == e && isPUNCT(*(p-1))) {
+            p--;
+            name_len--;
+        }
+
+        if (p < e && isPUNCT(*p)) {
+            if (*p == ']') {
+                has_terminating_bracket = TRUE;
+
+                /* If this is a 2nd ']', and the first one is just below this
+                 * one, consider that to be the real terminator.  This gives a
+                 * uniform and better positioning for the warning message  */
+                if (   possible_end
+                    && possible_end != (char *) -1
+                    && *possible_end == ']'
+                    && name_len && input_text[name_len - 1] == ']')
+                {
+                    name_len--;
+                    p = possible_end;
+
+                    /* And this is actually equivalent to having done the 2nd
+                     * pass now, so set it to not try again */
+                    possible_end = (char *) -1;
+                }
+            }
+            else {
+                if (*p == ':') {
+                    has_terminating_colon = TRUE;
+                }
+                else if (*p == ';') {
+                    has_semi_colon = TRUE;
+                    has_terminating_colon = TRUE;
+                }
+                p++;
+            }
+        }
+
+    try_posix:
+
+        /* Here, we have a class name to look up.  We can short circuit the
+         * stuff below for short names that can't possibly be meant to be a
+         * class name.  (We can do this on the first pass, as any second pass
+         * will yield an even shorter name) */
+        if (name_len < 3) {
+            return NOT_MEANT_TO_BE_A_POSIX_CLASS;
+        }
+
+        /* Find which class it is.  Initially switch on the length of the name.
+         * */
+        switch (name_len) {
+            case 4:
+                if (memEQ(name_start, "word", 4)) {
+                    /* this is not POSIX, this is the Perl \w */
+                    class_number = ANYOF_WORDCHAR;
+                }
+                break;
+            case 5:
+                /* Names all of length 5: alnum alpha ascii blank cntrl digit
+                 *                        graph lower print punct space upper
+                 * Offset 4 gives the best switch position.  */
+                switch (name_start[4]) {
+                    case 'a':
+                        if (memEQ(name_start, "alph", 4)) /* alpha */
+                            class_number = ANYOF_ALPHA;
+                        break;
+                    case 'e':
+                        if (memEQ(name_start, "spac", 4)) /* space */
+                            class_number = ANYOF_SPACE;
+                        break;
+                    case 'h':
+                        if (memEQ(name_start, "grap", 4)) /* graph */
+                            class_number = ANYOF_GRAPH;
+                        break;
+                    case 'i':
+                        if (memEQ(name_start, "asci", 4)) /* ascii */
+                            class_number = ANYOF_ASCII;
+                        break;
+                    case 'k':
+                        if (memEQ(name_start, "blan", 4)) /* blank */
+                            class_number = ANYOF_BLANK;
+                        break;
+                    case 'l':
+                        if (memEQ(name_start, "cntr", 4)) /* cntrl */
+                            class_number = ANYOF_CNTRL;
+                        break;
+                    case 'm':
+                        if (memEQ(name_start, "alnu", 4)) /* alnum */
+                            class_number = ANYOF_ALPHANUMERIC;
+                        break;
+                    case 'r':
+                        if (memEQ(name_start, "lowe", 4)) /* lower */
+                            class_number = (FOLD) ? ANYOF_CASED : ANYOF_LOWER;
+                        else if (memEQ(name_start, "uppe", 4)) /* upper */
+                            class_number = (FOLD) ? ANYOF_CASED : ANYOF_UPPER;
+                        break;
+                    case 't':
+                        if (memEQ(name_start, "digi", 4)) /* digit */
+                            class_number = ANYOF_DIGIT;
+                        else if (memEQ(name_start, "prin", 4)) /* print */
+                            class_number = ANYOF_PRINT;
+                        else if (memEQ(name_start, "punc", 4)) /* punct */
+                            class_number = ANYOF_PUNCT;
+                        break;
+                }
+                break;
+            case 6:
+                if (memEQ(name_start, "xdigit", 6))
+                    class_number = ANYOF_XDIGIT;
+                break;
+        }
+
+        /* If the name exactly matches a posix class name the class number will
+         * here be set to it, and the input almost certainly was meant to be a
+         * posix class, so we can skip further checking.  If instead the syntax
+         * is exactly correct, but the name isn't one of the legal ones, we
+         * will return that as an error below.  But if neither of these apply,
+         * it could be that no posix class was intended at all, or that one
+         * was, but there was a typo.  We tease these apart by doing fuzzy
+         * matching on the name */
+        if (class_number == OOB_NAMEDCLASS && found_problem) {
+            const UV posix_names[][6] = {
+                                                { 'a', 'l', 'n', 'u', 'm' },
+                                                { 'a', 'l', 'p', 'h', 'a' },
+                                                { 'a', 's', 'c', 'i', 'i' },
+                                                { 'b', 'l', 'a', 'n', 'k' },
+                                                { 'c', 'n', 't', 'r', 'l' },
+                                                { 'd', 'i', 'g', 'i', 't' },
+                                                { 'g', 'r', 'a', 'p', 'h' },
+                                                { 'l', 'o', 'w', 'e', 'r' },
+                                                { 'p', 'r', 'i', 'n', 't' },
+                                                { 'p', 'u', 'n', 'c', 't' },
+                                                { 's', 'p', 'a', 'c', 'e' },
+                                                { 'u', 'p', 'p', 'e', 'r' },
+                                                { 'w', 'o', 'r', 'd' },
+                                                { 'x', 'd', 'i', 'g', 'i', 't' }
+                                            };
+            /* The names of the above all have added NULs to make them the same
+             * size, so we need to also have the real lengths */
+            const UV posix_name_lengths[] = {
+                                                sizeof("alnum") - 1,
+                                                sizeof("alpha") - 1,
+                                                sizeof("ascii") - 1,
+                                                sizeof("blank") - 1,
+                                                sizeof("cntrl") - 1,
+                                                sizeof("digit") - 1,
+                                                sizeof("graph") - 1,
+                                                sizeof("lower") - 1,
+                                                sizeof("print") - 1,
+                                                sizeof("punct") - 1,
+                                                sizeof("space") - 1,
+                                                sizeof("upper") - 1,
+                                                sizeof("word")  - 1,
+                                                sizeof("xdigit")- 1
+                                            };
+            unsigned int i;
+            int temp_max = max_distance;    /* Use a temporary, so if we
+                                               reparse, we haven't changed the
+                                               outer one */
+
+            /* Use a smaller max edit distance if we are missing one of the
+             * delimiters */
+            if (   has_opening_bracket + has_opening_colon < 2
+                || has_terminating_bracket + has_terminating_colon < 2)
+            {
+                temp_max--;
+            }
+
+            /* See if the input name is close to a legal one */
+            for (i = 0; i < C_ARRAY_LENGTH(posix_names); i++) {
+
+                /* Short circuit call if the lengths are too far apart to be
+                 * able to match */
+                if (abs( (int) (name_len - posix_name_lengths[i]))
+                    > temp_max)
+                {
+                    continue;
+                }
+
+                if (edit_distance(input_text,
+                                  posix_names[i],
+                                  name_len,
+                                  posix_name_lengths[i],
+                                  temp_max
+                                 )
+                    > -1)
+                { /* If it is close, it probably was intended to be a class */
+                    goto probably_meant_to_be;
+                }
+            }
+
+            /* Here the input name is not close enough to a valid class name
+             * for us to consider it to be intended to be a posix class.  If
+             * we haven't already done so, and the parse found a character that
+             * could have been terminators for the name, but which we absorbed
+             * as typos during the first pass, repeat the parse, signalling it
+             * to stop at that character */
+            if (possible_end && possible_end != (char *) -1) {
+                possible_end = (char *) -1;
+                p = name_start;
+                goto parse_name;
+            }
+
+            /* Here neither pass found a close-enough class name */
+            return NOT_MEANT_TO_BE_A_POSIX_CLASS;
+        }
+
+    probably_meant_to_be:
+
+        /* Here we think that a posix specification was intended.  Update any
+         * parse pointer */
+        if (updated_parse_ptr) {
+            *updated_parse_ptr = (char *) p;
+        }
+
+        /* If a posix class name was intended but incorrectly specified, we
+         * output or return the warnings */
+        if (found_problem) {
+
+            /* We set flags for these issues in the parse loop above instead of
+             * adding them to the list of warnings, because we can parse it
+             * twice, and we only want one warning instance */
+            if (has_upper) {
+                ADD_POSIX_WARNING(p, "the name must be all lowercase letters");
+            }
+            if (has_blank) {
+                ADD_POSIX_WARNING(p, NO_BLANKS_POSIX_WARNING);
+            }
+            if (has_semi_colon) {
+                ADD_POSIX_WARNING(p, SEMI_COLON_POSIX_WARNING);
+            }
+            else if (! has_terminating_colon) {
+                ADD_POSIX_WARNING(p, "there is no terminating ':'");
+            }
+            if (! has_terminating_bracket) {
+                ADD_POSIX_WARNING(p, "there is no terminating ']'");
+            }
+
+            if (warn_text) {
+                if (posix_warnings) {
+                    /* mortalize to avoid a leak with FATAL warnings */
+                    *posix_warnings = (AV *) sv_2mortal((SV *) warn_text);
+                }
+                else {
+                    SvREFCNT_dec_NN(warn_text);
+                }
+            }
+        }
+        else if (class_number != OOB_NAMEDCLASS) {
+            /* If it is a known class, return the class.  The class number
+             * #defines are structured so each complement is +1 to the normal
+             * one */
+            return class_number + complement;
+        }
+        else if (! check_only) {
+
+            /* Here, it is an unrecognized class.  This is an error (unless the
+            * call is to check only, which we've already handled above) */
+            const char * const complement_string = (complement)
+                                                   ? "^"
+                                                   : "";
+            RExC_parse = (char *) p;
+            vFAIL3utf8f("POSIX class [:%s%"UTF8f":] unknown",
+                        complement_string,
+                        UTF8fARG(UTF, RExC_parse - name_start - 2, name_start));
+        }
+    }
  
  
-    return (p
-            && p - RExC_parse > 2 /* [:] evaluates to colon;
-                                      [::] is a bad posix class. */
-            && first_char == *(p - 1));
+    return OOB_NAMEDCLASS;
  }
  }
+#undef ADD_POSIX_WARNING
  
  STATIC unsigned  int
  S_regex_set_precedence(const U8 my_operator) {
  
  STATIC unsigned  int
  S_regex_set_precedence(const U8 my_operator) {
@@ -13398,6 +14477,7 @@ S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV** return_invlist,
      const bool save_fold = FOLD;    /* Temporary */
      char *save_end, *save_parse;    /* Temporaries */
      const bool in_locale = LOC;     /* we turn off /l during processing */
      const bool save_fold = FOLD;    /* Temporary */
      char *save_end, *save_parse;    /* Temporaries */
      const bool in_locale = LOC;     /* we turn off /l during processing */
+    AV* posix_warnings = NULL;
  
      GET_RE_DEBUG_FLAGS_DECL;
  
  
      GET_RE_DEBUG_FLAGS_DECL;
  
@@ -13435,27 +14515,27 @@ S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV** return_invlist,
                  default:
                      break;
                  case '\\':
                  default:
                      break;
                  case '\\':
-                    /* Skip the next byte (which could cause us to end up in
-                     * the middle of a UTF-8 character, but since none of those
-                     * are confusable with anything we currently handle in this
-                     * switch (invariants all), it's safe.  We'll just hit the
-                     * default: case next time and keep on incrementing until
-                     * we find one of the invariants we do handle. */
+                    /* Skip past this, so the next character gets skipped, after
+                     * the switch */
                      RExC_parse++;
                      if (*RExC_parse == 'c') {
                              /* Skip the \cX notation for control characters */
                              RExC_parse += UTF ? UTF8SKIP(RExC_parse) : 1;
                      }
                      break;
                      RExC_parse++;
                      if (*RExC_parse == 'c') {
                              /* Skip the \cX notation for control characters */
                              RExC_parse += UTF ? UTF8SKIP(RExC_parse) : 1;
                      }
                      break;
+
                  case '[':
                  {
                  case '[':
                  {
-                    /* If this looks like it is a [:posix:] class, leave the
-                     * parse pointer at the '[' to fool regclass() into
-                     * thinking it is part of a '[[:posix:]]'.  That function
-                     * will use strict checking to force a syntax error if it
-                     * doesn't work out to a legitimate class */
-                    bool is_posix_class
-                                    = could_it_be_a_POSIX_class(pRExC_state);
+                    /* See if this is a [:posix:] class. */
+                    bool is_posix_class = (OOB_NAMEDCLASS
+                            < handle_possible_posix(pRExC_state,
+                                                RExC_parse + 1,
+                                                NULL,
+                                                NULL,
+                                                TRUE /* checking only */));
+                    /* If it is a posix class, leave the parse pointer at the
+                     * '[' to fool regclass() into thinking it is part of a
+                     * '[[:posix:]]'. */
                      if (! is_posix_class) {
                          RExC_parse++;
                      }
                      if (! is_posix_class) {
                          RExC_parse++;
                      }
@@ -13470,7 +14550,8 @@ S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV** return_invlist,
                                    TRUE, /* silence non-portable warnings. */
                                    TRUE, /* strict */
                                    FALSE, /* Require return to be an ANYOF */
                                    TRUE, /* silence non-portable warnings. */
                                    TRUE, /* strict */
                                    FALSE, /* Require return to be an ANYOF */
-                                  &current
+                                  &current,
+                                  &posix_warnings
                                   ))
                          FAIL2("panic: regclass returned NULL to handle_sets, "
                                "flags=%#"UVxf"", (UV) *flagp);
                                   ))
                          FAIL2("panic: regclass returned NULL to handle_sets, "
                                "flags=%#"UVxf"", (UV) *flagp);
@@ -13488,9 +14569,7 @@ S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV** return_invlist,
                  case ']':
                      if (depth--) break;
                      RExC_parse++;
                  case ']':
                      if (depth--) break;
                      RExC_parse++;
-                    if (RExC_parse < RExC_end
-                        && *RExC_parse == ')')
-                    {
+                    if (*RExC_parse == ')') {
                          node = reganode(pRExC_state, ANYOF, 0);
                          RExC_size += ANYOF_SKIP;
                          nextchar(pRExC_state);
                          node = reganode(pRExC_state, ANYOF, 0);
                          RExC_size += ANYOF_SKIP;
                          nextchar(pRExC_state);
@@ -13509,6 +14588,12 @@ S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV** return_invlist,
          }
  
        no_close:
          }
  
        no_close:
+        /* We output the messages even if warnings are off, because we'll fail
+         * the very next thing, and these give a likely diagnosis for that */
+        if (posix_warnings && av_tindex(posix_warnings) >= 0) {
+            output_or_return_posix_warnings(pRExC_state, posix_warnings, NULL);
+        }
+
          FAIL("Syntax error in (?[...])");
      }
  
          FAIL("Syntax error in (?[...])");
      }
  
@@ -13516,10 +14601,7 @@ S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV** return_invlist,
      Perl_ck_warner_d(aTHX_
          packWARN(WARN_EXPERIMENTAL__REGEX_SETS),
          "The regex_sets feature is experimental" REPORT_LOCATION,
      Perl_ck_warner_d(aTHX_
          packWARN(WARN_EXPERIMENTAL__REGEX_SETS),
          "The regex_sets feature is experimental" REPORT_LOCATION,
-            UTF8fARG(UTF, (RExC_parse - RExC_precomp), RExC_precomp),
-            UTF8fARG(UTF,
-                     RExC_end - RExC_start - (RExC_parse - RExC_precomp),
-                     RExC_precomp + (RExC_parse - RExC_precomp)));
+        REPORT_LOCATION_ARGS(RExC_parse));
  
      /* Everything in this construct is a metacharacter.  Operands begin with
       * either a '\' (for an escape sequence), or a '[' for a bracketed
  
      /* Everything in this construct is a metacharacter.  Operands begin with
       * either a '\' (for an escape sequence), or a '[' for a bracketed
@@ -13629,7 +14711,8 @@ redo_curchar:
  
              case '(':
  
  
              case '(':
  
-                if (RExC_parse < RExC_end && (UCHARAT(RExC_parse + 1) == '?'))
+                if (   RExC_parse < RExC_end - 1
+                    && (UCHARAT(RExC_parse + 1) == '?'))
                  {
                      /* If is a '(?', could be an embedded '(?flags:(?[...])'.
                       * This happens when we have some thing like
                  {
                      /* If is a '(?', could be an embedded '(?flags:(?[...])'.
                       * This happens when we have some thing like
@@ -13685,12 +14768,12 @@ redo_curchar:
                       * inversion list, and RExC_parse points to the trailing
                       * ']'; the next character should be the ')' */
                      RExC_parse++;
                       * inversion list, and RExC_parse points to the trailing
                       * ']'; the next character should be the ')' */
                      RExC_parse++;
-                    assert(RExC_parse < RExC_end && UCHARAT(RExC_parse) == ')');
+                    assert(UCHARAT(RExC_parse) == ')');
  
                      /* Then the ')' matching the original '(' handled by this
                       * case: statement */
                      RExC_parse++;
  
                      /* Then the ')' matching the original '(' handled by this
                       * case: statement */
                      RExC_parse++;
-                    assert(RExC_parse < RExC_end && UCHARAT(RExC_parse) == ')');
+                    assert(UCHARAT(RExC_parse) == ')');
  
                      RExC_parse++;
                      RExC_flags = save_flags;
  
                      RExC_parse++;
                      RExC_flags = save_flags;
@@ -13730,7 +14813,8 @@ redo_curchar:
                                FALSE, /* don't silence non-portable warnings.  */
                                TRUE,  /* strict */
                                FALSE, /* Require return to be an ANYOF */
                                FALSE, /* don't silence non-portable warnings.  */
                                TRUE,  /* strict */
                                FALSE, /* Require return to be an ANYOF */
-                              &current))
+                              &current,
+                              NULL))
                  {
                      FAIL2("panic: regclass returned NULL to handle_sets, "
                            "flags=%#"UVxf"", (UV) *flagp);
                  {
                      FAIL2("panic: regclass returned NULL to handle_sets, "
                            "flags=%#"UVxf"", (UV) *flagp);
@@ -13743,23 +14827,33 @@ redo_curchar:
  
              case '[':   /* Is a bracketed character class */
              {
  
              case '[':   /* Is a bracketed character class */
              {
-                bool is_posix_class = could_it_be_a_POSIX_class(pRExC_state);
-
+                /* See if this is a [:posix:] class. */
+                bool is_posix_class = (OOB_NAMEDCLASS
+                            < handle_possible_posix(pRExC_state,
+                                                RExC_parse + 1,
+                                                NULL,
+                                                NULL,
+                                                TRUE /* checking only */));
+                /* If it is a posix class, leave the parse pointer at the '['
+                 * to fool regclass() into thinking it is part of a
+                 * '[[:posix:]]'. */
                  if (! is_posix_class) {
                      RExC_parse++;
                  }
  
                  /* regclass() can only return RESTART_PASS1 and NEED_UTF8 if
                   * multi-char folds are allowed.  */
                  if (! is_posix_class) {
                      RExC_parse++;
                  }
  
                  /* regclass() can only return RESTART_PASS1 and NEED_UTF8 if
                   * multi-char folds are allowed.  */
-                if(!regclass(pRExC_state, flagp,depth+1,
-                             is_posix_class, /* parse the whole char class
-                                                only if not a posix class */
-                             FALSE, /* don't allow multi-char folds */
-                             FALSE, /* don't silence non-portable warnings.  */
-                             TRUE,   /* strict */
-                             FALSE, /* Require return to be an ANYOF */
-                             &current
-                            ))
+                if (!regclass(pRExC_state, flagp,depth+1,
+                                is_posix_class, /* parse the whole char
+                                                    class only if not a
+                                                    posix class */
+                                FALSE, /* don't allow multi-char folds */
+                                TRUE, /* silence non-portable warnings. */
+                                TRUE, /* strict */
+                                FALSE, /* Require return to be an ANYOF */
+                                &current,
+                                NULL
+                                ))
                  {
                      FAIL2("panic: regclass returned NULL to handle_sets, "
                            "flags=%#"UVxf"", (UV) *flagp);
                  {
                      FAIL2("panic: regclass returned NULL to handle_sets, "
                            "flags=%#"UVxf"", (UV) *flagp);
@@ -13900,7 +14994,13 @@ redo_curchar:
                  }
  
                  lhs = av_pop(stack);
                  }
  
                  lhs = av_pop(stack);
-                assert(IS_OPERAND(lhs));
+
+                if (! IS_OPERAND(lhs)) {
+
+                    /* This can happen when there is an empty (), like in
+                     * /(?[[0]+()+])/ */
+                    goto bad_syntax;
+                }
  
                  switch (stacked_operator) {
                      case '&':
  
                  switch (stacked_operator) {
                      case '&':
@@ -13946,9 +15046,20 @@ redo_curchar:
                  av_push(stack, rhs);
                  goto redo_curchar;
  
                  av_push(stack, rhs);
                  goto redo_curchar;
  
-            case '!':   /* Highest priority, right associative, so just push
-                           onto stack */
-                av_push(stack, newSVuv(curchar));
+            case '!':   /* Highest priority, right associative */
+
+                /* If what's already at the top of the stack is another '!",
+                 * they just cancel each other out */
+                if (   (top_ptr = av_fetch(stack, top_index, FALSE))
+                    && (IS_OPERATOR(*top_ptr) && SvUV(*top_ptr) == '!'))
+                {
+                    only_to_avoid_leaks = av_pop(stack);
+                    SvREFCNT_dec(only_to_avoid_leaks);
+                }
+                else { /* Otherwise, since it's right associative, just push
+                          onto the stack */
+                    av_push(stack, newSVuv(curchar));
+                }
                  break;
  
              default:
                  break;
  
              default:
@@ -14070,6 +15181,7 @@ redo_curchar:
                               they're valid on this machine */
                      FALSE, /* similarly, no need for strict */
                      FALSE, /* Require return to be an ANYOF */
                               they're valid on this machine */
                      FALSE, /* similarly, no need for strict */
                      FALSE, /* Require return to be an ANYOF */
+                    NULL,
                      NULL
                  );
      if (!node)
                      NULL
                  );
      if (!node)
@@ -14098,7 +15210,8 @@ redo_curchar:
          assert(OP(node) == ANYOF);
  
          OP(node) = ANYOFL;
          assert(OP(node) == ANYOF);
  
          OP(node) = ANYOFL;
-        ANYOF_FLAGS(node) |= ANYOF_LOC_REQ_UTF8;
+        ANYOF_FLAGS(node)
+                |= ANYOFL_SHARED_UTF8_LOCALE_fold_HAS_MATCHES_nonfold_REQD;
      }
  
      if (save_fold) {
      }
  
      if (save_fold) {
@@ -14195,6 +15308,43 @@ S_add_above_Latin1_folds(pTHX_ RExC_state_t *pRExC_state, const U8 cp, SV** invl
      }
  }
  
      }
  }
  
+STATIC void
+S_output_or_return_posix_warnings(pTHX_ RExC_state_t *pRExC_state, AV* posix_warnings, AV** return_posix_warnings)
+{
+    /* If the final parameter is NULL, output the elements of the array given
+     * by '*posix_warnings' as REGEXP warnings.  Otherwise, the elements are
+     * pushed onto it, (creating if necessary) */
+
+    SV * msg;
+    const bool first_is_fatal =  ! return_posix_warnings
+                                && ckDEAD(packWARN(WARN_REGEXP));
+
+    PERL_ARGS_ASSERT_OUTPUT_OR_RETURN_POSIX_WARNINGS;
+
+    while ((msg = av_shift(posix_warnings)) != &PL_sv_undef) {
+        if (return_posix_warnings) {
+            if (! *return_posix_warnings) { /* mortalize to not leak if
+                                               warnings are fatal */
+                *return_posix_warnings = (AV *) sv_2mortal((SV *) newAV());
+            }
+            av_push(*return_posix_warnings, msg);
+        }
+        else {
+            if (first_is_fatal) {           /* Avoid leaking this */
+                av_undef(posix_warnings);   /* This isn't necessary if the
+                                               array is mortal, but is a
+                                               fail-safe */
+                (void) sv_2mortal(msg);
+                if (PASS2) {
+                    SAVEFREESV(RExC_rx_sv);
+                }
+            }
+            Perl_warner(aTHX_ packWARN(WARN_REGEXP), "%s", SvPVX(msg));
+            SvREFCNT_dec_NN(msg);
+        }
+    }
+}
+
  STATIC AV *
  S_add_multi_match(pTHX_ AV* multi_char_matches, SV* multi_string, const STRLEN cp_count)
  {
  STATIC AV *
  S_add_multi_match(pTHX_ AV* multi_char_matches, SV* multi_string, const STRLEN cp_count)
  {
@@ -14257,8 +15407,7 @@ S_add_multi_match(pTHX_ AV* multi_char_matches, SV* multi_string, const STRLEN c
  #define SKIP_BRACKETED_WHITE_SPACE(do_skip, p)                          \
      STMT_START {                                                        \
          if (do_skip) {                                                  \
  #define SKIP_BRACKETED_WHITE_SPACE(do_skip, p)                          \
      STMT_START {                                                        \
          if (do_skip) {                                                  \
-            while (   p < RExC_end                                      \
-                   && isBLANK_A(UCHARAT(p)))                            \
+            while (isBLANK_A(UCHARAT(p)))                               \
              {                                                           \
                  p++;                                                    \
              }                                                           \
              {                                                           \
                  p++;                                                    \
              }                                                           \
@@ -14276,7 +15425,8 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                   const bool strict,
                   bool optimizable,                  /* ? Allow a non-ANYOF return
                                                         node */
                   const bool strict,
                   bool optimizable,                  /* ? Allow a non-ANYOF return
                                                         node */
-                 SV** ret_invlist  /* Return an inversion list, not a node */
+                 SV** ret_invlist, /* Return an inversion list, not a node */
+                 AV** return_posix_warnings
            )
  {
      /* parse a bracketed class specification.  Most of these will produce an
            )
  {
      /* parse a bracketed class specification.  Most of these will produce an
@@ -14310,7 +15460,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
      UV value = OOB_UNICODE, save_value = OOB_UNICODE;
      regnode *ret;
      STRLEN numlen;
      UV value = OOB_UNICODE, save_value = OOB_UNICODE;
      regnode *ret;
      STRLEN numlen;
-    IV namedclass = OOB_NAMEDCLASS;
+    int namedclass = OOB_NAMEDCLASS;
      char *rangebegin = NULL;
      bool need_class = 0;
      SV *listsv = NULL;
      char *rangebegin = NULL;
      bool need_class = 0;
      SV *listsv = NULL;
@@ -14353,8 +15503,9 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
      bool has_user_defined_property = FALSE;
  
      /* inversion list of code points this node matches only when the target
      bool has_user_defined_property = FALSE;
  
      /* inversion list of code points this node matches only when the target
-     * string is in UTF-8.  (Because is under /d) */
-    SV* depends_list = NULL;
+     * string is in UTF-8.  These are all non-ASCII, < 256.  (Because is under
+     * /d) */
+    SV* has_upper_latin1_only_utf8_matches = NULL;
  
      /* Inversion list of code points this node matches regardless of things
       * like locale, folding, utf8ness of the target string */
  
      /* Inversion list of code points this node matches regardless of things
       * like locale, folding, utf8ness of the target string */
@@ -14388,6 +15539,18 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
      const char * orig_parse = RExC_parse;
      const SSize_t orig_size = RExC_size;
      bool posixl_matches_all = FALSE; /* Does /l class have both e.g. \W,\w ? */
      const char * orig_parse = RExC_parse;
      const SSize_t orig_size = RExC_size;
      bool posixl_matches_all = FALSE; /* Does /l class have both e.g. \W,\w ? */
+
+    /* This variable is used to mark where the end in the input is of something
+     * that looks like a POSIX construct but isn't.  During the parse, when
+     * something looks like it could be such a construct is encountered, it is
+     * checked for being one, but not if we've already checked this area of the
+     * input.  Only after this position is reached do we check again */
+    char *not_posix_region_end = RExC_parse - 1;
+
+    AV* posix_warnings = NULL;
+    const bool do_posix_warnings =     return_posix_warnings
+                                   || (PASS2 && ckWARN(WARN_REGEXP));
+
      GET_RE_DEBUG_FLAGS_DECL;
  
      PERL_ARGS_ASSERT_REGCLASS;
      GET_RE_DEBUG_FLAGS_DECL;
  
      PERL_ARGS_ASSERT_REGCLASS;
@@ -14407,9 +15570,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
      ret = reganode(pRExC_state,
                     (LOC)
                      ? ANYOFL
      ret = reganode(pRExC_state,
                     (LOC)
                      ? ANYOFL
-                    : (DEPENDS_SEMANTICS)
-                      ? ANYOFD
-                      : ANYOF,
+                    : ANYOF,
                     0);
  
      if (SIZE_ONLY) {
                     0);
  
      if (SIZE_ONLY) {
@@ -14427,7 +15588,9 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
  
      SKIP_BRACKETED_WHITE_SPACE(skip_white, RExC_parse);
  
  
      SKIP_BRACKETED_WHITE_SPACE(skip_white, RExC_parse);
  
-    if (UCHARAT(RExC_parse) == '^') {  /* Complement of range. */
+    assert(RExC_parse <= RExC_end);
+
+    if (UCHARAT(RExC_parse) == '^') {  /* Complement the class */
         RExC_parse++;
          invert = TRUE;
          allow_multi_folds = FALSE;
         RExC_parse++;
          invert = TRUE;
          allow_multi_folds = FALSE;
@@ -14436,22 +15599,25 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
      }
  
      /* Check that they didn't say [:posix:] instead of [[:posix:]] */
      }
  
      /* Check that they didn't say [:posix:] instead of [[:posix:]] */
-    if (!SIZE_ONLY && RExC_parse < RExC_end && POSIXCC(UCHARAT(RExC_parse))) {
-       const char *s = RExC_parse;
-       const char  c = *s++;
-
-        if (*s == '^') {
-            s++;
+    if (! ret_invlist && MAYBE_POSIXCC(UCHARAT(RExC_parse))) {
+        int maybe_class = handle_possible_posix(pRExC_state,
+                                                RExC_parse,
+                                                &not_posix_region_end,
+                                                NULL,
+                                                TRUE /* checking only */);
+        if (PASS2 && maybe_class >= OOB_NAMEDCLASS && do_posix_warnings) {
+            SAVEFREESV(RExC_rx_sv);
+            ckWARN4reg(not_posix_region_end,
+                    "POSIX syntax [%c %c] belongs inside character classes%s",
+                    *RExC_parse, *RExC_parse,
+                    (maybe_class == OOB_NAMEDCLASS)
+                    ? ((POSIXCC_NOTYET(*RExC_parse))
+                        ? " (but this one isn't implemented)"
+                        : " (but this one isn't fully valid)")
+                    : ""
+                    );
+            (void)ReREFCNT_inc(RExC_rx_sv);
          }
          }
-       while (isWORDCHAR(*s))
-           s++;
-       if (*s && c == *s && s[1] == ']') {
-           SAVEFREESV(RExC_rx_sv);
-           ckWARN3reg(s+2,
-                      "POSIX syntax [%c %c] belongs inside character classes",
-                      c, c);
-           (void)ReREFCNT_inc(RExC_rx_sv);
-       }
      }
  
      /* If the caller wants us to just parse a single element, accomplish this
      }
  
      /* If the caller wants us to just parse a single element, accomplish this
@@ -14465,6 +15631,21 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
         goto charclassloop;
  
      while (1) {
         goto charclassloop;
  
      while (1) {
+
+        if (   posix_warnings
+            && av_tindex(posix_warnings) >= 0
+            && RExC_parse > not_posix_region_end)
+        {
+            /* Warnings about posix class issues are considered tentative until
+             * we are far enough along in the parse that we can no longer
+             * change our mind, at which point we either output them or add
+             * them, if it has so specified, to what gets returned to the
+             * caller.  This is done each time through the loop so that a later
+             * class won't zap them before they have been dealt with. */
+            output_or_return_posix_warnings(pRExC_state, posix_warnings,
+                                            return_posix_warnings);
+        }
+
          if  (RExC_parse >= stop_ptr) {
              break;
          }
          if  (RExC_parse >= stop_ptr) {
              break;
          }
@@ -14486,7 +15667,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
             element_count++;
              non_portable_endpoint = 0;
         }
             element_count++;
              non_portable_endpoint = 0;
         }
-       if (UTF) {
+       if (UTF && ! UTF8_IS_INVARIANT(* RExC_parse)) {
             value = utf8n_to_uvchr((U8*)RExC_parse,
                                    RExC_end - RExC_parse,
                                    &numlen, UTF8_ALLOW_DEFAULT);
             value = utf8n_to_uvchr((U8*)RExC_parse,
                                    RExC_end - RExC_parse,
                                    &numlen, UTF8_ALLOW_DEFAULT);
@@ -14495,14 +15676,53 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
         else
             value = UCHARAT(RExC_parse++);
  
         else
             value = UCHARAT(RExC_parse++);
  
-        if (value == '['
-            && RExC_parse < RExC_end
-            && POSIXCC(UCHARAT(RExC_parse)))
-        {
-            namedclass = regpposixcc(pRExC_state, value, strict);
-        }
-        else if (value == '\\') {
+        if (value == '[') {
+            char * posix_class_end;
+            namedclass = handle_possible_posix(pRExC_state,
+                                               RExC_parse,
+                                               &posix_class_end,
+                                               do_posix_warnings ? &posix_warnings : NULL,
+                                               FALSE    /* die if error */);
+            if (namedclass > OOB_NAMEDCLASS) {
+
+                /* If there was an earlier attempt to parse this particular
+                 * posix class, and it failed, it was a false alarm, as this
+                 * successful one proves */
+                if (   posix_warnings
+                    && av_tindex(posix_warnings) >= 0
+                    && not_posix_region_end >= RExC_parse
+                    && not_posix_region_end <= posix_class_end)
+                {
+                    av_undef(posix_warnings);
+                }
+
+                RExC_parse = posix_class_end;
+            }
+            else if (namedclass == OOB_NAMEDCLASS) {
+                not_posix_region_end = posix_class_end;
+            }
+            else {
+                namedclass = OOB_NAMEDCLASS;
+            }
+        }
+        else if (   RExC_parse - 1 > not_posix_region_end
+                 && MAYBE_POSIXCC(value))
+        {
+            (void) handle_possible_posix(
+                        pRExC_state,
+                        RExC_parse - 1,  /* -1 because parse has already been
+                                            advanced */
+                        &not_posix_region_end,
+                        do_posix_warnings ? &posix_warnings : NULL,
+                        TRUE /* checking only */);
+        }
+        else if (value == '\\') {
              /* Is a backslash; get the code point of the char after it */
              /* Is a backslash; get the code point of the char after it */
+
+            if (RExC_parse >= RExC_end) {
+                vFAIL("Unmatched [");
+            }
+
             if (UTF && ! UTF8_IS_INVARIANT(UCHARAT(RExC_parse))) {
                 value = utf8n_to_uvchr((U8*)RExC_parse,
                                    RExC_end - RExC_parse,
             if (UTF && ! UTF8_IS_INVARIANT(UCHARAT(RExC_parse))) {
                 value = utf8n_to_uvchr((U8*)RExC_parse,
                                    RExC_end - RExC_parse,
@@ -14544,6 +15764,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                                          &value,    /* Yes single value */
                                          &cp_count, /* Multiple code pt count */
                                          flagp,
                                          &value,    /* Yes single value */
                                          &cp_count, /* Multiple code pt count */
                                          flagp,
+                                        strict,
                                          depth)
                      ) {
  
                                          depth)
                      ) {
  
@@ -14556,11 +15777,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                              vFAIL("\\N in a character class must be a named character: \\N{...}");
                          }
                          else if (cp_count == 0) {
                              vFAIL("\\N in a character class must be a named character: \\N{...}");
                          }
                          else if (cp_count == 0) {
-                            if (strict) {
-                                RExC_parse++;   /* Position after the "}" */
-                                vFAIL("Zero length \\N{}");
-                            }
-                            else if (PASS2) {
+                            if (PASS2) {
                                  ckWARNreg(RExC_parse,
                                          "Ignoring zero length \\N{} in character class");
                              }
                                  ckWARNreg(RExC_parse,
                                          "Ignoring zero length \\N{} in character class");
                              }
@@ -14616,7 +15833,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                                        |_CORE_SWASH_INIT_ACCEPT_INVLIST;
  
                 if (RExC_parse >= RExC_end)
                                        |_CORE_SWASH_INIT_ACCEPT_INVLIST;
  
                 if (RExC_parse >= RExC_end)
-                   vFAIL2("Empty \\%c{}", (U8)value);
+                   vFAIL2("Empty \\%c", (U8)value);
                 if (*RExC_parse == '{') {
                     const U8 c = (U8)value;
                     e = strchr(RExC_parse, '}');
                 if (*RExC_parse == '{') {
                     const U8 c = (U8)value;
                     e = strchr(RExC_parse, '}');
@@ -14664,6 +15881,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                      SV* invlist;
                      char* name;
                      char* base_name;    /* name after any packages are stripped */
                      SV* invlist;
                      char* name;
                      char* base_name;    /* name after any packages are stripped */
+                    char* lookup_name = NULL;
                      const char * const colon_colon = "::";
  
                      /* Try to get the definition of the property into
                      const char * const colon_colon = "::";
  
                      /* Try to get the definition of the property into
@@ -14671,25 +15889,28 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                       * will have its name be <__NAME_i>.  The design is
                       * discussed in commit
                       * 2f833f5208e26b208886e51e09e2c072b5eabb46 */
                       * will have its name be <__NAME_i>.  The design is
                       * discussed in commit
                       * 2f833f5208e26b208886e51e09e2c072b5eabb46 */
-                    name = savepv(Perl_form(aTHX_
-                                          "%s%.*s%s\n",
-                                          (FOLD) ? "__" : "",
-                                          (int)n,
-                                          RExC_parse,
-                                          (FOLD) ? "_i" : ""
-                                ));
+                    name = savepv(Perl_form(aTHX_ "%.*s", (int)n, RExC_parse));
+                    SAVEFREEPV(name);
+                    if (FOLD) {
+                        lookup_name = savepv(Perl_form(aTHX_ "__%s_i", name));
+                    }
  
                      /* Look up the property name, and get its swash and
                       * inversion list, if the property is found  */
  
                      /* Look up the property name, and get its swash and
                       * inversion list, if the property is found  */
-                    if (swash) {    /* Return any left-overs */
-                        SvREFCNT_dec_NN(swash);
-                    }
-                    swash = _core_swash_init("utf8", name, &PL_sv_undef,
+                    SvREFCNT_dec(swash); /* Free any left-overs */
+                    swash = _core_swash_init("utf8",
+                                             (lookup_name)
+                                              ? lookup_name
+                                              : name,
+                                             &PL_sv_undef,
                                               1, /* binary */
                                               0, /* not tr/// */
                                               NULL, /* No inversion list */
                                               &swash_init_flags
                                              );
                                               1, /* binary */
                                               0, /* not tr/// */
                                               NULL, /* No inversion list */
                                               &swash_init_flags
                                              );
+                    if (lookup_name) {
+                        Safefree(lookup_name);
+                    }
                      if (! swash || ! (invlist = _get_swash_invlist(swash))) {
                          HV* curpkg = (IN_PERL_COMPILETIME)
                                        ? PL_curstash
                      if (! swash || ! (invlist = _get_swash_invlist(swash))) {
                          HV* curpkg = (IN_PERL_COMPILETIME)
                                        ? PL_curstash
@@ -14752,26 +15973,22 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                                                              pkgname,
                                                              name);
                                  n = strlen(full_name);
                                                              pkgname,
                                                              name);
                                  n = strlen(full_name);
-                                Safefree(name);
                                  name = savepvn(full_name, n);
                                  name = savepvn(full_name, n);
+                                SAVEFREEPV(name);
                              }
                          }
                              }
                          }
-                        Perl_sv_catpvf(aTHX_ listsv, "%cutf8::%"UTF8f"\n",
+                        Perl_sv_catpvf(aTHX_ listsv, "%cutf8::%s%"UTF8f"%s\n",
                                          (value == 'p' ? '+' : '!'),
                                          (value == 'p' ? '+' : '!'),
-                                        UTF8fARG(UTF, n, name));
+                                        (FOLD) ? "__" : "",
+                                        UTF8fARG(UTF, n, name),
+                                        (FOLD) ? "_i" : "");
                          has_user_defined_property = TRUE;
                          optimizable = FALSE;    /* Will have to leave this an
                                                     ANYOF node */
  
                          has_user_defined_property = TRUE;
                          optimizable = FALSE;    /* Will have to leave this an
                                                     ANYOF node */
  
-                        /* We don't know yet, so have to assume that the
-                         * property could match something in the upper Latin1
-                         * range, hence something that isn't utf8.  Note that
-                         * this would cause things in <depends_list> to match
-                         * inappropriately, except that any \p{}, including
-                         * this one forces Unicode semantics, which means there
-                         * is no <depends_list> */
-                        ANYOF_FLAGS(ret)
-                                      |= ANYOF_HAS_NONBITMAP_NON_UTF8_MATCHES;
+                        /* We don't know yet what this matches, so have to flag
+                         * it */
+                        ANYOF_FLAGS(ret) |= ANYOF_SHARED_d_UPPER_LATIN1_UTF8_STRING_MATCHES_non_d_RUNTIME_USER_PROP;
                      }
                      else {
  
                      }
                      else {
  
@@ -14812,7 +16029,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                              _invlist_union(properties, invlist, &properties);
                         }
                     }
                              _invlist_union(properties, invlist, &properties);
                         }
                     }
-                   Safefree(name);
                 }
                 RExC_parse = e + 1;
                  namedclass = ANYOF_UNIPROP;  /* no official name, but it's
                 }
                 RExC_parse = e + 1;
                  namedclass = ANYOF_UNIPROP;  /* no official name, but it's
@@ -14907,7 +16123,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
               recode_encoding:
                 if (! RExC_override_recoding) {
                     SV* enc = _get_encoding();
               recode_encoding:
                 if (! RExC_override_recoding) {
                     SV* enc = _get_encoding();
-                   value = reg_recode((const char)(U8)value, &enc);
+                   value = reg_recode((U8)value, &enc);
                     if (!enc) {
                          if (strict) {
                              vFAIL("Invalid escape in the specified encoding");
                     if (!enc) {
                          if (strict) {
                              vFAIL("Invalid escape in the specified encoding");
@@ -15365,7 +16581,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                             "\"%.*s\" is more clearly written simply as \"%s\"",
                             (int) (RExC_parse - rangebegin),
                             rangebegin,
                             "\"%.*s\" is more clearly written simply as \"%s\"",
                             (int) (RExC_parse - rangebegin),
                             rangebegin,
-                           cntrl_to_mnemonic((char) value)
+                           cntrl_to_mnemonic((U8) value)
                          );
                  }
              }
                          );
                  }
              }
@@ -15418,6 +16634,12 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
         range = 0; /* this range (if it was one) is done now */
      } /* End of loop through all the text within the brackets */
  
         range = 0; /* this range (if it was one) is done now */
      } /* End of loop through all the text within the brackets */
  
+
+    if (   posix_warnings && av_tindex(posix_warnings) >= 0) {
+        output_or_return_posix_warnings(pRExC_state, posix_warnings,
+                                        return_posix_warnings);
+    }
+
      /* If anything in the class expands to more than one character, we have to
       * deal with them by building up a substitute parse string, and recursively
       * calling reg() on it, instead of proceeding */
      /* If anything in the class expands to more than one character, we have to
       * deal with them by building up a substitute parse string, and recursively
       * calling reg() on it, instead of proceeding */
@@ -15427,11 +16649,17 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
         STRLEN len;
         char *save_end = RExC_end;
         char *save_parse = RExC_parse;
         STRLEN len;
         char *save_end = RExC_end;
         char *save_parse = RExC_parse;
+       char *save_start = RExC_start;
+        STRLEN prefix_end = 0;      /* We copy the character class after a
+                                       prefix supplied here.  This is the size
+                                       + 1 of that prefix */
          bool first_time = TRUE;     /* First multi-char occurrence doesn't get
                                         a "|" */
          I32 reg_flags;
  
          assert(! invert);
          bool first_time = TRUE;     /* First multi-char occurrence doesn't get
                                         a "|" */
          I32 reg_flags;
  
          assert(! invert);
+        assert(RExC_precomp_adj == 0); /* Only one level of recursion allowed */
+
  #if 0   /* Have decided not to deal with multi-char folds in inverted classes,
             because too confusing */
          if (invert) {
  #if 0   /* Have decided not to deal with multi-char folds in inverted classes,
             because too confusing */
          if (invert) {
@@ -15465,8 +16693,14 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
           * multi-character folds, have to include it in recursive parsing */
          if (element_count) {
              sv_catpv(substitute_parse, "|[");
           * multi-character folds, have to include it in recursive parsing */
          if (element_count) {
              sv_catpv(substitute_parse, "|[");
+            prefix_end = SvCUR(substitute_parse);
              sv_catpvn(substitute_parse, orig_parse, RExC_parse - orig_parse);
              sv_catpvn(substitute_parse, orig_parse, RExC_parse - orig_parse);
-            sv_catpv(substitute_parse, "]");
+
+            /* Put in a closing ']' only if not going off the end, as otherwise
+             * we are adding something that really isn't there */
+            if (RExC_parse < RExC_end) {
+                sv_catpv(substitute_parse, "]");
+            }
          }
  
          sv_catpv(substitute_parse, ")");
          }
  
          sv_catpv(substitute_parse, ")");
@@ -15479,7 +16713,12 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
          }
  #endif
  
          }
  #endif
  
-       RExC_parse = SvPV(substitute_parse, len);
+        /* Set up the data structure so that any errors will be properly
+         * reported.  See the comments at the definition of
+         * REPORT_LOCATION_ARGS for details */
+        RExC_precomp_adj = orig_parse - RExC_precomp;
+       RExC_start =  RExC_parse = SvPV(substitute_parse, len);
+        RExC_adjusted_start = RExC_start + prefix_end;
         RExC_end = RExC_parse + len;
          RExC_in_multi_char_class = 1;
         RExC_override_recoding = 1;
         RExC_end = RExC_parse + len;
          RExC_in_multi_char_class = 1;
         RExC_override_recoding = 1;
@@ -15489,7 +16728,10 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
  
         *flagp |= reg_flags&(HASWIDTH|SIMPLE|SPSTART|POSTPONED|RESTART_PASS1|NEED_UTF8);
  
  
         *flagp |= reg_flags&(HASWIDTH|SIMPLE|SPSTART|POSTPONED|RESTART_PASS1|NEED_UTF8);
  
-       RExC_parse = save_parse;
+        /* And restore so can parse the rest of the pattern */
+        RExC_parse = save_parse;
+       RExC_start = RExC_adjusted_start = save_start;
+        RExC_precomp_adj = 0;
         RExC_end = save_end;
         RExC_in_multi_char_class = 0;
         RExC_override_recoding = 0;
         RExC_end = save_end;
         RExC_in_multi_char_class = 0;
         RExC_override_recoding = 0;
@@ -15518,9 +16760,9 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
          if (UNLIKELY(posixl_matches_all)) {
              op = SANY;
          }
          if (UNLIKELY(posixl_matches_all)) {
              op = SANY;
          }
-        else if (namedclass > OOB_NAMEDCLASS) { /* this is a named class, like
-                                                   \w or [:digit:] or \p{foo}
-                                                 */
+        else if (namedclass > OOB_NAMEDCLASS) { /* this is a single named
+                                                   class, like \w or [:digit:]
+                                                   or \p{foo} */
  
              /* All named classes are mapped into POSIXish nodes, with its FLAG
               * argument giving which class it is */
  
              /* All named classes are mapped into POSIXish nodes, with its FLAG
               * argument giving which class it is */
@@ -15769,9 +17011,10 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                                                              PL_fold_latin1[j]);
                              }
                              else {
                                                              PL_fold_latin1[j]);
                              }
                              else {
-                                depends_list =
-                                 add_cp_to_invlist(depends_list,
-                                                   PL_fold_latin1[j]);
+                                has_upper_latin1_only_utf8_matches
+                                    = add_cp_to_invlist(
+                                            has_upper_latin1_only_utf8_matches,
+                                            PL_fold_latin1[j]);
                              }
                          }
  
                              }
                          }
  
@@ -15835,8 +17078,10 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                              else {
                                  /* Similarly folds involving non-ascii Latin1
                                  * characters under /d are added to their list */
                              else {
                                  /* Similarly folds involving non-ascii Latin1
                                  * characters under /d are added to their list */
-                                depends_list = add_cp_to_invlist(depends_list,
-                                                                 c);
+                                has_upper_latin1_only_utf8_matches
+                                        = add_cp_to_invlist(
+                                           has_upper_latin1_only_utf8_matches,
+                                           c);
                              }
                          }
                      }
                              }
                          }
                      }
@@ -15912,13 +17157,15 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                  cp_list = posixes;
              }
  
                  cp_list = posixes;
              }
  
-            if (depends_list) {
-                _invlist_union(depends_list, nonascii_but_latin1_properties,
-                               &depends_list);
+            if (has_upper_latin1_only_utf8_matches) {
+                _invlist_union(has_upper_latin1_only_utf8_matches,
+                               nonascii_but_latin1_properties,
+                               &has_upper_latin1_only_utf8_matches);
                  SvREFCNT_dec_NN(nonascii_but_latin1_properties);
              }
              else {
                  SvREFCNT_dec_NN(nonascii_but_latin1_properties);
              }
              else {
-                depends_list = nonascii_but_latin1_properties;
+                has_upper_latin1_only_utf8_matches
+                                            = nonascii_but_latin1_properties;
              }
          }
      }
              }
          }
      }
@@ -15932,15 +17179,15 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
       * class that isn't a Unicode property, and which matches above Unicode, \W
       * or [\x{110000}] for example.
       * (Note that in this case, unlike the Posix one above, there is no
       * class that isn't a Unicode property, and which matches above Unicode, \W
       * or [\x{110000}] for example.
       * (Note that in this case, unlike the Posix one above, there is no
-     * <depends_list>, because having a Unicode property forces Unicode
-     * semantics */
+     * <has_upper_latin1_only_utf8_matches>, because having a Unicode property
+     * forces Unicode semantics */
      if (properties) {
          if (cp_list) {
  
              /* If it matters to the final outcome, see if a non-property
               * component of the class matches above Unicode.  If so, the
               * warning gets suppressed.  This is true even if just a single
      if (properties) {
          if (cp_list) {
  
              /* If it matters to the final outcome, see if a non-property
               * component of the class matches above Unicode.  If so, the
               * warning gets suppressed.  This is true even if just a single
-             * such code point is specified, as though not strictly correct if
+             * such code point is specified, as, though not strictly correct if
               * another such code point is matched against, the fact that they
               * are using above-Unicode code points indicates they should know
               * the issues involved */
               * another such code point is matched against, the fact that they
               * are using above-Unicode code points indicates they should know
               * the issues involved */
@@ -15981,27 +17228,115 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
       * fetching).  We know to set the flag if we have a non-NULL list for UTF-8
       * locales, or the class matches at least one 0-255 range code point */
      if (LOC && FOLD) {
       * fetching).  We know to set the flag if we have a non-NULL list for UTF-8
       * locales, or the class matches at least one 0-255 range code point */
      if (LOC && FOLD) {
+
+        /* Some things on the list might be unconditionally included because of
+         * other components.  Remove them, and clean up the list if it goes to
+         * 0 elements */
+        if (only_utf8_locale_list && cp_list) {
+            _invlist_subtract(only_utf8_locale_list, cp_list,
+                              &only_utf8_locale_list);
+
+            if (_invlist_len(only_utf8_locale_list) == 0) {
+                SvREFCNT_dec_NN(only_utf8_locale_list);
+                only_utf8_locale_list = NULL;
+            }
+        }
          if (only_utf8_locale_list) {
          if (only_utf8_locale_list) {
-            ANYOF_FLAGS(ret) |= ANYOF_LOC_FOLD;
+            ANYOF_FLAGS(ret)
+                 |=  ANYOFL_FOLD
+                    |ANYOFL_SHARED_UTF8_LOCALE_fold_HAS_MATCHES_nonfold_REQD;
          }
          else if (cp_list) { /* Look to see if a 0-255 code point is in list */
              UV start, end;
              invlist_iterinit(cp_list);
              if (invlist_iternext(cp_list, &start, &end) && start < 256) {
          }
          else if (cp_list) { /* Look to see if a 0-255 code point is in list */
              UV start, end;
              invlist_iterinit(cp_list);
              if (invlist_iternext(cp_list, &start, &end) && start < 256) {
-                ANYOF_FLAGS(ret) |= ANYOF_LOC_FOLD;
+                ANYOF_FLAGS(ret) |= ANYOFL_FOLD;
              }
              invlist_iterfinish(cp_list);
          }
      }
  
              }
              invlist_iterfinish(cp_list);
          }
      }
  
+#define MATCHES_ALL_NON_UTF8_NON_ASCII(ret)                                 \
+    (   DEPENDS_SEMANTICS                                                   \
+     && (ANYOF_FLAGS(ret)                                                   \
+        & ANYOF_SHARED_d_MATCHES_ALL_NON_UTF8_NON_ASCII_non_d_WARN_SUPER))
+
+    /* See if we can simplify things under /d */
+    if (   has_upper_latin1_only_utf8_matches
+        || MATCHES_ALL_NON_UTF8_NON_ASCII(ret))
+    {
+        /* But not if we are inverting, as that screws it up */
+        if (! invert) {
+            if (has_upper_latin1_only_utf8_matches) {
+                if (MATCHES_ALL_NON_UTF8_NON_ASCII(ret)) {
+
+                    /* Here, we have both the flag and inversion list.  Any
+                     * character in 'has_upper_latin1_only_utf8_matches'
+                     * matches when UTF-8 is in effect, but it also matches
+                     * when UTF-8 is not in effect because of
+                     * MATCHES_ALL_NON_UTF8_NON_ASCII.  Therefore it matches
+                     * unconditionally, so can be added to the regular list,
+                     * and 'has_upper_latin1_only_utf8_matches' cleared */
+                    _invlist_union(cp_list,
+                                   has_upper_latin1_only_utf8_matches,
+                                   &cp_list);
+                    SvREFCNT_dec_NN(has_upper_latin1_only_utf8_matches);
+                    has_upper_latin1_only_utf8_matches = NULL;
+                }
+                else if (cp_list) {
+
+                    /* Here, 'cp_list' gives chars that always match, and
+                     * 'has_upper_latin1_only_utf8_matches' gives chars that
+                     * were specified to match only if the target string is in
+                     * UTF-8.  It may be that these overlap, so we can subtract
+                     * the unconditionally matching from the conditional ones,
+                     * to make the conditional list as small as possible,
+                     * perhaps even clearing it, in which case more
+                     * optimizations are possible later */
+                    _invlist_subtract(has_upper_latin1_only_utf8_matches,
+                                      cp_list,
+                                      &has_upper_latin1_only_utf8_matches);
+                    if (_invlist_len(has_upper_latin1_only_utf8_matches) == 0) {
+                        SvREFCNT_dec_NN(has_upper_latin1_only_utf8_matches);
+                        has_upper_latin1_only_utf8_matches = NULL;
+                    }
+                }
+            }
+
+            /* Similarly, if the unconditional matches include every upper
+             * latin1 character, we can clear that flag to permit later
+             * optimizations */
+            if (cp_list && MATCHES_ALL_NON_UTF8_NON_ASCII(ret)) {
+                SV* only_non_utf8_list = invlist_clone(PL_UpperLatin1);
+                _invlist_subtract(only_non_utf8_list, cp_list,
+                                  &only_non_utf8_list);
+                if (_invlist_len(only_non_utf8_list) == 0) {
+                    ANYOF_FLAGS(ret) &= ~ANYOF_SHARED_d_MATCHES_ALL_NON_UTF8_NON_ASCII_non_d_WARN_SUPER;
+                }
+                SvREFCNT_dec_NN(only_non_utf8_list);
+                only_non_utf8_list = NULL;;
+            }
+        }
+
+        /* If we haven't gotten rid of all conditional matching, we change the
+         * regnode type to indicate that */
+        if (   has_upper_latin1_only_utf8_matches
+            || MATCHES_ALL_NON_UTF8_NON_ASCII(ret))
+        {
+            OP(ret) = ANYOFD;
+            optimizable = FALSE;
+        }
+    }
+#undef MATCHES_ALL_NON_UTF8_NON_ASCII
+
      /* Optimize inverted simple patterns (e.g. [^a-z]) when everything is known
       * at compile time.  Besides not inverting folded locale now, we can't
       * invert if there are things such as \w, which aren't known until runtime
       * */
      if (cp_list
          && invert
      /* Optimize inverted simple patterns (e.g. [^a-z]) when everything is known
       * at compile time.  Besides not inverting folded locale now, we can't
       * invert if there are things such as \w, which aren't known until runtime
       * */
      if (cp_list
          && invert
+        && OP(ret) != ANYOFD
          && ! (ANYOF_FLAGS(ret) & (ANYOF_LOCALE_FLAGS))
          && ! (ANYOF_FLAGS(ret) & (ANYOF_LOCALE_FLAGS))
-       && ! depends_list
         && ! HAS_NONLOCALE_RUNTIME_PROPERTY_DEFINITION)
      {
          _invlist_invert(cp_list);
         && ! HAS_NONLOCALE_RUNTIME_PROPERTY_DEFINITION)
      {
          _invlist_invert(cp_list);
@@ -16043,16 +17378,19 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
       * adjacent such nodes.  And if the class is equivalent to things like /./,
       * expensive run-time swashes can be avoided.  Now that we have more
       * complete information, we can find things necessarily missed by the
       * adjacent such nodes.  And if the class is equivalent to things like /./,
       * expensive run-time swashes can be avoided.  Now that we have more
       * complete information, we can find things necessarily missed by the
-     * earlier code.  I (khw) did some benchmarks and found essentially no
-     * speed difference between using a POSIXA node versus an ANYOF node, so
-     * there is no reason to optimize, for example [A-Za-z0-9_] into
-     * [[:word:]]/a (although if we did it in the sizing pass it would save
-     * space).  _invlistEQ() could be used if one ever wanted to do something
-     * like this at this point in the code */
-
-    if (optimizable && cp_list && ! invert && ! depends_list) {
+     * earlier code.  Another possible "optimization" that isn't done is that
+     * something like [Ee] could be changed into an EXACTFU.  khw tried this
+     * and found that the ANYOF is faster, including for code points not in the
+     * bitmap.  This still might make sense to do, provided it got joined with
+     * an adjacent node(s) to create a longer EXACTFU one.  This could be
+     * accomplished by creating a pseudo ANYOF_EXACTFU node type that the join
+     * routine would know is joinable.  If that didn't happen, the node type
+     * could then be made a straight ANYOF */
+
+    if (optimizable && cp_list && ! invert) {
          UV start, end;
          U8 op = END;  /* The optimzation node-type */
          UV start, end;
          U8 op = END;  /* The optimzation node-type */
+        int posix_class = -1;   /* Illegal value */
          const char * cur_parse= RExC_parse;
  
          invlist_iterinit(cp_list);
          const char * cur_parse= RExC_parse;
  
          invlist_iterinit(cp_list);
@@ -16135,6 +17473,49 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
          }
          invlist_iterfinish(cp_list);
  
          }
          invlist_iterfinish(cp_list);
  
+        if (op == END) {
+            const UV cp_list_len = _invlist_len(cp_list);
+            const UV* cp_list_array = invlist_array(cp_list);
+
+            /* Here, didn't find an optimization.  See if this matches any of
+             * the POSIX classes.  These run slightly faster for above-Unicode
+             * code points, so don't bother with POSIXA ones nor the 2 that
+             * have no above-Unicode matches.  We can avoid these checks unless
+             * the ANYOF matches at least as high as the lowest POSIX one
+             * (which was manually found to be \v.  The actual code point may
+             * increase in later Unicode releases, if a higher code point is
+             * assigned to be \v, but this code will never break.  It would
+             * just mean we could execute the checks for posix optimizations
+             * unnecessarily) */
+
+            if (cp_list_array[cp_list_len-1] > 0x2029) {
+                for (posix_class = 0;
+                     posix_class <= _HIGHEST_REGCOMP_DOT_H_SYNC;
+                     posix_class++)
+                {
+                    int try_inverted;
+                    if (posix_class == _CC_ASCII || posix_class == _CC_CNTRL) {
+                        continue;
+                    }
+                    for (try_inverted = 0; try_inverted < 2; try_inverted++) {
+
+                        /* Check if matches normal or inverted */
+                        if (_invlistEQ(cp_list,
+                                       PL_XPosix_ptrs[posix_class],
+                                       try_inverted))
+                        {
+                            op = (try_inverted)
+                                 ? NPOSIXU
+                                 : POSIXU;
+                            *flagp |= HASWIDTH|SIMPLE;
+                            goto found_posix;
+                        }
+                    }
+                }
+              found_posix: ;
+            }
+        }
+
          if (op != END) {
              RExC_parse = (char *)orig_parse;
              RExC_emit = (regnode *)orig_emit;
          if (op != END) {
              RExC_parse = (char *)orig_parse;
              RExC_emit = (regnode *)orig_emit;
@@ -16152,6 +17533,9 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                                             TRUE /* downgradable to EXACT */
                                            );
              }
                                             TRUE /* downgradable to EXACT */
                                            );
              }
+            else if (PL_regkind[op] == POSIXD || PL_regkind[op] == NPOSIXD) {
+                FLAGS(ret) = posix_class;
+            }
  
              SvREFCNT_dec_NN(cp_list);
              return ret;
  
              SvREFCNT_dec_NN(cp_list);
              return ret;
@@ -16172,16 +17556,19 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
  
      /* Here, the bitmap has been populated with all the Latin1 code points that
       * always match.  Can now add to the overall list those that match only
  
      /* Here, the bitmap has been populated with all the Latin1 code points that
       * always match.  Can now add to the overall list those that match only
-     * when the target string is UTF-8 (<depends_list>). */
-    if (depends_list) {
+     * when the target string is UTF-8 (<has_upper_latin1_only_utf8_matches>).
+     * */
+    if (has_upper_latin1_only_utf8_matches) {
         if (cp_list) {
         if (cp_list) {
-           _invlist_union(cp_list, depends_list, &cp_list);
-           SvREFCNT_dec_NN(depends_list);
+           _invlist_union(cp_list,
+                           has_upper_latin1_only_utf8_matches,
+                           &cp_list);
+           SvREFCNT_dec_NN(has_upper_latin1_only_utf8_matches);
         }
         else {
         }
         else {
-           cp_list = depends_list;
+           cp_list = has_upper_latin1_only_utf8_matches;
         }
         }
-        ANYOF_FLAGS(ret) |= ANYOF_HAS_UTF8_NONBITMAP_MATCHES;
+        ANYOF_FLAGS(ret) |= ANYOF_SHARED_d_UPPER_LATIN1_UTF8_STRING_MATCHES_non_d_RUNTIME_USER_PROP;
      }
  
      /* If there is a swash and more than one element, we can't use the swash in
      }
  
      /* If there is a swash and more than one element, we can't use the swash in
@@ -16249,18 +17636,13 @@ S_set_ANYOF_arg(pTHX_ RExC_state_t* const pRExC_state,
  
      if (! cp_list && ! runtime_defns && ! only_utf8_locale_list) {
          assert(! (ANYOF_FLAGS(node)
  
      if (! cp_list && ! runtime_defns && ! only_utf8_locale_list) {
          assert(! (ANYOF_FLAGS(node)
-                  & (ANYOF_HAS_UTF8_NONBITMAP_MATCHES
-                     |ANYOF_HAS_NONBITMAP_NON_UTF8_MATCHES)));
+                & ANYOF_SHARED_d_UPPER_LATIN1_UTF8_STRING_MATCHES_non_d_RUNTIME_USER_PROP));
         ARG_SET(node, ANYOF_ONLY_HAS_BITMAP);
      }
      else {
         AV * const av = newAV();
         SV *rv;
  
         ARG_SET(node, ANYOF_ONLY_HAS_BITMAP);
      }
      else {
         AV * const av = newAV();
         SV *rv;
  
-        assert(ANYOF_FLAGS(node)
-               & (ANYOF_HAS_UTF8_NONBITMAP_MATCHES
-                  |ANYOF_HAS_NONBITMAP_NON_UTF8_MATCHES|ANYOF_LOC_FOLD));
-
         av_store(av, 0, (runtime_defns)
                         ? SvREFCNT_inc(runtime_defns) : &PL_sv_undef);
         if (swash) {
         av_store(av, 0, (runtime_defns)
                         ? SvREFCNT_inc(runtime_defns) : &PL_sv_undef);
         if (swash) {
@@ -16297,7 +17679,7 @@ Perl__get_regclass_nonbitmap_data(pTHX_ const regexp *prog,
                                          bool doinit,
                                          SV** listsvp,
                                          SV** only_utf8_locale_ptr,
                                          bool doinit,
                                          SV** listsvp,
                                          SV** only_utf8_locale_ptr,
-                                        SV*  exclude_list)
+                                        SV** output_invlist)
  
  {
      /* For internal core use only.
  
  {
      /* For internal core use only.
@@ -16309,24 +17691,32 @@ Perl__get_regclass_nonbitmap_data(pTHX_ const regexp *prog,
       *    swash exists, by calling this function with 'doinit' set to false, in
       *    which case the components that will be used to eventually create the
       *    swash are returned  (in a printable form).
       *    swash exists, by calling this function with 'doinit' set to false, in
       *    which case the components that will be used to eventually create the
       *    swash are returned  (in a printable form).
-     * If <exclude_list> is not NULL, it is an inversion list of things to
-     *    exclude from what's returned in <listsvp>.
+     * If <only_utf8_locale_ptr> is not NULL, it is where this routine is to
+     *    store an inversion list of code points that should match only if the
+     *    execution-time locale is a UTF-8 one.
+     * If <output_invlist> is not NULL, it is where this routine is to store an
+     *    inversion list of the code points that would be instead returned in
+     *    <listsvp> if this were NULL.  Thus, what gets output in <listsvp>
+     *    when this parameter is used, is just the non-code point data that
+     *    will go into creating the swash.  This currently should be just
+     *    user-defined properties whose definitions were not known at compile
+     *    time.  Using this parameter allows for easier manipulation of the
+     *    swash's data by the caller.  It is illegal to call this function with
+     *    this parameter set, but not <listsvp>
+     *
       * Tied intimately to how S_set_ANYOF_arg sets up the data structure.  Note
       * that, in spite of this function's name, the swash it returns may include
       * the bitmap data as well */
  
      SV *sw  = NULL;
      SV *si  = NULL;         /* Input swash initialization string */
       * Tied intimately to how S_set_ANYOF_arg sets up the data structure.  Note
       * that, in spite of this function's name, the swash it returns may include
       * the bitmap data as well */
  
      SV *sw  = NULL;
      SV *si  = NULL;         /* Input swash initialization string */
-    SV*  invlist = NULL;
+    SV* invlist = NULL;
  
      RXi_GET_DECL(prog,progi);
      const struct reg_data * const data = prog ? progi->data : NULL;
  
      PERL_ARGS_ASSERT__GET_REGCLASS_NONBITMAP_DATA;
  
      RXi_GET_DECL(prog,progi);
      const struct reg_data * const data = prog ? progi->data : NULL;
  
      PERL_ARGS_ASSERT__GET_REGCLASS_NONBITMAP_DATA;
-
-    assert(ANYOF_FLAGS(node)
-        & (ANYOF_HAS_UTF8_NONBITMAP_MATCHES
-           |ANYOF_HAS_NONBITMAP_NON_UTF8_MATCHES|ANYOF_LOC_FOLD));
+    assert(! output_invlist || listsvp);
  
      if (data && data->count) {
         const U32 n = ARG(node);
  
      if (data && data->count) {
         const U32 n = ARG(node);
@@ -16339,9 +17729,6 @@ Perl__get_regclass_nonbitmap_data(pTHX_ const regexp *prog,
  
             si = *ary;  /* ary[0] = the string to initialize the swash with */
  
  
             si = *ary;  /* ary[0] = the string to initialize the swash with */
  
-           /* Elements 3 and 4 are either both present or both absent. [3] is
-            * any inversion list generated at compile time; [4] indicates if
-            * that inversion list has any user-defined properties in it. */
              if (av_tindex(av) >= 2) {
                  if (only_utf8_locale_ptr
                      && ary[2]
              if (av_tindex(av) >= 2) {
                  if (only_utf8_locale_ptr
                      && ary[2]
@@ -16354,6 +17741,10 @@ Perl__get_regclass_nonbitmap_data(pTHX_ const regexp *prog,
                      *only_utf8_locale_ptr = NULL;
                  }
  
                      *only_utf8_locale_ptr = NULL;
                  }
  
+                /* Elements 3 and 4 are either both present or both absent. [3]
+                 * is any inversion list generated at compile time; [4]
+                 * indicates if that inversion list has any user-defined
+                 * properties in it. */
                  if (av_tindex(av) >= 3) {
                      invlist = ary[3];
                      if (SvUV(ary[4])) {
                  if (av_tindex(av) >= 3) {
                      invlist = ary[3];
                      if (SvUV(ary[4])) {
@@ -16387,7 +17778,7 @@ Perl__get_regclass_nonbitmap_data(pTHX_ const regexp *prog,
  
      /* If requested, return a printable version of what this swash matches */
      if (listsvp) {
  
      /* If requested, return a printable version of what this swash matches */
      if (listsvp) {
-       SV* matches_string = newSVpvs("");
+       SV* matches_string = NULL;
  
          /* The swash should be used, if possible, to get the data, as it
           * contains the resolved data.  But this function can be called at
  
          /* The swash should be used, if possible, to get the data, as it
           * contains the resolved data.  But this function can be called at
@@ -16397,22 +17788,124 @@ Perl__get_regclass_nonbitmap_data(pTHX_ const regexp *prog,
         if ((! sw || (invlist = _get_swash_invlist(sw)) == NULL)
              && (si && si != &PL_sv_undef))
          {
         if ((! sw || (invlist = _get_swash_invlist(sw)) == NULL)
              && (si && si != &PL_sv_undef))
          {
-           sv_catsv(matches_string, si);
+            /* Here, we only have 'si' (and possibly some passed-in data in
+             * 'invlist', which is handled below)  If the caller only wants
+             * 'si', use that.  */
+            if (! output_invlist) {
+                matches_string = newSVsv(si);
+            }
+            else {
+                /* But if the caller wants an inversion list of the node, we
+                 * need to parse 'si' and place as much as possible in the
+                 * desired output inversion list, making 'matches_string' only
+                 * contain the currently unresolvable things */
+                const char *si_string = SvPVX(si);
+                STRLEN remaining = SvCUR(si);
+                UV prev_cp = 0;
+                U8 count = 0;
+
+                /* Ignore everything before the first new-line */
+                while (*si_string != '\n' && remaining > 0) {
+                    si_string++;
+                    remaining--;
+                }
+                assert(remaining > 0);
+
+                si_string++;
+                remaining--;
+
+                while (remaining > 0) {
+
+                    /* The data consists of just strings defining user-defined
+                     * property names, but in prior incarnations, and perhaps
+                     * somehow from pluggable regex engines, it could still
+                     * hold hex code point definitions.  Each component of a
+                     * range would be separated by a tab, and each range by a
+                     * new-line.  If these are found, instead add them to the
+                     * inversion list */
+                    I32 grok_flags =  PERL_SCAN_SILENT_ILLDIGIT
+                                     |PERL_SCAN_SILENT_NON_PORTABLE;
+                    STRLEN len = remaining;
+                    UV cp = grok_hex(si_string, &len, &grok_flags, NULL);
+
+                    /* If the hex decode routine found something, it should go
+                     * up to the next \n */
+                    if (   *(si_string + len) == '\n') {
+                        if (count) {    /* 2nd code point on line */
+                            *output_invlist = _add_range_to_invlist(*output_invlist, prev_cp, cp);
+                        }
+                        else {
+                            *output_invlist = add_cp_to_invlist(*output_invlist, cp);
+                        }
+                        count = 0;
+                        goto prepare_for_next_iteration;
+                    }
+
+                    /* If the hex decode was instead for the lower range limit,
+                     * save it, and go parse the upper range limit */
+                    if (*(si_string + len) == '\t') {
+                        assert(count == 0);
+
+                        prev_cp = cp;
+                        count = 1;
+                      prepare_for_next_iteration:
+                        si_string += len + 1;
+                        remaining -= len + 1;
+                        continue;
+                    }
+
+                    /* Here, didn't find a legal hex number.  Just add it from
+                     * here to the next \n */
+
+                    remaining -= len;
+                    while (*(si_string + len) != '\n' && remaining > 0) {
+                        remaining--;
+                        len++;
+                    }
+                    if (*(si_string + len) == '\n') {
+                        len++;
+                        remaining--;
+                    }
+                    if (matches_string) {
+                        sv_catpvn(matches_string, si_string, len - 1);
+                    }
+                    else {
+                        matches_string = newSVpvn(si_string, len - 1);
+                    }
+                    si_string += len;
+                    sv_catpvs(matches_string, " ");
+                } /* end of loop through the text */
+
+                assert(matches_string);
+                if (SvCUR(matches_string)) {  /* Get rid of trailing blank */
+                    SvCUR_set(matches_string, SvCUR(matches_string) - 1);
+                }
+            } /* end of has an 'si' but no swash */
         }
  
         }
  
-       /* Add the inversion list to whatever we have.  This may have come from
-        * the swash, or from an input parameter */
-       if (invlist) {
-            if (exclude_list) {
-                SV* clone = invlist_clone(invlist);
-                _invlist_subtract(clone, exclude_list, &clone);
-                sv_catsv(matches_string, _invlist_contents(clone));
-                SvREFCNT_dec_NN(clone);
+        /* If we have a swash in place, its equivalent inversion list was above
+         * placed into 'invlist'.  If not, this variable may contain a stored
+         * inversion list which is information beyond what is in 'si' */
+        if (invlist) {
+
+            /* Again, if the caller doesn't want the output inversion list, put
+             * everything in 'matches-string' */
+            if (! output_invlist) {
+                if ( ! matches_string) {
+                    matches_string = newSVpvs("\n");
+                }
+                sv_catsv(matches_string, invlist_contents(invlist,
+                                                  TRUE /* traditional style */
+                                                  ));
+            }
+            else if (! *output_invlist) {
+                *output_invlist = invlist_clone(invlist);
              }
              else {
              }
              else {
-                sv_catsv(matches_string, _invlist_contents(invlist));
+                _invlist_union(*output_invlist, invlist, output_invlist);
              }
              }
-       }
+        }
+
         *listsvp = matches_string;
      }
  
         *listsvp = matches_string;
      }
  
@@ -16524,14 +18017,16 @@ S_nextchar(pTHX_ RExC_state_t *pRExC_state)
  {
      PERL_ARGS_ASSERT_NEXTCHAR;
  
  {
      PERL_ARGS_ASSERT_NEXTCHAR;
  
-    assert(   ! UTF
-           || UTF8_IS_INVARIANT(*RExC_parse)
-           || UTF8_IS_START(*RExC_parse));
+    if (RExC_parse < RExC_end) {
+        assert(   ! UTF
+               || UTF8_IS_INVARIANT(*RExC_parse)
+               || UTF8_IS_START(*RExC_parse));
  
  
-    RExC_parse += (UTF) ? UTF8SKIP(RExC_parse) : 1;
+        RExC_parse += (UTF) ? UTF8SKIP(RExC_parse) : 1;
  
  
-    skip_to_be_ignored_text(pRExC_state, &RExC_parse,
-                            FALSE /* Don't assume /x */ );
+        skip_to_be_ignored_text(pRExC_state, &RExC_parse,
+                                FALSE /* Don't assume /x */ );
+    }
  }
  
  STATIC regnode *
  }
  
  STATIC regnode *
@@ -16730,10 +18225,11 @@ S_reginsert(pTHX_ RExC_state_t *pRExC_state, U8 op, regnode *opnd, U32 depth)
  - regtail - set the next-pointer at the end of a node chain of p to val.
  - SEE ALSO: regtail_study
  */
  - regtail - set the next-pointer at the end of a node chain of p to val.
  - SEE ALSO: regtail_study
  */
-/* TODO: All three parms should be const */
  STATIC void
  STATIC void
-S_regtail(pTHX_ RExC_state_t *pRExC_state, regnode *p,
-                const regnode *val,U32 depth)
+S_regtail(pTHX_ RExC_state_t * pRExC_state,
+                const regnode * const p,
+                const regnode * const val,
+                const U32 depth)
  {
      regnode *scan;
      GET_RE_DEBUG_FLAGS_DECL;
  {
      regnode *scan;
      GET_RE_DEBUG_FLAGS_DECL;
@@ -16747,7 +18243,7 @@ S_regtail(pTHX_ RExC_state_t *pRExC_state, regnode *p,
         return;
  
      /* Find last node. */
         return;
  
      /* Find last node. */
-    scan = p;
+    scan = (regnode *) p;
      for (;;) {
         regnode * const temp = regnext(scan);
          DEBUG_PARSE_r({
      for (;;) {
         regnode * const temp = regnext(scan);
          DEBUG_PARSE_r({
@@ -17043,6 +18539,53 @@ Perl_regdump(pTHX_ const regexp *r)
  #endif /* DEBUGGING */
  }
  
  #endif /* DEBUGGING */
  }
  
+/* Should be synchronized with ANYOF_ #defines in regcomp.h */
+#ifdef DEBUGGING
+
+#  if   _CC_WORDCHAR != 0 || _CC_DIGIT != 1        || _CC_ALPHA != 2    \
+     || _CC_LOWER != 3    || _CC_UPPER != 4        || _CC_PUNCT != 5    \
+     || _CC_PRINT != 6    || _CC_ALPHANUMERIC != 7 || _CC_GRAPH != 8    \
+     || _CC_CASED != 9    || _CC_SPACE != 10       || _CC_BLANK != 11   \
+     || _CC_XDIGIT != 12  || _CC_CNTRL != 13       || _CC_ASCII != 14   \
+     || _CC_VERTSPACE != 15
+#   error Need to adjust order of anyofs[]
+#  endif
+static const char * const anyofs[] = {
+    "\\w",
+    "\\W",
+    "\\d",
+    "\\D",
+    "[:alpha:]",
+    "[:^alpha:]",
+    "[:lower:]",
+    "[:^lower:]",
+    "[:upper:]",
+    "[:^upper:]",
+    "[:punct:]",
+    "[:^punct:]",
+    "[:print:]",
+    "[:^print:]",
+    "[:alnum:]",
+    "[:^alnum:]",
+    "[:graph:]",
+    "[:^graph:]",
+    "[:cased:]",
+    "[:^cased:]",
+    "\\s",
+    "\\S",
+    "[:blank:]",
+    "[:^blank:]",
+    "[:xdigit:]",
+    "[:^xdigit:]",
+    "[:cntrl:]",
+    "[:^cntrl:]",
+    "[:ascii:]",
+    "[:^ascii:]",
+    "\\v",
+    "\\V"
+};
+#endif
+
  /*
  - regprop - printable representation of opcode, with run time support
  */
  /*
  - regprop - printable representation of opcode, with run time support
  */
@@ -17052,49 +18595,6 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
  {
  #ifdef DEBUGGING
      int k;
  {
  #ifdef DEBUGGING
      int k;
-
-    /* Should be synchronized with * ANYOF_ #xdefines in regcomp.h */
-    static const char * const anyofs[] = {
-#if _CC_WORDCHAR != 0 || _CC_DIGIT != 1 || _CC_ALPHA != 2 || _CC_LOWER != 3 \
-    || _CC_UPPER != 4 || _CC_PUNCT != 5 || _CC_PRINT != 6                   \
-    || _CC_ALPHANUMERIC != 7 || _CC_GRAPH != 8 || _CC_CASED != 9            \
-    || _CC_SPACE != 10 || _CC_BLANK != 11 || _CC_XDIGIT != 12               \
-    || _CC_CNTRL != 13 || _CC_ASCII != 14 || _CC_VERTSPACE != 15
-  #error Need to adjust order of anyofs[]
-#endif
-        "\\w",
-        "\\W",
-        "\\d",
-        "\\D",
-        "[:alpha:]",
-        "[:^alpha:]",
-        "[:lower:]",
-        "[:^lower:]",
-        "[:upper:]",
-        "[:^upper:]",
-        "[:punct:]",
-        "[:^punct:]",
-        "[:print:]",
-        "[:^print:]",
-        "[:alnum:]",
-        "[:^alnum:]",
-        "[:graph:]",
-        "[:^graph:]",
-        "[:cased:]",
-        "[:^cased:]",
-        "\\s",
-        "\\S",
-        "[:blank:]",
-        "[:^blank:]",
-        "[:xdigit:]",
-        "[:^xdigit:]",
-        "[:cntrl:]",
-        "[:^cntrl:]",
-        "[:ascii:]",
-        "[:^ascii:]",
-        "\\v",
-        "\\V"
-    };
      RXi_GET_DECL(prog,progi);
      GET_RE_DEBUG_FLAGS_DECL;
  
      RXi_GET_DECL(prog,progi);
      GET_RE_DEBUG_FLAGS_DECL;
  
@@ -17151,10 +18651,13 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
          if ( IS_ANYOF_TRIE(op) || trie->bitmap ) {
              sv_catpvs(sv, "[");
              (void) put_charclass_bitmap_innards(sv,
          if ( IS_ANYOF_TRIE(op) || trie->bitmap ) {
              sv_catpvs(sv, "[");
              (void) put_charclass_bitmap_innards(sv,
-                                                (IS_ANYOF_TRIE(op))
+                                                ((IS_ANYOF_TRIE(op))
                                                   ? ANYOF_BITMAP(o)
                                                   ? ANYOF_BITMAP(o)
-                                                 : TRIE_BITMAP(trie),
-                                                NULL);
+                                                 : TRIE_BITMAP(trie)),
+                                                NULL,
+                                                NULL,
+                                                NULL
+                                               );
              sv_catpvs(sv, "]");
          }
  
              sv_catpvs(sv, "]");
          }
  
@@ -17237,149 +18740,133 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
         Perl_sv_catpvf(aTHX_ sv, "[%d]", o->flags);
      else if (k == ANYOF) {
         const U8 flags = ANYOF_FLAGS(o);
         Perl_sv_catpvf(aTHX_ sv, "[%d]", o->flags);
      else if (k == ANYOF) {
         const U8 flags = ANYOF_FLAGS(o);
-       int do_sep = 0;
-        SV* bitmap_invlist;  /* Will hold what the bit map contains */
+        bool do_sep = FALSE;    /* Do we need to separate various components of
+                                   the output? */
+        /* Set if there is still an unresolved user-defined property */
+        SV *unresolved                = NULL;
+
+        /* Things that are ignored except when the runtime locale is UTF-8 */
+        SV *only_utf8_locale_invlist = NULL;
+
+        /* Code points that don't fit in the bitmap */
+        SV *nonbitmap_invlist = NULL;
  
  
+        /* And things that aren't in the bitmap, but are small enough to be */
+        SV* bitmap_range_not_in_bitmap = NULL;
  
         if (OP(o) == ANYOFL) {
  
         if (OP(o) == ANYOFL) {
-            if (flags & ANYOF_LOC_REQ_UTF8) {
-                sv_catpvs(sv, "{utf8-loc}");
+            if (ANYOFL_UTF8_LOCALE_REQD(flags)) {
+                sv_catpvs(sv, "{utf8-locale-reqd}");
              }
              }
-            else {
-                sv_catpvs(sv, "{loc}");
+            if (flags & ANYOFL_FOLD) {
+                sv_catpvs(sv, "{i}");
              }
          }
              }
          }
-       if (flags & ANYOF_LOC_FOLD)
-           sv_catpvs(sv, "{i}");
-       Perl_sv_catpvf(aTHX_ sv, "[%s", PL_colors[0]);
-       if (flags & ANYOF_INVERT)
-           sv_catpvs(sv, "^");
  
  
-        /* output what the standard cp 0-NUM_ANYOF_CODE_POINTS-1 bitmap matches
-         * */
-        do_sep = put_charclass_bitmap_innards(sv, ANYOF_BITMAP(o),
-                                                            &bitmap_invlist);
+        /* If there is stuff outside the bitmap, get it */
+        if (ARG(o) != ANYOF_ONLY_HAS_BITMAP) {
+            (void) _get_regclass_nonbitmap_data(prog, o, FALSE,
+                                                &unresolved,
+                                                &only_utf8_locale_invlist,
+                                                &nonbitmap_invlist);
+            /* The non-bitmap data may contain stuff that could fit in the
+             * bitmap.  This could come from a user-defined property being
+             * finally resolved when this call was done; or much more likely
+             * because there are matches that require UTF-8 to be valid, and so
+             * aren't in the bitmap.  This is teased apart later */
+            _invlist_intersection(nonbitmap_invlist,
+                                  PL_InBitmap,
+                                  &bitmap_range_not_in_bitmap);
+            /* Leave just the things that don't fit into the bitmap */
+            _invlist_subtract(nonbitmap_invlist,
+                              PL_InBitmap,
+                              &nonbitmap_invlist);
+        }
  
  
-        /* output any special charclass tests (used entirely under use
-         * locale) * */
-       if (ANYOF_POSIXL_TEST_ANY_SET(o)) {
-            int i;
-           for (i = 0; i < ANYOF_POSIXL_MAX; i++) {
-               if (ANYOF_POSIXL_TEST(o,i)) {
-                   sv_catpv(sv, anyofs[i]);
-                   do_sep = 1;
-               }
-            }
+        /* Obey this flag to add all above-the-bitmap code points */
+        if (flags & ANYOF_MATCHES_ALL_ABOVE_BITMAP) {
+            nonbitmap_invlist = _add_range_to_invlist(nonbitmap_invlist,
+                                                      NUM_ANYOF_CODE_POINTS,
+                                                      UV_MAX);
          }
  
          }
  
-       if ((flags & (ANYOF_MATCHES_ALL_ABOVE_BITMAP
-                      |ANYOF_HAS_UTF8_NONBITMAP_MATCHES
-                      |ANYOF_HAS_NONBITMAP_NON_UTF8_MATCHES
-                      |ANYOF_LOC_FOLD)))
-        {
+        /* Ready to start outputting.  First, the initial left bracket */
+       Perl_sv_catpvf(aTHX_ sv, "[%s", PL_colors[0]);
+
+        /* Then all the things that could fit in the bitmap */
+        do_sep = put_charclass_bitmap_innards(sv,
+                                              ANYOF_BITMAP(o),
+                                              bitmap_range_not_in_bitmap,
+                                              only_utf8_locale_invlist,
+                                              o);
+        SvREFCNT_dec(bitmap_range_not_in_bitmap);
+
+        /* If there are user-defined properties which haven't been defined yet,
+         * output them, in a separate [] from the bitmap range stuff */
+        if (unresolved) {
              if (do_sep) {
                  Perl_sv_catpvf(aTHX_ sv,"%s][%s",PL_colors[1],PL_colors[0]);
              if (do_sep) {
                  Perl_sv_catpvf(aTHX_ sv,"%s][%s",PL_colors[1],PL_colors[0]);
-                if (flags & ANYOF_INVERT)
-                    /*make sure the invert info is in each */
-                    sv_catpvs(sv, "^");
              }
              }
-
-            if (OP(o) == ANYOFD
-                && (flags & ANYOF_SHARED_d_MATCHES_ALL_NON_UTF8_NON_ASCII_non_d_WARN_SUPER))
-            {
-                sv_catpvs(sv, "{non-utf8-latin1-all}");
+            if (flags & ANYOF_INVERT) {
+                sv_catpvs(sv, "^");
              }
              }
+            sv_catsv(sv, unresolved);
+            do_sep = TRUE;
+            SvREFCNT_dec_NN(unresolved);
+        }
  
  
-            if (flags & ANYOF_MATCHES_ALL_ABOVE_BITMAP)
-                sv_catpvs(sv, "{above_bitmap_all}");
-
-            if (ARG(o) != ANYOF_ONLY_HAS_BITMAP) {
-                SV *lv; /* Set if there is something outside the bit map. */
-                bool byte_output = FALSE;   /* If something has been output */
-                SV *only_utf8_locale;
-
-                /* Get the stuff that wasn't in the bitmap.  'bitmap_invlist'
-                 * is used to guarantee that nothing in the bitmap gets
-                 * returned */
-                (void) _get_regclass_nonbitmap_data(prog, o, FALSE,
-                                                    &lv, &only_utf8_locale,
-                                                    bitmap_invlist);
-                if (lv && lv != &PL_sv_undef) {
-                    char *s = savesvpv(lv);
-                    char * const origs = s;
-
-                    while (*s && *s != '\n')
-                        s++;
-
-                    if (*s == '\n') {
-                        const char * const t = ++s;
+        /* And, finally, add the above-the-bitmap stuff */
+        if (nonbitmap_invlist && _invlist_len(nonbitmap_invlist)) {
+            SV* contents;
  
  
-                        if (flags & ANYOF_HAS_NONBITMAP_NON_UTF8_MATCHES) {
-                            sv_catpvs(sv, "{outside bitmap}");
-                        }
-                        else {
-                            sv_catpvs(sv, "{utf8}");
-                        }
+            /* See if truncation size is overridden */
+            const STRLEN dump_len = (PL_dump_re_max_len)
+                                    ? PL_dump_re_max_len
+                                    : 256;
  
  
-                        if (byte_output) {
-                            sv_catpvs(sv, " ");
-                        }
-
-                        while (*s) {
-                            if (*s == '\n') {
+            /* This is output in a separate [] */
+            if (do_sep) {
+                Perl_sv_catpvf(aTHX_ sv,"%s][%s",PL_colors[1],PL_colors[0]);
+            }
  
  
-                                /* Truncate very long output */
-                                if (s - origs > 256) {
-                                    Perl_sv_catpvf(aTHX_ sv,
-                                                "%.*s...",
-                                                (int) (s - origs - 1),
-                                                t);
-                                    goto out_dump;
-                                }
-                                *s = ' ';
-                            }
-                            else if (*s == '\t') {
-                                *s = '-';
-                            }
-                            s++;
-                        }
-                        if (s[-1] == ' ')
-                            s[-1] = 0;
+            /* And, for easy of understanding, it is always output not-shown as
+             * complemented */
+            if (flags & ANYOF_INVERT) {
+                _invlist_invert(nonbitmap_invlist);
+                _invlist_subtract(nonbitmap_invlist, PL_InBitmap, &nonbitmap_invlist);
+            }
  
  
-                        sv_catpv(sv, t);
-                    }
+            contents = invlist_contents(nonbitmap_invlist,
+                                        FALSE /* output suitable for catsv */
+                                       );
  
  
-                  out_dump:
+            /* If the output is shorter than the permissible maximum, just do it. */
+            if (SvCUR(contents) <= dump_len) {
+                sv_catsv(sv, contents);
+            }
+            else {
+                const char * contents_string = SvPVX(contents);
+                STRLEN i = dump_len;
  
  
-                    Safefree(origs);
-                    SvREFCNT_dec_NN(lv);
+                /* Otherwise, start at the permissible max and work back to the
+                 * first break possibility */
+                while (i > 0 && contents_string[i] != ' ') {
+                    i--;
                  }
                  }
-
-                if ((flags & ANYOF_LOC_FOLD)
-                     && only_utf8_locale
-                     && only_utf8_locale != &PL_sv_undef)
-                {
-                    UV start, end;
-                    int max_entries = 256;
-
-                    sv_catpvs(sv, "{utf8 locale}");
-                    invlist_iterinit(only_utf8_locale);
-                    while (invlist_iternext(only_utf8_locale,
-                                            &start, &end)) {
-                        put_range(sv, start, end, FALSE);
-                        max_entries --;
-                        if (max_entries < 0) {
-                            sv_catpvs(sv, "...");
-                            break;
-                        }
-                    }
-                    invlist_iterfinish(only_utf8_locale);
+                if (i == 0) {       /* Fail-safe.  Use the max if we couldn't
+                                       find a legal break */
+                    i = dump_len;
                  }
                  }
+
+                sv_catpvn(sv, contents_string, i);
+                sv_catpvs(sv, "...");
              }
              }
-       }
-        SvREFCNT_dec(bitmap_invlist);
  
  
+            SvREFCNT_dec_NN(contents);
+            SvREFCNT_dec_NN(nonbitmap_invlist);
+        }
  
  
+        /* And finally the matching, closing ']' */
         Perl_sv_catpvf(aTHX_ sv, "%s]", PL_colors[1]);
      }
      else if (k == POSIXD || k == NPOSIXD) {
         Perl_sv_catpvf(aTHX_ sv, "%s]", PL_colors[1]);
      }
      else if (k == POSIXD || k == NPOSIXD) {
@@ -17402,9 +18889,11 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
          const char * const bounds[] = {
              "",      /* Traditional */
              "{gcb}",
          const char * const bounds[] = {
              "",      /* Traditional */
              "{gcb}",
+            "{lb}",
              "{sb}",
              "{wb}"
          };
              "{sb}",
              "{wb}"
          };
+        assert(FLAGS(o) < C_ARRAY_LENGTH(bounds));
          sv_catpv(sv, bounds[FLAGS(o)]);
      }
      else if (k == BRANCHJ && (OP(o) == UNLESSM || OP(o) == IFMATCH))
          sv_catpv(sv, bounds[FLAGS(o)]);
      }
      else if (k == BRANCHJ && (OP(o) == UNLESSM || OP(o) == IFMATCH))
@@ -18044,18 +19533,18 @@ S_put_code_point(pTHX_ SV *sv, UV c)
      }
      else if (isPRINT(c)) {
         const char string = (char) c;
      }
      else if (isPRINT(c)) {
         const char string = (char) c;
-       if (isBACKSLASHED_PUNCT(c))
+
+        /* We use {phrase} as metanotation in the class, so also escape literal
+         * braces */
+       if (isBACKSLASHED_PUNCT(c) || c == '{' || c == '}')
             sv_catpvs(sv, "\\");
         sv_catpvn(sv, &string, 1);
      }
             sv_catpvs(sv, "\\");
         sv_catpvn(sv, &string, 1);
      }
+    else if (isMNEMONIC_CNTRL(c)) {
+        Perl_sv_catpvf(aTHX_ sv, "%s", cntrl_to_mnemonic((U8) c));
+    }
      else {
      else {
-        const char * const mnemonic = cntrl_to_mnemonic((char) c);
-        if (mnemonic) {
-            Perl_sv_catpvf(aTHX_ sv, "%s", mnemonic);
-        }
-        else {
-            Perl_sv_catpvf(aTHX_ sv, "\\x{%02X}", (U8) c);
-        }
+        Perl_sv_catpvf(aTHX_ sv, "\\x%02X", (U8) c);
      }
  }
  
      }
  }
  
@@ -18065,8 +19554,15 @@ STATIC void
  S_put_range(pTHX_ SV *sv, UV start, const UV end, const bool allow_literals)
  {
      /* Appends to 'sv' a displayable version of the range of code points from
  S_put_range(pTHX_ SV *sv, UV start, const UV end, const bool allow_literals)
  {
      /* Appends to 'sv' a displayable version of the range of code points from
-     * 'start' to 'end'.  It assumes that only ASCII printables are displayable
-     * as-is (though some of these will be escaped by put_code_point()). */
+     * 'start' to 'end'.  Mnemonics (like '\r') are used for the few controls
+     * that have them, when they occur at the beginning or end of the range.
+     * It uses hex to output the remaining code points, unless 'allow_literals'
+     * is true, in which case the printable ASCII ones are output as-is (though
+     * some of these will be escaped by put_code_point()).
+     *
+     * NOTE:  This is designed only for printing ranges of code points that fit
+     *        inside an ANYOF bitmap.  Higher code points are simply suppressed
+     */
  
      const unsigned int min_range_count = 3;
  
  
      const unsigned int min_range_count = 3;
  
@@ -18080,7 +19576,7 @@ S_put_range(pTHX_ SV *sv, UV start, const UV end, const bool allow_literals)
  
          if (end - start < min_range_count) {
  
  
          if (end - start < min_range_count) {
  
-            /* Individual chars in short ranges */
+            /* Output chars individually when they occur in short ranges */
              for (; start <= end; start++) {
                  put_code_point(sv, start);
              }
              for (; start <= end; start++) {
                  put_code_point(sv, start);
              }
@@ -18089,11 +19585,11 @@ S_put_range(pTHX_ SV *sv, UV start, const UV end, const bool allow_literals)
  
          /* If permitted by the input options, and there is a possibility that
           * this range contains a printable literal, look to see if there is
  
          /* If permitted by the input options, and there is a possibility that
           * this range contains a printable literal, look to see if there is
-         * one.  */
+         * one. */
          if (allow_literals && start <= MAX_PRINT_A) {
  
          if (allow_literals && start <= MAX_PRINT_A) {
  
-            /* If the range begin isn't an ASCII printable, effectively split
-             * the range into two parts:
+            /* If the character at the beginning of the range isn't an ASCII
+             * printable, effectively split the range into two parts:
               *  1) the portion before the first such printable,
               *  2) the rest
               * and output them separately. */
               *  1) the portion before the first such printable,
               *  2) the rest
               * and output them separately. */
@@ -18115,18 +19611,18 @@ S_put_range(pTHX_ SV *sv, UV start, const UV end, const bool allow_literals)
                      temp_end = end + 1;
                  }
  
                      temp_end = end + 1;
                  }
  
-                /* Output the first part of the split range, the part that
-                 * doesn't have printables, with no looking for literals
-                 * (otherwise we would infinitely recurse) */
+                /* Output the first part of the split range: the part that
+                 * doesn't have printables, with the parameter set to not look
+                 * for literals (otherwise we would infinitely recurse) */
                  put_range(sv, start, temp_end - 1, FALSE);
  
                  /* The 2nd part of the range (if any) starts here. */
                  start = temp_end;
  
                  put_range(sv, start, temp_end - 1, FALSE);
  
                  /* The 2nd part of the range (if any) starts here. */
                  start = temp_end;
  
-                /* We continue instead of dropping down because even if the 2nd
-                 * part is non-empty, it could be so short that we want to
-                 * output it specially, as tested for at the top of this loop.
-                 * */
+                /* We do a continue, instead of dropping down, because even if
+                 * the 2nd part is non-empty, it could be so short that we want
+                 * to output it as individual characters, as tested for at the
+                 * top of this loop.  */
                  continue;
              }
  
                  continue;
              }
  
@@ -18191,7 +19687,8 @@ S_put_range(pTHX_ SV *sv, UV start, const UV end, const bool allow_literals)
                  temp_end--;
              }
  
                  temp_end--;
              }
  
-            /* And separately output the range that doesn't have mnemonics */
+            /* And separately output the interior range that doesn't start or
+             * end with mnemonics */
              put_range(sv, start, temp_end, FALSE);
  
              /* Then output the mnemonic trailing controls */
              put_range(sv, start, temp_end, FALSE);
  
              /* Then output the mnemonic trailing controls */
@@ -18210,10 +19707,10 @@ S_put_range(pTHX_ SV *sv, UV start, const UV end, const bool allow_literals)
                      : NUM_ANYOF_CODE_POINTS - 1;
  #if NUM_ANYOF_CODE_POINTS > 256
          format = (this_end < 256)
                      : NUM_ANYOF_CODE_POINTS - 1;
  #if NUM_ANYOF_CODE_POINTS > 256
          format = (this_end < 256)
-                 ? "\\x{%02"UVXf"}-\\x{%02"UVXf"}"
+                 ? "\\x%02"UVXf"-\\x%02"UVXf""
                   : "\\x{%04"UVXf"}-\\x{%04"UVXf"}";
  #else
                   : "\\x{%04"UVXf"}-\\x{%04"UVXf"}";
  #else
-        format = "\\x{%02"UVXf"}-\\x{%02"UVXf"}";
+        format = "\\x%02"UVXf"-\\x%02"UVXf"";
  #endif
          GCC_DIAG_IGNORE(-Wformat-nonliteral);
          Perl_sv_catpvf(aTHX_ sv, format, start, this_end);
  #endif
          GCC_DIAG_IGNORE(-Wformat-nonliteral);
          Perl_sv_catpvf(aTHX_ sv, format, start, this_end);
@@ -18222,57 +19719,26 @@ S_put_range(pTHX_ SV *sv, UV start, const UV end, const bool allow_literals)
      }
  }
  
      }
  }
  
-STATIC bool
-S_put_charclass_bitmap_innards(pTHX_ SV *sv, char *bitmap, SV** bitmap_invlist)
+STATIC void
+S_put_charclass_bitmap_innards_invlist(pTHX_ SV *sv, SV* invlist)
  {
  {
-    /* Appends to 'sv' a displayable version of the innards of the bracketed
-     * character class whose bitmap is 'bitmap';  Returns 'TRUE' if it actually
-     * output anything, and bitmap_invlist, if not NULL, will point to an
-     * inversion list of what is in the bit map */
+    /* Concatenate onto the PV in 'sv' a displayable form of the inversion list
+     * 'invlist' */
  
  
-    int i;
      UV start, end;
      UV start, end;
-    unsigned int punct_count = 0;
-    SV* invlist = NULL;
-    SV** invlist_ptr;   /* Temporary, in case bitmap_invlist is NULL */
      bool allow_literals = TRUE;
  
      bool allow_literals = TRUE;
  
-    PERL_ARGS_ASSERT_PUT_CHARCLASS_BITMAP_INNARDS;
-
-    invlist_ptr = (bitmap_invlist) ? bitmap_invlist : &invlist;
-
-    /* Worst case is exactly every-other code point is in the list */
-    *invlist_ptr = _new_invlist(NUM_ANYOF_CODE_POINTS / 2);
-
-    /* Convert the bit map to an inversion list, keeping track of how many
-     * ASCII puncts are set, including an extra amount for the backslashed
-     * ones.  */
-    for (i = 0; i < NUM_ANYOF_CODE_POINTS; i++) {
-        if (BITMAP_TEST(bitmap, i)) {
-            *invlist_ptr = add_cp_to_invlist(*invlist_ptr, i);
-            if (isPUNCT_A(i)) {
-                punct_count++;
-                if isBACKSLASHED_PUNCT(i) {
-                    punct_count++;
-                }
-            }
-        }
-    }
-
-    /* Nothing to output */
-    if (_invlist_len(*invlist_ptr) == 0) {
-        SvREFCNT_dec(invlist);
-        return FALSE;
-    }
+    PERL_ARGS_ASSERT_PUT_CHARCLASS_BITMAP_INNARDS_INVLIST;
  
      /* Generally, it is more readable if printable characters are output as
       * literals, but if a range (nearly) spans all of them, it's best to output
       * it as a single range.  This code will use a single range if all but 2
  
      /* Generally, it is more readable if printable characters are output as
       * literals, but if a range (nearly) spans all of them, it's best to output
       * it as a single range.  This code will use a single range if all but 2
-     * printables are in it */
-    invlist_iterinit(*invlist_ptr);
-    while (invlist_iternext(*invlist_ptr, &start, &end)) {
+     * ASCII printables are in it */
+    invlist_iterinit(invlist);
+    while (invlist_iternext(invlist, &start, &end)) {
  
  
-        /* If range starts beyond final printable, it doesn't have any in it */
+        /* If the range starts beyond the final printable, it doesn't have any
+         * in it */
          if (start > MAX_PRINT_A) {
              break;
          }
          if (start > MAX_PRINT_A) {
              break;
          }
@@ -18293,36 +19759,347 @@ S_put_charclass_bitmap_innards(pTHX_ SV *sv, char *bitmap, SV** bitmap_invlist)
              break;
          }
      }
              break;
          }
      }
-    invlist_iterfinish(*invlist_ptr);
-
-    /* The legibility of the output depends mostly on how many punctuation
-     * characters are output.  There are 32 possible ASCII ones, and some have
-     * an additional backslash, bringing it to currently 36, so if any more
-     * than 18 are to be output, we can instead output it as its complement,
-     * yielding fewer puncts, and making it more legible.  But give some weight
-     * to the fact that outputting it as a complement is less legible than a
-     * straight output, so don't complement unless we are somewhat over the 18
-     * mark */
-    if (allow_literals && punct_count > 22) {
-        sv_catpvs(sv, "^");
-
-        /* Add everything remaining to the list, so when we invert it just
-         * below, it will be excluded */
-        _invlist_union_complement_2nd(*invlist_ptr, PL_InBitmap, invlist_ptr);
-        _invlist_invert(*invlist_ptr);
-    }
+    invlist_iterfinish(invlist);
  
      /* Here we have figured things out.  Output each range */
  
      /* Here we have figured things out.  Output each range */
-    invlist_iterinit(*invlist_ptr);
-    while (invlist_iternext(*invlist_ptr, &start, &end)) {
+    invlist_iterinit(invlist);
+    while (invlist_iternext(invlist, &start, &end)) {
          if (start >= NUM_ANYOF_CODE_POINTS) {
              break;
          }
          put_range(sv, start, end, allow_literals);
      }
          if (start >= NUM_ANYOF_CODE_POINTS) {
              break;
          }
          put_range(sv, start, end, allow_literals);
      }
-    invlist_iterfinish(*invlist_ptr);
+    invlist_iterfinish(invlist);
  
  
-    return TRUE;
+    return;
+}
+
+STATIC SV*
+S_put_charclass_bitmap_innards_common(pTHX_
+        SV* invlist,            /* The bitmap */
+        SV* posixes,            /* Under /l, things like [:word:], \S */
+        SV* only_utf8,          /* Under /d, matches iff the target is UTF-8 */
+        SV* not_utf8,           /* /d, matches iff the target isn't UTF-8 */
+        SV* only_utf8_locale,   /* Under /l, matches if the locale is UTF-8 */
+        const bool invert       /* Is the result to be inverted? */
+)
+{
+    /* Create and return an SV containing a displayable version of the bitmap
+     * and associated information determined by the input parameters. */
+
+    SV * output;
+
+    PERL_ARGS_ASSERT_PUT_CHARCLASS_BITMAP_INNARDS_COMMON;
+
+    if (invert) {
+        output = newSVpvs("^");
+    }
+    else {
+        output = newSVpvs("");
+    }
+
+    /* First, the code points in the bitmap that are unconditionally there */
+    put_charclass_bitmap_innards_invlist(output, invlist);
+
+    /* Traditionally, these have been placed after the main code points */
+    if (posixes) {
+        sv_catsv(output, posixes);
+    }
+
+    if (only_utf8 && _invlist_len(only_utf8)) {
+        Perl_sv_catpvf(aTHX_ output, "%s{utf8}%s", PL_colors[1], PL_colors[0]);
+        put_charclass_bitmap_innards_invlist(output, only_utf8);
+    }
+
+    if (not_utf8 && _invlist_len(not_utf8)) {
+        Perl_sv_catpvf(aTHX_ output, "%s{not utf8}%s", PL_colors[1], PL_colors[0]);
+        put_charclass_bitmap_innards_invlist(output, not_utf8);
+    }
+
+    if (only_utf8_locale && _invlist_len(only_utf8_locale)) {
+        Perl_sv_catpvf(aTHX_ output, "%s{utf8 locale}%s", PL_colors[1], PL_colors[0]);
+        put_charclass_bitmap_innards_invlist(output, only_utf8_locale);
+
+        /* This is the only list in this routine that can legally contain code
+         * points outside the bitmap range.  The call just above to
+         * 'put_charclass_bitmap_innards_invlist' will simply suppress them, so
+         * output them here.  There's about a half-dozen possible, and none in
+         * contiguous ranges longer than 2 */
+        if (invlist_highest(only_utf8_locale) >= NUM_ANYOF_CODE_POINTS) {
+            UV start, end;
+            SV* above_bitmap = NULL;
+
+            _invlist_subtract(only_utf8_locale, PL_InBitmap, &above_bitmap);
+
+            invlist_iterinit(above_bitmap);
+            while (invlist_iternext(above_bitmap, &start, &end)) {
+                UV i;
+
+                for (i = start; i <= end; i++) {
+                    put_code_point(output, i);
+                }
+            }
+            invlist_iterfinish(above_bitmap);
+            SvREFCNT_dec_NN(above_bitmap);
+        }
+    }
+
+    /* If the only thing we output is the '^', clear it */
+    if (invert && SvCUR(output) == 1) {
+        SvCUR_set(output, 0);
+    }
+
+    return output;
+}
+
+STATIC bool
+S_put_charclass_bitmap_innards(pTHX_ SV *sv,
+                                     char *bitmap,
+                                     SV *nonbitmap_invlist,
+                                     SV *only_utf8_locale_invlist,
+                                     const regnode * const node)
+{
+    /* Appends to 'sv' a displayable version of the innards of the bracketed
+     * character class defined by the other arguments:
+     *  'bitmap' points to the bitmap.
+     *  'nonbitmap_invlist' is an inversion list of the code points that are in
+     *      the bitmap range, but for some reason aren't in the bitmap; NULL if
+     *      none.  The reasons for this could be that they require some
+     *      condition such as the target string being or not being in UTF-8
+     *      (under /d), or because they came from a user-defined property that
+     *      was not resolved at the time of the regex compilation (under /u)
+     *  'only_utf8_locale_invlist' is an inversion list of the code points that
+     *      are valid only if the runtime locale is a UTF-8 one; NULL if none
+     *  'node' is the regex pattern node.  It is needed only when the above two
+     *      parameters are not null, and is passed so that this routine can
+     *      tease apart the various reasons for them.
+     *
+     * It returns TRUE if there was actually something output.  (It may be that
+     * the bitmap, etc is empty.)
+     *
+     * When called for outputting the bitmap of a non-ANYOF node, just pass the
+     * bitmap, with the succeeding parameters set to NULL.
+     *
+     */
+
+    /* In general, it tries to display the 'cleanest' representation of the
+     * innards, choosing whether to display them inverted or not, regardless of
+     * whether the class itself is to be inverted.  However,  there are some
+     * cases where it can't try inverting, as what actually matches isn't known
+     * until runtime, and hence the inversion isn't either. */
+    bool inverting_allowed = TRUE;
+
+    int i;
+    STRLEN orig_sv_cur = SvCUR(sv);
+
+    SV* invlist;            /* Inversion list we accumulate of code points that
+                               are unconditionally matched */
+    SV* only_utf8 = NULL;   /* Under /d, list of matches iff the target is
+                               UTF-8 */
+    SV* not_utf8 =  NULL;   /* /d, list of matches iff the target isn't UTF-8
+                             */
+    SV* posixes = NULL;     /* Under /l, string of things like [:word:], \D */
+    SV* only_utf8_locale = NULL;    /* Under /l, list of matches if the locale
+                                       is UTF-8 */
+
+    SV* as_is_display;      /* The output string when we take the inputs
+                              literally */
+    SV* inverted_display;   /* The output string when we invert the inputs */
+
+    U8 flags = (node) ? ANYOF_FLAGS(node) : 0;
+
+    bool invert = cBOOL(flags & ANYOF_INVERT);  /* Is the input to be inverted
+                                                   to match? */
+    /* We are biased in favor of displaying things without them being inverted,
+     * as that is generally easier to understand */
+    const int bias = 5;
+
+    PERL_ARGS_ASSERT_PUT_CHARCLASS_BITMAP_INNARDS;
+
+    /* Start off with whatever code points are passed in.  (We clone, so we
+     * don't change the caller's list) */
+    if (nonbitmap_invlist) {
+        assert(invlist_highest(nonbitmap_invlist) < NUM_ANYOF_CODE_POINTS);
+        invlist = invlist_clone(nonbitmap_invlist);
+    }
+    else {  /* Worst case size is every other code point is matched */
+        invlist = _new_invlist(NUM_ANYOF_CODE_POINTS / 2);
+    }
+
+    if (flags) {
+        if (OP(node) == ANYOFD) {
+
+            /* This flag indicates that the code points below 0x100 in the
+             * nonbitmap list are precisely the ones that match only when the
+             * target is UTF-8 (they should all be non-ASCII). */
+            if (flags & ANYOF_SHARED_d_UPPER_LATIN1_UTF8_STRING_MATCHES_non_d_RUNTIME_USER_PROP)
+            {
+                _invlist_intersection(invlist, PL_UpperLatin1, &only_utf8);
+                _invlist_subtract(invlist, only_utf8, &invlist);
+            }
+
+            /* And this flag for matching all non-ASCII 0xFF and below */
+            if (flags & ANYOF_SHARED_d_MATCHES_ALL_NON_UTF8_NON_ASCII_non_d_WARN_SUPER)
+            {
+                if (invert) {
+                    not_utf8 = _new_invlist(0);
+                }
+                else {
+                    not_utf8 = invlist_clone(PL_UpperLatin1);
+                }
+                inverting_allowed = FALSE;  /* XXX needs more work to be able
+                                               to allow this */
+            }
+        }
+        else if (OP(node) == ANYOFL) {
+
+            /* If either of these flags are set, what matches isn't
+             * determinable except during execution, so don't know enough here
+             * to invert */
+            if (flags & (ANYOFL_FOLD|ANYOF_MATCHES_POSIXL)) {
+                inverting_allowed = FALSE;
+            }
+
+            /* What the posix classes match also varies at runtime, so these
+             * will be output symbolically. */
+            if (ANYOF_POSIXL_TEST_ANY_SET(node)) {
+                int i;
+
+                posixes = newSVpvs("");
+                for (i = 0; i < ANYOF_POSIXL_MAX; i++) {
+                    if (ANYOF_POSIXL_TEST(node,i)) {
+                        sv_catpv(posixes, anyofs[i]);
+                    }
+                }
+            }
+        }
+    }
+
+    /* Accumulate the bit map into the unconditional match list */
+    for (i = 0; i < NUM_ANYOF_CODE_POINTS; i++) {
+        if (BITMAP_TEST(bitmap, i)) {
+            int start = i++;
+            for (; i < NUM_ANYOF_CODE_POINTS && BITMAP_TEST(bitmap, i); i++) {
+                /* empty */
+            }
+            invlist = _add_range_to_invlist(invlist, start, i-1);
+        }
+    }
+
+    /* Make sure that the conditional match lists don't have anything in them
+     * that match unconditionally; otherwise the output is quite confusing.
+     * This could happen if the code that populates these misses some
+     * duplication. */
+    if (only_utf8) {
+        _invlist_subtract(only_utf8, invlist, &only_utf8);
+    }
+    if (not_utf8) {
+        _invlist_subtract(not_utf8, invlist, &not_utf8);
+    }
+
+    if (only_utf8_locale_invlist) {
+
+        /* Since this list is passed in, we have to make a copy before
+         * modifying it */
+        only_utf8_locale = invlist_clone(only_utf8_locale_invlist);
+
+        _invlist_subtract(only_utf8_locale, invlist, &only_utf8_locale);
+
+        /* And, it can get really weird for us to try outputting an inverted
+         * form of this list when it has things above the bitmap, so don't even
+         * try */
+        if (invlist_highest(only_utf8_locale) >= NUM_ANYOF_CODE_POINTS) {
+            inverting_allowed = FALSE;
+        }
+    }
+
+    /* Calculate what the output would be if we take the input as-is */
+    as_is_display = put_charclass_bitmap_innards_common(invlist,
+                                                    posixes,
+                                                    only_utf8,
+                                                    not_utf8,
+                                                    only_utf8_locale,
+                                                    invert);
+
+    /* If have to take the output as-is, just do that */
+    if (! inverting_allowed) {
+        sv_catsv(sv, as_is_display);
+    }
+    else { /* But otherwise, create the output again on the inverted input, and
+              use whichever version is shorter */
+
+        int inverted_bias, as_is_bias;
+
+        /* We will apply our bias to whichever of the the results doesn't have
+         * the '^' */
+        if (invert) {
+            invert = FALSE;
+            as_is_bias = bias;
+            inverted_bias = 0;
+        }
+        else {
+            invert = TRUE;
+            as_is_bias = 0;
+            inverted_bias = bias;
+        }
+
+        /* Now invert each of the lists that contribute to the output,
+         * excluding from the result things outside the possible range */
+
+        /* For the unconditional inversion list, we have to add in all the
+         * conditional code points, so that when inverted, they will be gone
+         * from it */
+        _invlist_union(only_utf8, invlist, &invlist);
+        _invlist_union(only_utf8_locale, invlist, &invlist);
+        _invlist_invert(invlist);
+        _invlist_intersection(invlist, PL_InBitmap, &invlist);
+
+        if (only_utf8) {
+            _invlist_invert(only_utf8);
+            _invlist_intersection(only_utf8, PL_UpperLatin1, &only_utf8);
+        }
+
+        if (not_utf8) {
+            _invlist_invert(not_utf8);
+            _invlist_intersection(not_utf8, PL_UpperLatin1, &not_utf8);
+        }
+
+        if (only_utf8_locale) {
+            _invlist_invert(only_utf8_locale);
+            _invlist_intersection(only_utf8_locale,
+                                  PL_InBitmap,
+                                  &only_utf8_locale);
+        }
+
+        inverted_display = put_charclass_bitmap_innards_common(
+                                            invlist,
+                                            posixes,
+                                            only_utf8,
+                                            not_utf8,
+                                            only_utf8_locale, invert);
+
+        /* Use the shortest representation, taking into account our bias
+         * against showing it inverted */
+        if (SvCUR(inverted_display) + inverted_bias
+            < SvCUR(as_is_display) + as_is_bias)
+        {
+           sv_catsv(sv, inverted_display);
+        }
+        else {
+           sv_catsv(sv, as_is_display);
+        }
+
+        SvREFCNT_dec_NN(as_is_display);
+        SvREFCNT_dec_NN(inverted_display);
+    }
+
+    SvREFCNT_dec_NN(invlist);
+    SvREFCNT_dec(only_utf8);
+    SvREFCNT_dec(not_utf8);
+    SvREFCNT_dec(posixes);
+    SvREFCNT_dec(only_utf8_locale);
+
+    return SvCUR(sv) > orig_sv_cur;
  }
  
  #define CLEAR_OPTSTART \
  }
  
  #define CLEAR_OPTSTART \