PATCH: [perl #126141]: qr/]]]]][\\/ fails to raise error

[perl5.git] / regcomp.c
diff --git a/regcomp.c b/regcomp.c

index aa06bc6..68c599f 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
@@ -105,6 +105,10 @@ EXTERN_C const struct regexp_engine my_reg_engine;
  #define MIN(a,b) ((a) < (b) ? (a) : (b))
  #endif
  
+#ifndef MAX
+#define MAX(a,b) ((a) > (b) ? (a) : (b))
+#endif
+
  /* this is a chain of data about sub patterns we are processing that
     need to be handled separately/specially in study_chunk. Its so
     we can simulate recursion without losing state.  */
@@ -195,7 +199,6 @@ struct RExC_state_t {
      scan_frame *frame_head;
      scan_frame *frame_last;
      U32         frame_count;
-    U32         strict;
  #ifdef ADD_TO_REGEXEC
      char       *starttry;              /* -Dr: where regtry was called. */
  #define RExC_starttry  (pRExC_state->starttry)
@@ -218,6 +221,7 @@ struct RExC_state_t {
  
  #endif
      bool        seen_unfolded_sharp_s;
+    bool        strict;
  };
  
  #define RExC_flags     (pRExC_state->flags)
@@ -1356,7 +1360,7 @@ S_get_ANYOF_cp_list_for_ssc(pTHX_ const RExC_state_t *pRExC_state,
       * returned list must, and will, contain every code point that is a
       * possibility. */
  
-    SV* invlist = sv_2mortal(_new_invlist(0));
+    SV* invlist = NULL;
      SV* only_utf8_locale_invlist = NULL;
      unsigned int i;
      const U32 n = ARG(node);
@@ -1378,6 +1382,7 @@ S_get_ANYOF_cp_list_for_ssc(pTHX_ const RExC_state_t *pRExC_state,
  
              /* Here, no compile-time swash, and there are things that won't be
               * known until runtime -- we have to assume it could be anything */
+            invlist = sv_2mortal(_new_invlist(1));
              return _add_range_to_invlist(invlist, 0, UV_MAX);
          }
          else if (ary[3] && ary[3] != &PL_sv_undef) {
@@ -1395,6 +1400,10 @@ S_get_ANYOF_cp_list_for_ssc(pTHX_ const RExC_state_t *pRExC_state,
          }
      }
  
+    if (! invlist) {
+        invlist = sv_2mortal(_new_invlist(0));
+    }
+
      /* An ANYOF node contains a bitmap for the first NUM_ANYOF_CODE_POINTS
       * code points, and an inversion list for the others, but if there are code
       * points that should match only conditionally on the target string being
@@ -1414,14 +1423,21 @@ S_get_ANYOF_cp_list_for_ssc(pTHX_ const RExC_state_t *pRExC_state,
      /* Add in the points from the bit map */
      for (i = 0; i < NUM_ANYOF_CODE_POINTS; i++) {
          if (ANYOF_BITMAP_TEST(node, i)) {
-            invlist = add_cp_to_invlist(invlist, i);
+            unsigned int start = i++;
+
+            for (; i < NUM_ANYOF_CODE_POINTS && ANYOF_BITMAP_TEST(node, i); ++i) {
+                /* empty */
+            }
+            invlist = _add_range_to_invlist(invlist, start, i-1);
              new_node_has_latin1 = TRUE;
          }
      }
  
      /* If this can match all upper Latin1 code points, have to add them
-     * as well */
-    if (OP(node) == ANYOFD
+     * as well.  But don't add them if inverting, as when that gets done below,
+     * it would exclude all these characters, including the ones it shouldn't
+     * that were added just above */
+    if (! (ANYOF_FLAGS(node) & ANYOF_INVERT) && OP(node) == ANYOFD
          && (ANYOF_FLAGS(node) & ANYOF_SHARED_d_MATCHES_ALL_NON_UTF8_NON_ASCII_non_d_WARN_SUPER))
      {
          _invlist_union(invlist, PL_UpperLatin1, &invlist);
@@ -1822,35 +1838,28 @@ S_is_ssc_worth_it(const RExC_state_t * pRExC_state, const regnode_ssc * ssc)
       *      (unassigned, private use, surrogates, controls and formats).  This
       *      is a much large number. */
  
-    const U32 max_match = (LOC)
-                          ? 127
-                          : (! UNI_SEMANTICS)
-                            ? 63
-                            : (invlist_highest(ssc->invlist) < 256)
-                              ? 127
-                              : ((NON_OTHER_COUNT + 1) / 2) - 1;
      U32 count = 0;      /* Running total of number of code points matched by
                             'ssc' */
      UV start, end;      /* Start and end points of current range in inversion
                             list */
+    const U32 max_code_points = (LOC)
+                                ?  256
+                                : ((   ! UNI_SEMANTICS
+                                     || invlist_highest(ssc->invlist) < 256)
+                                  ? 128
+                                  : NON_OTHER_COUNT);
+    const U32 max_match = max_code_points / 2;
  
      PERL_ARGS_ASSERT_IS_SSC_WORTH_IT;
  
      invlist_iterinit(ssc->invlist);
      while (invlist_iternext(ssc->invlist, &start, &end)) {
-
-        /* /u is the only thing that we expect to match above 255; so if not /u
-         * and even if there are matches above 255, ignore them.  This catches
-         * things like \d under /d which does match the digits above 255, but
-         * since the pattern is /d, it is not likely to be expecting them */
-        if (! UNI_SEMANTICS) {
-            if (start > 255) {
-                break;
-            }
-            end = MIN(end, 255);
+        if (start >= max_code_points) {
+            break;
          }
+        end = MIN(end, max_code_points - 1);
          count += end - start + 1;
-        if (count > max_match) {
+        if (count >= max_match) {
              invlist_iterfinish(ssc->invlist);
              return FALSE;
          }
@@ -6705,6 +6714,10 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
      /* Initialize these here instead of as-needed, as is quick and avoids
       * having to test them each time otherwise */
      if (! PL_AboveLatin1) {
+#ifdef DEBUGGING
+        char * dump_len_string;
+#endif
+
         PL_AboveLatin1 = _new_invlist_C_array(AboveLatin1_invlist);
         PL_Latin1 = _new_invlist_C_array(Latin1_invlist);
         PL_UpperLatin1 = _new_invlist_C_array(UpperLatin1_invlist);
@@ -6718,6 +6731,14 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
         PL_InBitmap = _new_invlist(2);
         PL_InBitmap = _add_range_to_invlist(PL_InBitmap, 0,
                                                      NUM_ANYOF_CODE_POINTS - 1);
+#ifdef DEBUGGING
+        dump_len_string = PerlEnv_getenv("PERL_DUMP_RE_MAX_LEN");
+        if (   ! dump_len_string
+            || ! grok_atoUV(dump_len_string, (UV *)&PL_dump_re_max_len, NULL))
+        {
+            PL_dump_re_max_len = 0;
+        }
+#endif
      }
  
      pRExC_state->code_blocks = NULL;
@@ -8164,7 +8185,8 @@ S_reg_scan_name(pTHX_ RExC_state_t *pRExC_state, U32 flags)
      assert (RExC_parse <= RExC_end);
      if (RExC_parse == RExC_end) NOOP;
      else if (isIDFIRST_lazy_if(RExC_parse, UTF)) {
-        /* skip IDFIRST by using do...while */
+         /* Note that the code here assumes well-formed UTF-8.  Skip IDFIRST by
+          * using do...while */
         if (UTF)
             do {
                 RExC_parse += UTF8SKIP(RExC_parse);
@@ -8339,6 +8361,52 @@ S_invlist_set_len(pTHX_ SV* const invlist, const UV len, const bool offset)
  
  #ifndef PERL_IN_XSUB_RE
  
+STATIC void
+S_invlist_replace_list_destroys_src(pTHX_ SV * dest, SV * src)
+{
+    /* Replaces the inversion list in 'src' with the one in 'dest'.  It steals
+     * the list from 'src', so 'src' is made to have a NULL list.  This is
+     * similar to what SvSetMagicSV() would do, if it were implemented on
+     * inversion lists, though this routine avoids a copy */
+
+    const UV src_len          = _invlist_len(src);
+    const bool src_offset     = *get_invlist_offset_addr(src);
+    const STRLEN src_byte_len = SvLEN(src);
+    char * array              = SvPVX(src);
+
+    const int oldtainted = TAINT_get;
+
+    PERL_ARGS_ASSERT_INVLIST_REPLACE_LIST_DESTROYS_SRC;
+
+    assert(SvTYPE(src) == SVt_INVLIST);
+    assert(SvTYPE(dest) == SVt_INVLIST);
+    assert(! invlist_is_iterating(src));
+    assert(SvCUR(src) == 0 || SvCUR(src) < SvLEN(src));
+
+    /* Make sure it ends in the right place with a NUL, as our inversion list
+     * manipulations aren't careful to keep this true, but sv_usepvn_flags()
+     * asserts it */
+    array[src_byte_len - 1] = '\0';
+
+    TAINT_NOT;      /* Otherwise it breaks */
+    sv_usepvn_flags(dest,
+                    (char *) array,
+                    src_byte_len - 1,
+
+                    /* This flag is documented to cause a copy to be avoided */
+                    SV_HAS_TRAILING_NUL);
+    TAINT_set(oldtainted);
+    SvPV_set(src, 0);
+    SvLEN_set(src, 0);
+    SvCUR_set(src, 0);
+
+    /* Finish up copying over the other fields in an inversion list */
+    *get_invlist_offset_addr(dest) = src_offset;
+    invlist_set_len(dest, src_len, src_offset);
+    *get_invlist_previous_index_addr(dest) = 0;
+    invlist_iterfinish(dest);
+}
+
  PERL_STATIC_INLINE IV*
  S_get_invlist_previous_index_addr(SV* invlist)
  {
@@ -8374,17 +8442,34 @@ S_invlist_set_previous_index(SV* const invlist, const IV index)
  }
  
  PERL_STATIC_INLINE void
-S_invlist_trim(SV* const invlist)
+S_invlist_trim(SV* invlist)
  {
+    /* Free the not currently-being-used space in an inversion list */
+
+    /* But don't free up the space needed for the 0 UV that is always at the
+     * beginning of the list, nor the trailing NUL */
+    const UV min_size = TO_INTERNAL_SIZE(1) + 1;
+
      PERL_ARGS_ASSERT_INVLIST_TRIM;
  
      assert(SvTYPE(invlist) == SVt_INVLIST);
  
-    /* Change the length of the inversion list to how many entries it currently
-     * has */
-    SvPV_shrink_to_cur((SV *) invlist);
+    SvPV_renew(invlist, MAX(min_size, SvCUR(invlist) + 1));
  }
  
+PERL_STATIC_INLINE void
+S_invlist_clear(pTHX_ SV* invlist)    /* Empty the inversion list */
+{
+    PERL_ARGS_ASSERT_INVLIST_CLEAR;
+
+    assert(SvTYPE(invlist) == SVt_INVLIST);
+
+    invlist_set_len(invlist, 0, 0);
+    invlist_trim(invlist);
+}
+
+#endif /* ifndef PERL_IN_XSUB_RE */
+
  PERL_STATIC_INLINE bool
  S_invlist_is_iterating(SV* const invlist)
  {
@@ -8393,8 +8478,6 @@ S_invlist_is_iterating(SV* const invlist)
      return *(get_invlist_iter_addr(invlist)) < (STRLEN) UV_MAX;
  }
  
-#endif /* ifndef PERL_IN_XSUB_RE */
-
  PERL_STATIC_INLINE UV
  S_invlist_max(SV* const invlist)
  {
@@ -8607,7 +8690,9 @@ Perl__invlist_search(SV* const invlist, const UV cp)
      /* Searches the inversion list for the entry that contains the input code
       * point <cp>.  If <cp> is not in the list, -1 is returned.  Otherwise, the
       * return value is the index into the list's array of the range that
-     * contains <cp> */
+     * contains <cp>, that is, 'i' such that
+     * array[i] <= cp < array[i+1]
+     */
  
      IV low = 0;
      IV mid;
@@ -8626,7 +8711,10 @@ Perl__invlist_search(SV* const invlist, const UV cp)
      array = invlist_array(invlist);
  
      mid = invlist_previous_index(invlist);
-    assert(mid >=0 && mid <= highest_element);
+    assert(mid >=0);
+    if (mid > highest_element) {
+        mid = highest_element;
+    }
  
      /* <mid> contains the cache of the result of the previous call to this
       * function (0 the first time).  See if this call is for the same result,
@@ -8790,10 +8878,10 @@ Perl__invlist_union_maybe_complement_2nd(pTHX_ SV* const a, SV* const b,
      /* Take the union of two inversion lists and point <output> to it.  *output
       * SHOULD BE DEFINED upon input, and if it points to one of the two lists,
       * the reference count to that list will be decremented if not already a
-     * temporary (mortal); otherwise *output will be made correspondingly
-     * mortal.  The first list, <a>, may be NULL, in which case a copy of the
-     * second list is returned.  If <complement_b> is TRUE, the union is taken
-     * of the complement (inversion) of <b> instead of b itself.
+     * temporary (mortal); otherwise just its contents will be modified to be
+     * the union.  The first list, <a>, may be NULL, in which case a copy of
+     * the second list is returned.  If <complement_b> is TRUE, the union is
+     * taken of the complement (inversion) of <b> instead of b itself.
       *
       * The basis for this comes from "Unicode Demystified" Chapter 13 by
       * Richard Gillam, published by Addison-Wesley, and explained at some
@@ -8832,56 +8920,103 @@ Perl__invlist_union_maybe_complement_2nd(pTHX_ SV* const a, SV* const b,
      PERL_ARGS_ASSERT__INVLIST_UNION_MAYBE_COMPLEMENT_2ND;
      assert(a != b);
  
-    /* If either one is empty, the union is the other one */
-    if (a == NULL || ((len_a = _invlist_len(a)) == 0)) {
-        bool make_temp = FALSE; /* Should we mortalize the result? */
+    len_b = _invlist_len(b);
+    if (len_b == 0) {
  
-       if (*output == a) {
-            if (a != NULL) {
-                if (! (make_temp = cBOOL(SvTEMP(a)))) {
-                    SvREFCNT_dec_NN(a);
-                }
+        /* Here, 'b' is empty.  If the output is the complement of 'b', the
+         * union is all possible code points, and we need not even look at 'a'.
+         * It's easiest to create a new inversion list that matches everything.
+         * */
+        if (complement_b) {
+            SV* everything = _new_invlist(1);
+            _append_range_to_invlist(everything, 0, UV_MAX);
+
+            /* If the output didn't exist, just point it at the new list */
+            if (*output == NULL) {
+                *output = everything;
+                return;
              }
-       }
-       if (*output != b) {
-           *output = invlist_clone(b);
-            if (complement_b) {
-                _invlist_invert(*output);
+
+            /* Otherwise, replace its contents with the new list */
+            invlist_replace_list_destroys_src(*output, everything);
+            SvREFCNT_dec_NN(everything);
+            return;
+        }
+
+        /* Here, we don't want the complement of 'b', and since it is empty,
+         * the union will come entirely from 'a'.  If 'a' is NULL or empty, the
+         * output will be empty */
+
+        if (a == NULL) {
+            *output = _new_invlist(0);
+            return;
+        }
+
+        if (_invlist_len(a) == 0) {
+            invlist_clear(*output);
+            return;
+        }
+
+        /* Here, 'a' is not empty, and entirely determines the union.  If the
+         * output is not to overwrite 'b', we can just return 'a'. */
+        if (*output != b) {
+
+            /* If the output is to overwrite 'a', we have a no-op, as it's
+             * already in 'a' */
+            if (*output == a) {
+                return;
              }
-       } /* else *output already = b; */
  
-        if (make_temp) {
-            sv_2mortal(*output);
+            /* But otherwise we have to copy 'a' to the output */
+            *output = invlist_clone(a);
+            return;
          }
+
+        /* Here, 'b' is to be overwritten by the output, which will be 'a' */
+        u = invlist_clone(a);
+        invlist_replace_list_destroys_src(*output, u);
+        SvREFCNT_dec_NN(u);
+
         return;
      }
-    else if ((len_b = _invlist_len(b)) == 0) {
-        bool make_temp = FALSE;
-       if (*output == b) {
-            if (! (make_temp = cBOOL(SvTEMP(b)))) {
-                SvREFCNT_dec_NN(b);
+
+    if (a == NULL || ((len_a = _invlist_len(a)) == 0)) {
+
+        /* Here, 'a' is empty (and b is not).  That means the union will come
+         * entirely from 'b'.  If the output is not to overwrite 'a', we can
+         * just return what's in 'b'.  */
+        if (*output != a) {
+
+            /* If the output is to overwrite 'b', it's already in 'b', but
+             * otherwise we have to copy 'b' to the output */
+            if (*output != b) {
+                *output = invlist_clone(b);
              }
-       }
  
-        /* The complement of an empty list is a list that has everything in it,
-         * so the union with <a> includes everything too */
-        if (complement_b) {
-            if (a == *output) {
-                if (! (make_temp = cBOOL(SvTEMP(a)))) {
-                    SvREFCNT_dec_NN(a);
-                }
+            /* And if the output is to be the inversion of 'b', do that */
+            if (complement_b) {
+                _invlist_invert(*output);
              }
-            *output = _new_invlist(1);
-            _append_range_to_invlist(*output, 0, UV_MAX);
+
+            return;
          }
-        else if (*output != a) {
-            *output = invlist_clone(a);
+
+        /* Here, 'a', which is empty or even NULL, is to be overwritten by the
+         * output, which will either be 'b' or the complement of 'b' */
+
+        if (a == NULL) {
+            *output = invlist_clone(b);
          }
-        /* else *output already = a; */
+        else {
+            u = invlist_clone(b);
+            invlist_replace_list_destroys_src(*output, u);
+            SvREFCNT_dec_NN(u);
+       }
  
-        if (make_temp) {
-            sv_2mortal(*output);
+        if (complement_b) {
+            _invlist_invert(*output);
          }
+
         return;
      }
  
@@ -8947,7 +9082,7 @@ Perl__invlist_union_maybe_complement_2nd(pTHX_ SV* const a, SV* const b,
  
         /* Here, have chosen which of the two inputs to look at.  Only output
          * if the running count changes to/from 0, which marks the
-        * beginning/end of a range in that's in the set */
+        * beginning/end of a range that's in the set */
         if (cp_in_set) {
             if (count == 0) {
                 array_u[i_u++] = cp;
@@ -8981,7 +9116,7 @@ Perl__invlist_union_maybe_complement_2nd(pTHX_ SV* const a, SV* const b,
       * 4) the exhausted wasn't in its set, non-exhausted is, count is 1;
       *    decrementing to 0 insures that we look at the remainder of the
       *    non-exhausted set */
-    if ((i_a != len_a && PREV_RANGE_MATCHES_INVLIST(i_a))
+    if (   (i_a != len_a && PREV_RANGE_MATCHES_INVLIST(i_a))
         || (i_b != len_b && PREV_RANGE_MATCHES_INVLIST(i_b)))
      {
         count--;
@@ -8997,8 +9132,9 @@ Perl__invlist_union_maybe_complement_2nd(pTHX_ SV* const a, SV* const b,
         len_u += (len_a - i_a) + (len_b - i_b);
      }
  
-    /* Set result to final length, which can change the pointer to array_u, so
-     * re-find it */
+    /* Set the result to the final length, which can change the pointer to
+     * array_u, so re-find it.  (Note that it is unlikely that this will
+     * change, as we are shrinking the space, not enlarging it) */
      if (len_u != _invlist_len(u)) {
         invlist_set_len(u, len_u, *get_invlist_offset_addr(u));
         invlist_trim(u);
@@ -9008,7 +9144,7 @@ Perl__invlist_union_maybe_complement_2nd(pTHX_ SV* const a, SV* const b,
      /* When 'count' is 0, the list that was exhausted (if one was shorter than
       * the other) ended with everything above it not in its set.  That means
       * that the remaining part of the union is precisely the same as the
-     * non-exhausted list, so can just copy it unchanged.  (If both list were
+     * non-exhausted list, so can just copy it unchanged.  (If both lists were
       * exhausted at the same time, then the operations below will be both 0.)
       */
      if (count == 0) {
@@ -9021,21 +9157,32 @@ Perl__invlist_union_maybe_complement_2nd(pTHX_ SV* const a, SV* const b,
         }
      }
  
-    /*  We may be removing a reference to one of the inputs.  If so, the output
-     *  is made mortal if the input was.  (Mortal SVs shouldn't have their ref
-     *  count decremented) */
-    if (a == *output || b == *output) {
+    /* If the output is not to overwrite either of the inputs, just return the
+     * calculated union */
+    if (a != *output && b != *output) {
+        *output = u;
+    }
+    else {
+        /*  Here, the output is to be the same as one of the input scalars,
+         *  hence replacing it.  The simple thing to do is to free the input
+         *  scalar, making it instead be the output one.  But experience has
+         *  shown [perl #127392] that if the input is a mortal, we can get a
+         *  huge build-up of these during regex compilation before they get
+         *  freed.  So for that case, replace just the input's interior with
+         *  the output's, and then free the output */
+
          assert(! invlist_is_iterating(*output));
-        if ((SvTEMP(*output))) {
-            sv_2mortal(u);
+
+        if (! SvTEMP(*output)) {
+            SvREFCNT_dec_NN(*output);
+            *output = u;
          }
          else {
-            SvREFCNT_dec_NN(*output);
+            invlist_replace_list_destroys_src(*output, u);
+            SvREFCNT_dec_NN(u);
          }
      }
  
-    *output = u;
-
      return;
  }
  
@@ -9046,11 +9193,11 @@ Perl__invlist_intersection_maybe_complement_2nd(pTHX_ SV* const a, SV* const b,
      /* Take the intersection of two inversion lists and point <i> to it.  *i
       * SHOULD BE DEFINED upon input, and if it points to one of the two lists,
       * the reference count to that list will be decremented if not already a
-     * temporary (mortal); otherwise *i will be made correspondingly mortal.
-     * The first list, <a>, may be NULL, in which case an empty list is
-     * returned.  If <complement_b> is TRUE, the result will be the
-     * intersection of <a> and the complement (or inversion) of <b> instead of
-     * <b> directly.
+     * temporary (mortal); otherwise just its contents will be modified to be
+     * the intersection.  The first list, <a>, may be NULL, in which case an
+     * empty list is returned.  If <complement_b> is TRUE, the result will be
+     * the intersection of <a> and the complement (or inversion) of <b> instead
+     * of <b> directly.
       *
       * The basis for this comes from "Unicode Demystified" Chapter 13 by
       * Richard Gillam, published by Addison-Wesley, and explained at some
@@ -9088,48 +9235,38 @@ Perl__invlist_intersection_maybe_complement_2nd(pTHX_ SV* const a, SV* const b,
      /* Special case if either one is empty */
      len_a = (a == NULL) ? 0 : _invlist_len(a);
      if ((len_a == 0) || ((len_b = _invlist_len(b)) == 0)) {
-        bool make_temp = FALSE;
-
          if (len_a != 0 && complement_b) {
  
-            /* Here, 'a' is not empty, therefore from the above 'if', 'b' must
-             * be empty.  Here, also we are using 'b's complement, which hence
-             * must be every possible code point.  Thus the intersection is
-             * simply 'a'. */
-            if (*i != a) {
-                if (*i == b) {
-                    if (! (make_temp = cBOOL(SvTEMP(b)))) {
-                        SvREFCNT_dec_NN(b);
-                    }
-                }
+            /* Here, 'a' is not empty, therefore from the enclosing 'if', 'b'
+             * must be empty.  Here, also we are using 'b's complement, which
+             * hence must be every possible code point.  Thus the intersection
+             * is simply 'a'. */
  
-                *i = invlist_clone(a);
+            if (*i == a) {  /* No-op */
+                return;
              }
-            /* else *i is already 'a' */
  
-            if (make_temp) {
-                sv_2mortal(*i);
+            /* If not overwriting either input, just make a copy of 'a' */
+            if (*i != b) {
+                *i = invlist_clone(a);
+                return;
              }
+
+            /* Here we are overwriting 'b' with 'a's contents */
+            r = invlist_clone(a);
+            invlist_replace_list_destroys_src(*i, r);
+            SvREFCNT_dec_NN(r);
              return;
          }
  
          /* Here, 'a' or 'b' is empty and not using the complement of 'b'.  The
           * intersection must be empty */
-       if (*i == a) {
-            if (! (make_temp = cBOOL(SvTEMP(a)))) {
-                SvREFCNT_dec_NN(a);
-            }
-       }
-       else if (*i == b) {
-            if (! (make_temp = cBOOL(SvTEMP(b)))) {
-                SvREFCNT_dec_NN(b);
-            }
-       }
-       *i = _new_invlist(0);
-        if (make_temp) {
-            sv_2mortal(*i);
+        if (*i == NULL) {
+            *i = _new_invlist(0);
+            return;
          }
  
+        invlist_clear(*i);
         return;
      }
  
@@ -9227,7 +9364,7 @@ Perl__invlist_intersection_maybe_complement_2nd(pTHX_ SV* const a, SV* const b,
       *    everything that remains in the non-exhausted set.
       * 4) the exhausted wasn't in its set, non-exhausted is, count is 1, and
       *    remains 1.  And the intersection has nothing more. */
-    if ((i_a == len_a && PREV_RANGE_MATCHES_INVLIST(i_a))
+    if (   (i_a == len_a && PREV_RANGE_MATCHES_INVLIST(i_a))
         || (i_b == len_b && PREV_RANGE_MATCHES_INVLIST(i_b)))
      {
         count++;
@@ -9241,8 +9378,9 @@ Perl__invlist_intersection_maybe_complement_2nd(pTHX_ SV* const a, SV* const b,
         len_r += (len_a - i_a) + (len_b - i_b);
      }
  
-    /* Set result to final length, which can change the pointer to array_r, so
-     * re-find it */
+    /* Set the result to the final length, which can change the pointer to
+     * array_r, so re-find it.  (Note that it is unlikely that this will
+     * change, as we are shrinking the space, not enlarging it) */
      if (len_r != _invlist_len(r)) {
         invlist_set_len(r, len_r, *get_invlist_offset_addr(r));
         invlist_trim(r);
@@ -9260,21 +9398,38 @@ Perl__invlist_intersection_maybe_complement_2nd(pTHX_ SV* const a, SV* const b,
         }
      }
  
-    /*  We may be removing a reference to one of the inputs.  If so, the output
-     *  is made mortal if the input was.  (Mortal SVs shouldn't have their ref
-     *  count decremented) */
-    if (a == *i || b == *i) {
+    /* If the output is not to overwrite either of the inputs, just return the
+     * calculated intersection */
+    if (a != *i && b != *i) {
+        *i = r;
+    }
+    else {
+        /*  Here, the output is to be the same as one of the input scalars,
+         *  hence replacing it.  The simple thing to do is to free the input
+         *  scalar, making it instead be the output one.  But experience has
+         *  shown [perl #127392] that if the input is a mortal, we can get a
+         *  huge build-up of these during regex compilation before they get
+         *  freed.  So for that case, replace just the input's interior with
+         *  the output's, and then free the output.  A short-cut in this case
+         *  is if the output is empty, we can just set the input to be empty */
+
          assert(! invlist_is_iterating(*i));
-        if (SvTEMP(*i)) {
-            sv_2mortal(r);
+
+        if (! SvTEMP(*i)) {
+            SvREFCNT_dec_NN(*i);
+            *i = r;
          }
          else {
-            SvREFCNT_dec_NN(*i);
+            if (len_r) {
+                invlist_replace_list_destroys_src(*i, r);
+            }
+            else {
+                invlist_clear(*i);
+            }
+            SvREFCNT_dec_NN(r);
          }
      }
  
-    *i = r;
-
      return;
  }
  
@@ -9510,38 +9665,56 @@ S_invlist_highest(SV* const invlist)
             : array[len - 1] - 1;
  }
  
-#ifndef PERL_IN_XSUB_RE
-SV *
-Perl__invlist_contents(pTHX_ SV* const invlist)
+STATIC SV *
+S_invlist_contents(pTHX_ SV* const invlist, const bool traditional_style)
  {
      /* Get the contents of an inversion list into a string SV so that they can
-     * be printed out.  It uses the format traditionally done for debug tracing
-     */
+     * be printed out.  If 'traditional_style' is TRUE, it uses the format
+     * traditionally done for debug tracing; otherwise it uses a format
+     * suitable for just copying to the output, with blanks between ranges and
+     * a dash between range components */
  
      UV start, end;
-    SV* output = newSVpvs("\n");
+    SV* output;
+    const char intra_range_delimiter = (traditional_style ? '\t' : '-');
+    const char inter_range_delimiter = (traditional_style ? '\n' : ' ');
+
+    if (traditional_style) {
+        output = newSVpvs("\n");
+    }
+    else {
+        output = newSVpvs("");
+    }
  
-    PERL_ARGS_ASSERT__INVLIST_CONTENTS;
+    PERL_ARGS_ASSERT_INVLIST_CONTENTS;
  
      assert(! invlist_is_iterating(invlist));
  
      invlist_iterinit(invlist);
      while (invlist_iternext(invlist, &start, &end)) {
         if (end == UV_MAX) {
-           Perl_sv_catpvf(aTHX_ output, "%04"UVXf"\tINFINITY\n", start);
+           Perl_sv_catpvf(aTHX_ output, "%04"UVXf"%cINFINITY%c",
+                                          start, intra_range_delimiter,
+                                                 inter_range_delimiter);
         }
         else if (end != start) {
-           Perl_sv_catpvf(aTHX_ output, "%04"UVXf"\t%04"UVXf"\n",
-                   start,       end);
+           Perl_sv_catpvf(aTHX_ output, "%04"UVXf"%c%04"UVXf"%c",
+                                         start,
+                                                   intra_range_delimiter,
+                                                  end, inter_range_delimiter);
         }
         else {
-           Perl_sv_catpvf(aTHX_ output, "%04"UVXf"\n", start);
+           Perl_sv_catpvf(aTHX_ output, "%04"UVXf"%c",
+                                          start, inter_range_delimiter);
         }
      }
  
+    if (SvCUR(output) && ! traditional_style) {/* Get rid of trailing blank */
+        SvCUR_set(output, SvCUR(output) - 1);
+    }
+
      return output;
  }
-#endif
  
  #ifndef PERL_IN_XSUB_RE
  void
@@ -10151,7 +10324,9 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
           * indivisible */
          bool has_intervening_patws = paren == 2 && *(RExC_parse - 1) != '(';
  
-        assert(RExC_parse < RExC_end);
+        if (RExC_parse >= RExC_end) {
+           vFAIL("Unmatched (");
+        }
  
          if ( *RExC_parse == '*') { /* (*VERB:ARG) */
             char *start_verb = RExC_parse + 1;
@@ -10162,7 +10337,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
              int internal_argval = -1; /* if >-1 we are not allowed an argument*/
  
              if (has_intervening_patws) {
-                RExC_parse++;
+                RExC_parse++;   /* past the '*' */
                  vFAIL("In '(*VERB...)', the '(' and '*' must be adjacent");
              }
             while (RExC_parse < RExC_end && *RExC_parse != ')' ) {
@@ -10273,14 +10448,14 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                  vFAIL("In '(?...)', the '(' and '?' must be adjacent");
              }
  
-           RExC_parse++;
+           RExC_parse++;           /* past the '?' */
              paren = *RExC_parse;    /* might be a trailing NUL, if not
                                         well-formed */
              RExC_parse += UTF ? UTF8SKIP(RExC_parse) : 1;
              if (RExC_parse > RExC_end) {
                  paren = '\0';
              }
-           ret = NULL;                 /* For lookahead/behind. */
+           ret = NULL;                 /* For look-ahead/behind. */
             switch (paren) {
  
             case 'P':   /* (?P...) variants for those used to PCRE/Python */
@@ -10318,9 +10493,10 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                     char *name_start;
                     SV *svname;
                     paren= '>';
+                /* FALLTHROUGH */
              case '\'':          /* (?'...') */
-                   name_start= RExC_parse;
-                   svname = reg_scan_name(pRExC_state,
+                    name_start = RExC_parse;
+                    svname = reg_scan_name(pRExC_state,
                          SIZE_ONLY    /* reverse test from the others */
                          ? REG_RSN_RETURN_NAME
                          : REG_RSN_RETURN_NULL);
@@ -10779,7 +10955,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
             case '[':           /* (?[ ... ]) */
                  return handle_regex_sets(pRExC_state, NULL, flagp, depth,
                                           oregcomp_parse);
-            case 0:
+            case 0: /* A NUL */
                 RExC_parse--; /* for vFAIL to print correctly */
                  vFAIL("Sequence (? incomplete");
                  break;
@@ -11224,7 +11400,7 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
  
                      /* We can't back off the size because we have to reserve
                       * enough space for all the things we are about to throw
-                     * away, but we can shrink it by the ammount we are about
+                     * away, but we can shrink it by the amount we are about
                       * to re-use here */
                      RExC_size += PREVOPER(RExC_size) - regarglen[(U8)OPFAIL];
                  }
@@ -11387,6 +11563,7 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state,
                  UV * code_point_p,
                  int * cp_count,
                  I32 * flagp,
+                const bool strict,
                  const U32 depth
      )
  {
@@ -11536,6 +11713,10 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state,
                                          semantics */
  
      if (endbrace == RExC_parse) {   /* empty: \N{} */
+        if (strict) {
+            RExC_parse++;   /* Position after the "}" */
+            vFAIL("Zero length \\N{}");
+        }
          if (cp_count) {
              *cp_count = 0;
          }
@@ -11550,6 +11731,8 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state,
  
      RExC_parse += 2;   /* Skip past the 'U+' */
  
+    /* Because toke.c has generated a special construct for us guaranteed not
+     * to have NULs, we can use a str function */
      endchar = RExC_parse + strcspn(RExC_parse, ".}");
  
      /* Code points are separated by dots.  If none, there is only one code
@@ -12413,6 +12596,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                                NULL,     /* Don't need a count of how many code
                                             points */
                                flagp,
+                              RExC_strict,
                                depth)
              ) {
                  break;
@@ -12739,6 +12923,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                                              NULL,   /* Don't need a count of
                                                         how many code points */
                                              flagp,
+                                            RExC_strict,
                                              depth)
                          ) {
                              if (*flagp & NEED_UTF8)
@@ -13438,9 +13623,7 @@ S_populate_ANYOF_from_invlist(pTHX_ regnode *node, SV** invlist_ptr)
  /* 'posix_warnings' and 'warn_text' are names of variables in the following
   * routine. q.v. */
  #define ADD_POSIX_WARNING(p, text)  STMT_START {                            \
-        if (posix_warnings && (   posix_warnings != (AV **) -1              \
-                               || (PASS2 && ckWARN(WARN_REGEXP))))          \
-        {                                                                   \
+        if (posix_warnings) {                                               \
              if (! warn_text) warn_text = newAV();                           \
              av_push(warn_text, Perl_newSVpvf(aTHX_                          \
                                               WARNING_PREFIX                 \
@@ -13459,8 +13642,9 @@ S_handle_possible_posix(pTHX_ RExC_state_t *pRExC_state,
                                    besides RExC_parse. */
      char ** updated_parse_ptr, /* Where to set the updated parse pointer, or
                                    NULL */
-    AV ** posix_warnings       /* Where to place any generated warnings, or -1
-                                  if to output them, or NULL */
+    AV ** posix_warnings,      /* Where to place any generated warnings, or
+                                  NULL */
+    const bool check_only      /* Don't die if error */
  )
  {
      /* This parses what the caller thinks may be one of the three POSIX
@@ -13486,19 +13670,13 @@ S_handle_possible_posix(pTHX_ RExC_state_t *pRExC_state,
       *      'updated_parse_ptr' is not changed.  No warnings nor errors are
       *      raised.
       *
-     * In b) there may be warnings and even errors generated.  What to do about
-     * these is determined by the 'posix_warnings' parameter.  If it is NULL,
-     * this call is treated as a check-only, scouting-out-the-territory call,
-     * and no warnings nor errors are generated at all.  Otherwise, any errors
-     * are raised if found.  If 'posix_warnings' is -1 (appropriately cast),
-     * warnings are generated and displayed (in pass 2), just as they would be
-     * for any other message of the same type from this file.  If it isn't NULL
-     * and not -1, warnings aren't displayed, but instead an AV is generated
-     * with all the warning messages (that aren't to be ignored) stored into
-     * it, so that the caller can output them if it wants.  This is done in all
+     * In b) there may be errors or warnings generated.  If 'check_only' is
+     * TRUE, then any errors are discarded.  Warnings are returned to the
+     * caller via an AV* created into '*posix_warnings' if it is not NULL.  If
+     * instead it is NULL, warnings are suppressed.  This is done in all
       * passes.  The reason for this is that the rest of the parsing is heavily
       * dependent on whether this routine found a valid posix class or not.  If
-     * it did, the closing ']' is absorbed as part of the class.  If no class
+     * it did, the closing ']' is absorbed as part of the class.  If no class,
       * or an invalid one is found, any ']' will be considered the terminator of
       * the outer bracketed character class, leading to very different results.
       * In particular, a '(?[ ])' construct will likely have a syntax error if
@@ -13620,85 +13798,78 @@ S_handle_possible_posix(pTHX_ RExC_state_t *pRExC_state,
  
      /* For [. .] and [= =].  These are quite different internally from [: :],
       * so they are handled separately.  */
-    if (POSIXCC_NOTYET(*p)) {
+    if (POSIXCC_NOTYET(*p) && p < e - 3) /* 1 for the close, and 1 for the ']'
+                                            and 1 for at least one char in it
+                                          */
+    {
          const char open_char  = *p;
          const char * temp_ptr = p + 1;
-        unsigned int len      = 0;
  
          /* These two constructs are not handled by perl, and if we find a
-         * syntactically valid one, we croak.  It looks like just about any
-         * byte can be in them, but they are likely very short, like [.ch.] to
-         * denote a ligature 'ch' single character.  If we find something that
-         * started out to look like one of these constructs, but isn't, we
-         * break so that it can be checked for being a class name with a typo
-         * of '.' or '=' instead of a colon */
-        while (temp_ptr < e) {
-            len++;
-
-            /* qr/[[.].]]/, for example, is valid.  But otherwise we quit on an
-             * unexpected ']'.  It is possible, it appears, for such a ']' to
-             * be not in the final position, but that's so unlikely that that
-             * case is not handled. */
-            if (*temp_ptr == ']' && temp_ptr[1] != open_char) {
-                break;
-            }
-
-            /* XXX this could be cut down, but this value is certainly large
-             * enough */
-            if (len > 10) {
-                break;
-            }
+         * syntactically valid one, we croak.  khw, who wrote this code, finds
+         * this explanation of them very unclear:
+         * http://pubs.opengroup.org/onlinepubs/009696899/basedefs/xbd_chap09.html
+         * And searching the rest of the internet wasn't very helpful either.
+         * It looks like just about any byte can be in these constructs,
+         * depending on the locale.  But unless the pattern is being compiled
+         * under /l, which is very rare, Perl runs under the C or POSIX locale.
+         * In that case, it looks like [= =] isn't allowed at all, and that
+         * [. .] could be any single code point, but for longer strings the
+         * constituent characters would have to be the ASCII alphabetics plus
+         * the minus-hyphen.  Any sensible locale definition would limit itself
+         * to these.  And any portable one definitely should.  Trying to parse
+         * the general case is a nightmare (see [perl #127604]).  So, this code
+         * looks only for interiors of these constructs that match:
+         *      qr/.|[-\w]{2,}/
+         * Using \w relaxes the apparent rules a little, without adding much
+         * danger of mistaking something else for one of these constructs.
+         *
+         * [. .] in some implementations described on the internet is usable to
+         * escape a character that otherwise is special in bracketed character
+         * classes.  For example [.].] means a literal right bracket instead of
+         * the ending of the class
+         *
+         * [= =] can legitimately contain a [. .] construct, but we don't
+         * handle this case, as that [. .] construct will later get parsed
+         * itself and croak then.  And [= =] is checked for even when not under
+         * /l, as Perl has long done so.
+         *
+         * The code below relies on there being a trailing NUL, so it doesn't
+         * have to keep checking if the parse ptr < e.
+         */
+        if (temp_ptr[1] == open_char) {
+            temp_ptr++;
+        }
+        else while (    temp_ptr < e
+                    && (isWORDCHAR(*temp_ptr) || *temp_ptr == '-'))
+        {
+            temp_ptr++;
+        }
  
-            if (*temp_ptr == open_char) {
+        if (*temp_ptr == open_char) {
+            temp_ptr++;
+            if (*temp_ptr == ']') {
                  temp_ptr++;
-                if (*temp_ptr == ']') {
-                    temp_ptr++;
-                    if (! found_problem && posix_warnings) {
-                        RExC_parse = (char *) temp_ptr;
-                        vFAIL3("POSIX syntax [%c %c] is reserved for future "
-                               "extensions", open_char, open_char);
-                    }
-
-                    /* Here, the syntax wasn't completely valid, or else the
-                     * call is to check-only */
-                    if (updated_parse_ptr) {
-                        *updated_parse_ptr = (char *) temp_ptr;
-                    }
-
-                    return OOB_NAMEDCLASS;
+                if (! found_problem && ! check_only) {
+                    RExC_parse = (char *) temp_ptr;
+                    vFAIL3("POSIX syntax [%c %c] is reserved for future "
+                            "extensions", open_char, open_char);
                  }
-            }
-            else if (*temp_ptr == '\\') {
-
-                /* A backslash is treate as like any other character, unless it
-                 * precedes a comment starter.  XXX multiple backslashes in a
-                 * row are not handled specially here, nor would they ever
-                 * likely to be handled specially in one of these constructs */
-                if (temp_ptr[1] == '#' && (RExC_flags & RXf_PMf_EXTENDED)) {
-                    temp_ptr++;
+
+                /* Here, the syntax wasn't completely valid, or else the call
+                 * is to check-only */
+                if (updated_parse_ptr) {
+                    *updated_parse_ptr = (char *) temp_ptr;
                  }
-                temp_ptr++;
-            }
-            else if (*temp_ptr == '#' && (RExC_flags & RXf_PMf_EXTENDED)) {
-                break;  /* Under no circumstances can we look at the interior
-                           of a comment */
-            }
-            else if (*temp_ptr == '\n') {   /* And we don't allow newlines
-                                               either as it's extremely
-                                               unlikely that one could be in an
-                                               intended class */
-                break;
-            }
-            else if (UTF && ! UTF8_IS_INVARIANT(*temp_ptr)) {
-                /* XXX Since perl will never handle multi-byte locales, except
-                 * for UTF-8, we could break if we found a byte above latin1,
-                 * but perhaps the person intended to use one. */
-                temp_ptr += UTF8SKIP(temp_ptr);
-            }
-            else {
-                temp_ptr++;
+
+                return OOB_NAMEDCLASS;
              }
          }
+
+        /* If we find something that started out to look like one of these
+         * constructs, but isn't, we continue below so that it can be checked
+         * for being a class name with a typo of '.' or '=' instead of a colon.
+         * */
      }
  
      /* Here, we think there is a possibility that a [: :] class was meant, and
@@ -14219,16 +14390,11 @@ S_handle_possible_posix(pTHX_ RExC_state_t *pRExC_state,
              }
  
              if (warn_text) {
-                if (posix_warnings != (AV **) -1) {
-                    *posix_warnings = warn_text;
+                if (posix_warnings) {
+                    /* mortalize to avoid a leak with FATAL warnings */
+                    *posix_warnings = (AV *) sv_2mortal((SV *) warn_text);
                  }
                  else {
-                    SV * msg;
-                    while ((msg = av_shift(warn_text)) != &PL_sv_undef) {
-                        Perl_warner(aTHX_ packWARN(WARN_REGEXP),
-                                    "%s", SvPVX(msg));
-                        SvREFCNT_dec_NN(msg);
-                    }
                      SvREFCNT_dec_NN(warn_text);
                  }
              }
@@ -14239,7 +14405,7 @@ S_handle_possible_posix(pTHX_ RExC_state_t *pRExC_state,
               * one */
              return class_number + complement;
          }
-        else if (posix_warnings) {
+        else if (! check_only) {
  
              /* Here, it is an unrecognized class.  This is an error (unless the
              * call is to check only, which we've already handled above) */
@@ -14349,26 +14515,24 @@ S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV** return_invlist,
                  default:
                      break;
                  case '\\':
-                    /* Skip the next byte (which could cause us to end up in
-                     * the middle of a UTF-8 character, but since none of those
-                     * are confusable with anything we currently handle in this
-                     * switch (invariants all), it's safe.  We'll just hit the
-                     * default: case next time and keep on incrementing until
-                     * we find one of the invariants we do handle. */
+                    /* Skip past this, so the next character gets skipped, after
+                     * the switch */
                      RExC_parse++;
                      if (*RExC_parse == 'c') {
                              /* Skip the \cX notation for control characters */
                              RExC_parse += UTF ? UTF8SKIP(RExC_parse) : 1;
                      }
                      break;
+
                  case '[':
                  {
                      /* See if this is a [:posix:] class. */
                      bool is_posix_class = (OOB_NAMEDCLASS
-                                           < handle_possible_posix(pRExC_state,
-                                                                RExC_parse + 1,
-                                                                NULL,
-                                                                NULL));
+                            < handle_possible_posix(pRExC_state,
+                                                RExC_parse + 1,
+                                                NULL,
+                                                NULL,
+                                                TRUE /* checking only */));
                      /* If it is a posix class, leave the parse pointer at the
                       * '[' to fool regclass() into thinking it is part of a
                       * '[[:posix:]]'. */
@@ -14426,13 +14590,8 @@ S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV** return_invlist,
        no_close:
          /* We output the messages even if warnings are off, because we'll fail
           * the very next thing, and these give a likely diagnosis for that */
-        if (posix_warnings) {
-            SV * msg;
-            while ((msg = av_shift(posix_warnings)) != &PL_sv_undef) {
-                Perl_warner(aTHX_ packWARN(WARN_REGEXP), "%s", SvPVX(msg));
-                SvREFCNT_dec_NN(msg);
-            }
-            SvREFCNT_dec_NN(posix_warnings);
+        if (posix_warnings && av_tindex(posix_warnings) >= 0) {
+            output_or_return_posix_warnings(pRExC_state, posix_warnings, NULL);
          }
  
          FAIL("Syntax error in (?[...])");
@@ -14670,10 +14829,11 @@ redo_curchar:
              {
                  /* See if this is a [:posix:] class. */
                  bool is_posix_class = (OOB_NAMEDCLASS
-                                        < handle_possible_posix(pRExC_state,
-                                                            RExC_parse + 1,
-                                                            NULL,
-                                                            NULL));
+                            < handle_possible_posix(pRExC_state,
+                                                RExC_parse + 1,
+                                                NULL,
+                                                NULL,
+                                                TRUE /* checking only */));
                  /* If it is a posix class, leave the parse pointer at the '['
                   * to fool regclass() into thinking it is part of a
                   * '[[:posix:]]'. */
@@ -15148,6 +15308,43 @@ S_add_above_Latin1_folds(pTHX_ RExC_state_t *pRExC_state, const U8 cp, SV** invl
      }
  }
  
+STATIC void
+S_output_or_return_posix_warnings(pTHX_ RExC_state_t *pRExC_state, AV* posix_warnings, AV** return_posix_warnings)
+{
+    /* If the final parameter is NULL, output the elements of the array given
+     * by '*posix_warnings' as REGEXP warnings.  Otherwise, the elements are
+     * pushed onto it, (creating if necessary) */
+
+    SV * msg;
+    const bool first_is_fatal =  ! return_posix_warnings
+                                && ckDEAD(packWARN(WARN_REGEXP));
+
+    PERL_ARGS_ASSERT_OUTPUT_OR_RETURN_POSIX_WARNINGS;
+
+    while ((msg = av_shift(posix_warnings)) != &PL_sv_undef) {
+        if (return_posix_warnings) {
+            if (! *return_posix_warnings) { /* mortalize to not leak if
+                                               warnings are fatal */
+                *return_posix_warnings = (AV *) sv_2mortal((SV *) newAV());
+            }
+            av_push(*return_posix_warnings, msg);
+        }
+        else {
+            if (first_is_fatal) {           /* Avoid leaking this */
+                av_undef(posix_warnings);   /* This isn't necessary if the
+                                               array is mortal, but is a
+                                               fail-safe */
+                (void) sv_2mortal(msg);
+                if (PASS2) {
+                    SAVEFREESV(RExC_rx_sv);
+                }
+            }
+            Perl_warner(aTHX_ packWARN(WARN_REGEXP), "%s", SvPVX(msg));
+            SvREFCNT_dec_NN(msg);
+        }
+    }
+}
+
  STATIC AV *
  S_add_multi_match(pTHX_ AV* multi_char_matches, SV* multi_string, const STRLEN cp_count)
  {
@@ -15229,7 +15426,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                   bool optimizable,                  /* ? Allow a non-ANYOF return
                                                         node */
                   SV** ret_invlist, /* Return an inversion list, not a node */
-                 AV** posix_warnings
+                 AV** return_posix_warnings
            )
  {
      /* parse a bracketed class specification.  Most of these will produce an
@@ -15343,12 +15540,16 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
      const SSize_t orig_size = RExC_size;
      bool posixl_matches_all = FALSE; /* Does /l class have both e.g. \W,\w ? */
  
-    /* This variable is used to mark where in the input something that looks
-     * like a POSIX construct ends.  During the parse, when something looks
-     * like it could be such a construct is encountered, it is checked for
-     * being one, but not if we've already checked this area of the input.
-     * Only after this position is reached do we check again */
-    char *dont_check_for_posix_end = RExC_parse - 1;
+    /* This variable is used to mark where the end in the input is of something
+     * that looks like a POSIX construct but isn't.  During the parse, when
+     * something looks like it could be such a construct is encountered, it is
+     * checked for being one, but not if we've already checked this area of the
+     * input.  Only after this position is reached do we check again */
+    char *not_posix_region_end = RExC_parse - 1;
+
+    AV* posix_warnings = NULL;
+    const bool do_posix_warnings =     return_posix_warnings
+                                   || (PASS2 && ckWARN(WARN_REGEXP));
  
      GET_RE_DEBUG_FLAGS_DECL;
  
@@ -15365,10 +15566,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
      allow_multi_folds = FALSE;
  #endif
  
-    if (posix_warnings == NULL) {
-        posix_warnings = (AV **) -1;
-    }
-
      /* Assume we are going to generate an ANYOF node. */
      ret = reganode(pRExC_state,
                     (LOC)
@@ -15393,7 +15590,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
  
      assert(RExC_parse <= RExC_end);
  
-    if (UCHARAT(RExC_parse) == '^') {  /* Complement of range. */
+    if (UCHARAT(RExC_parse) == '^') {  /* Complement the class */
         RExC_parse++;
          invert = TRUE;
          allow_multi_folds = FALSE;
@@ -15403,25 +15600,24 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
  
      /* Check that they didn't say [:posix:] instead of [[:posix:]] */
      if (! ret_invlist && MAYBE_POSIXCC(UCHARAT(RExC_parse))) {
-        char *class_end;
-        int maybe_class = handle_possible_posix(pRExC_state, RExC_parse,
-                                             &class_end, NULL);
-        if (maybe_class >= OOB_NAMEDCLASS) {
-            dont_check_for_posix_end = class_end;
-            if (PASS2 && posix_warnings == (AV **) -1) {
-                SAVEFREESV(RExC_rx_sv);
-                ckWARN4reg(class_end,
-                        "POSIX syntax [%c %c] belongs inside character classes%s",
-                        *RExC_parse, *RExC_parse,
-                        (maybe_class == OOB_NAMEDCLASS)
-                        ? ((POSIXCC_NOTYET(*RExC_parse))
-                            ? " (but this one isn't implemented)"
-                            : " (but this one isn't fully valid)")
-                        : ""
-                        );
-                (void)ReREFCNT_inc(RExC_rx_sv);
-            }
-       }
+        int maybe_class = handle_possible_posix(pRExC_state,
+                                                RExC_parse,
+                                                &not_posix_region_end,
+                                                NULL,
+                                                TRUE /* checking only */);
+        if (PASS2 && maybe_class >= OOB_NAMEDCLASS && do_posix_warnings) {
+            SAVEFREESV(RExC_rx_sv);
+            ckWARN4reg(not_posix_region_end,
+                    "POSIX syntax [%c %c] belongs inside character classes%s",
+                    *RExC_parse, *RExC_parse,
+                    (maybe_class == OOB_NAMEDCLASS)
+                    ? ((POSIXCC_NOTYET(*RExC_parse))
+                        ? " (but this one isn't implemented)"
+                        : " (but this one isn't fully valid)")
+                    : ""
+                    );
+            (void)ReREFCNT_inc(RExC_rx_sv);
+        }
      }
  
      /* If the caller wants us to just parse a single element, accomplish this
@@ -15435,6 +15631,21 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
         goto charclassloop;
  
      while (1) {
+
+        if (   posix_warnings
+            && av_tindex(posix_warnings) >= 0
+            && RExC_parse > not_posix_region_end)
+        {
+            /* Warnings about posix class issues are considered tentative until
+             * we are far enough along in the parse that we can no longer
+             * change our mind, at which point we either output them or add
+             * them, if it has so specified, to what gets returned to the
+             * caller.  This is done each time through the loop so that a later
+             * class won't zap them before they have been dealt with. */
+            output_or_return_posix_warnings(pRExC_state, posix_warnings,
+                                            return_posix_warnings);
+        }
+
          if  (RExC_parse >= stop_ptr) {
              break;
          }
@@ -15456,7 +15667,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
             element_count++;
              non_portable_endpoint = 0;
         }
-       if (UTF) {
+       if (UTF && ! UTF8_IS_INVARIANT(* RExC_parse)) {
             value = utf8n_to_uvchr((U8*)RExC_parse,
                                    RExC_end - RExC_parse,
                                    &numlen, UTF8_ALLOW_DEFAULT);
@@ -15466,22 +15677,52 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
             value = UCHARAT(RExC_parse++);
  
          if (value == '[') {
-            namedclass = handle_possible_posix(pRExC_state, RExC_parse, &dont_check_for_posix_end, posix_warnings);
+            char * posix_class_end;
+            namedclass = handle_possible_posix(pRExC_state,
+                                               RExC_parse,
+                                               &posix_class_end,
+                                               do_posix_warnings ? &posix_warnings : NULL,
+                                               FALSE    /* die if error */);
              if (namedclass > OOB_NAMEDCLASS) {
-                RExC_parse = dont_check_for_posix_end;
+
+                /* If there was an earlier attempt to parse this particular
+                 * posix class, and it failed, it was a false alarm, as this
+                 * successful one proves */
+                if (   posix_warnings
+                    && av_tindex(posix_warnings) >= 0
+                    && not_posix_region_end >= RExC_parse
+                    && not_posix_region_end <= posix_class_end)
+                {
+                    av_undef(posix_warnings);
+                }
+
+                RExC_parse = posix_class_end;
+            }
+            else if (namedclass == OOB_NAMEDCLASS) {
+                not_posix_region_end = posix_class_end;
              }
              else {
                  namedclass = OOB_NAMEDCLASS;
              }
          }
-        else if (   RExC_parse - 1 > dont_check_for_posix_end
+        else if (   RExC_parse - 1 > not_posix_region_end
                   && MAYBE_POSIXCC(value))
          {
-            (void) handle_possible_posix(pRExC_state, RExC_parse - 1,  /* -1 because parse has already been advanced */
-                    &dont_check_for_posix_end, posix_warnings);
+            (void) handle_possible_posix(
+                        pRExC_state,
+                        RExC_parse - 1,  /* -1 because parse has already been
+                                            advanced */
+                        &not_posix_region_end,
+                        do_posix_warnings ? &posix_warnings : NULL,
+                        TRUE /* checking only */);
          }
          else if (value == '\\') {
              /* Is a backslash; get the code point of the char after it */
+
+            if (RExC_parse >= RExC_end) {
+                vFAIL("Unmatched [");
+            }
+
             if (UTF && ! UTF8_IS_INVARIANT(UCHARAT(RExC_parse))) {
                 value = utf8n_to_uvchr((U8*)RExC_parse,
                                    RExC_end - RExC_parse,
@@ -15523,6 +15764,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                                          &value,    /* Yes single value */
                                          &cp_count, /* Multiple code pt count */
                                          flagp,
+                                        strict,
                                          depth)
                      ) {
  
@@ -15535,11 +15777,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                              vFAIL("\\N in a character class must be a named character: \\N{...}");
                          }
                          else if (cp_count == 0) {
-                            if (strict) {
-                                RExC_parse++;   /* Position after the "}" */
-                                vFAIL("Zero length \\N{}");
-                            }
-                            else if (PASS2) {
+                            if (PASS2) {
                                  ckWARNreg(RExC_parse,
                                          "Ignoring zero length \\N{} in character class");
                              }
@@ -15595,7 +15833,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                                        |_CORE_SWASH_INIT_ACCEPT_INVLIST;
  
                 if (RExC_parse >= RExC_end)
-                   vFAIL2("Empty \\%c{}", (U8)value);
+                   vFAIL2("Empty \\%c", (U8)value);
                 if (*RExC_parse == '{') {
                     const U8 c = (U8)value;
                     e = strchr(RExC_parse, '}');
@@ -15643,6 +15881,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                      SV* invlist;
                      char* name;
                      char* base_name;    /* name after any packages are stripped */
+                    char* lookup_name = NULL;
                      const char * const colon_colon = "::";
  
                      /* Try to get the definition of the property into
@@ -15650,25 +15889,28 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                       * will have its name be <__NAME_i>.  The design is
                       * discussed in commit
                       * 2f833f5208e26b208886e51e09e2c072b5eabb46 */
-                    name = savepv(Perl_form(aTHX_
-                                          "%s%.*s%s\n",
-                                          (FOLD) ? "__" : "",
-                                          (int)n,
-                                          RExC_parse,
-                                          (FOLD) ? "_i" : ""
-                                ));
+                    name = savepv(Perl_form(aTHX_ "%.*s", (int)n, RExC_parse));
+                    SAVEFREEPV(name);
+                    if (FOLD) {
+                        lookup_name = savepv(Perl_form(aTHX_ "__%s_i", name));
+                    }
  
                      /* Look up the property name, and get its swash and
                       * inversion list, if the property is found  */
-                    if (swash) {    /* Return any left-overs */
-                        SvREFCNT_dec_NN(swash);
-                    }
-                    swash = _core_swash_init("utf8", name, &PL_sv_undef,
+                    SvREFCNT_dec(swash); /* Free any left-overs */
+                    swash = _core_swash_init("utf8",
+                                             (lookup_name)
+                                              ? lookup_name
+                                              : name,
+                                             &PL_sv_undef,
                                               1, /* binary */
                                               0, /* not tr/// */
                                               NULL, /* No inversion list */
                                               &swash_init_flags
                                              );
+                    if (lookup_name) {
+                        Safefree(lookup_name);
+                    }
                      if (! swash || ! (invlist = _get_swash_invlist(swash))) {
                          HV* curpkg = (IN_PERL_COMPILETIME)
                                        ? PL_curstash
@@ -15731,13 +15973,15 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                                                              pkgname,
                                                              name);
                                  n = strlen(full_name);
-                                Safefree(name);
                                  name = savepvn(full_name, n);
+                                SAVEFREEPV(name);
                              }
                          }
-                        Perl_sv_catpvf(aTHX_ listsv, "%cutf8::%"UTF8f"\n",
+                        Perl_sv_catpvf(aTHX_ listsv, "%cutf8::%s%"UTF8f"%s\n",
                                          (value == 'p' ? '+' : '!'),
-                                        UTF8fARG(UTF, n, name));
+                                        (FOLD) ? "__" : "",
+                                        UTF8fARG(UTF, n, name),
+                                        (FOLD) ? "_i" : "");
                          has_user_defined_property = TRUE;
                          optimizable = FALSE;    /* Will have to leave this an
                                                     ANYOF node */
@@ -15785,7 +16029,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                              _invlist_union(properties, invlist, &properties);
                         }
                     }
-                   Safefree(name);
                 }
                 RExC_parse = e + 1;
                  namedclass = ANYOF_UNIPROP;  /* no official name, but it's
@@ -16338,7 +16581,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                             "\"%.*s\" is more clearly written simply as \"%s\"",
                             (int) (RExC_parse - rangebegin),
                             rangebegin,
-                           cntrl_to_mnemonic((char) value)
+                           cntrl_to_mnemonic((U8) value)
                          );
                  }
              }
@@ -16391,6 +16634,12 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
         range = 0; /* this range (if it was one) is done now */
      } /* End of loop through all the text within the brackets */
  
+
+    if (   posix_warnings && av_tindex(posix_warnings) >= 0) {
+        output_or_return_posix_warnings(pRExC_state, posix_warnings,
+                                        return_posix_warnings);
+    }
+
      /* If anything in the class expands to more than one character, we have to
       * deal with them by building up a substitute parse string, and recursively
       * calling reg() on it, instead of proceeding */
@@ -16511,9 +16760,9 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
          if (UNLIKELY(posixl_matches_all)) {
              op = SANY;
          }
-        else if (namedclass > OOB_NAMEDCLASS) { /* this is a named class, like
-                                                   \w or [:digit:] or \p{foo}
-                                                 */
+        else if (namedclass > OOB_NAMEDCLASS) { /* this is a single named
+                                                   class, like \w or [:digit:]
+                                                   or \p{foo} */
  
              /* All named classes are mapped into POSIXish nodes, with its FLAG
               * argument giving which class it is */
@@ -16938,7 +17187,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
              /* If it matters to the final outcome, see if a non-property
               * component of the class matches above Unicode.  If so, the
               * warning gets suppressed.  This is true even if just a single
-             * such code point is specified, as though not strictly correct if
+             * such code point is specified, as, though not strictly correct if
               * another such code point is matched against, the fact that they
               * are using above-Unicode code points indicates they should know
               * the issues involved */
@@ -16979,6 +17228,19 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
       * fetching).  We know to set the flag if we have a non-NULL list for UTF-8
       * locales, or the class matches at least one 0-255 range code point */
      if (LOC && FOLD) {
+
+        /* Some things on the list might be unconditionally included because of
+         * other components.  Remove them, and clean up the list if it goes to
+         * 0 elements */
+        if (only_utf8_locale_list && cp_list) {
+            _invlist_subtract(only_utf8_locale_list, cp_list,
+                              &only_utf8_locale_list);
+
+            if (_invlist_len(only_utf8_locale_list) == 0) {
+                SvREFCNT_dec_NN(only_utf8_locale_list);
+                only_utf8_locale_list = NULL;
+            }
+        }
          if (only_utf8_locale_list) {
              ANYOF_FLAGS(ret)
                   |=  ANYOFL_FOLD
@@ -16996,60 +17258,64 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
  
  #define MATCHES_ALL_NON_UTF8_NON_ASCII(ret)                                 \
      (   DEPENDS_SEMANTICS                                                   \
-     && ANYOF_FLAGS(ret)                                                    \
-        & ANYOF_SHARED_d_MATCHES_ALL_NON_UTF8_NON_ASCII_non_d_WARN_SUPER)
+     && (ANYOF_FLAGS(ret)                                                   \
+        & ANYOF_SHARED_d_MATCHES_ALL_NON_UTF8_NON_ASCII_non_d_WARN_SUPER))
  
      /* See if we can simplify things under /d */
      if (   has_upper_latin1_only_utf8_matches
          || MATCHES_ALL_NON_UTF8_NON_ASCII(ret))
      {
-        if (has_upper_latin1_only_utf8_matches) {
-            if (MATCHES_ALL_NON_UTF8_NON_ASCII(ret)) {
-
-                /* Here, we have two, almost opposite, constraints in effect
-                 * for upper latin1 characters.  The macro means they all match
-                 * when the target string ISN'T in UTF-8.
-                 * 'has_upper_latin1_only_utf8_matches' contains the chars that
-                 * match only if the target string IS UTF-8.  Therefore the
-                 * ones in 'has_upper_latin1_only_utf8_matches' match
-                 * regardless of UTF-8, so can be added to the regular list,
-                 * and 'has_upper_latin1_only_utf8_matches' cleared */
-                _invlist_union(cp_list,
-                               has_upper_latin1_only_utf8_matches,
-                               &cp_list);
-                SvREFCNT_dec_NN(has_upper_latin1_only_utf8_matches);
-                has_upper_latin1_only_utf8_matches = NULL;
-            }
-            else if (cp_list) {
-
-                /* Here, 'cp_list' gives chars that always match, and
-                 * 'has_upper_latin1_only_utf8_matches' gives chars that were
-                 * specified to match only if the target string is in UTF-8.
-                 * It may be that these overlap, so we can subtract the
-                 * unconditionally matching from the conditional ones, to make
-                 * the conditional list as small as possible, perhaps even
-                 * clearing it, in which case more optimizations are possible
-                 * later */
-                _invlist_subtract(has_upper_latin1_only_utf8_matches,
-                                  cp_list,
-                                  &has_upper_latin1_only_utf8_matches);
-                if (_invlist_len(has_upper_latin1_only_utf8_matches) == 0) {
+        /* But not if we are inverting, as that screws it up */
+        if (! invert) {
+            if (has_upper_latin1_only_utf8_matches) {
+                if (MATCHES_ALL_NON_UTF8_NON_ASCII(ret)) {
+
+                    /* Here, we have both the flag and inversion list.  Any
+                     * character in 'has_upper_latin1_only_utf8_matches'
+                     * matches when UTF-8 is in effect, but it also matches
+                     * when UTF-8 is not in effect because of
+                     * MATCHES_ALL_NON_UTF8_NON_ASCII.  Therefore it matches
+                     * unconditionally, so can be added to the regular list,
+                     * and 'has_upper_latin1_only_utf8_matches' cleared */
+                    _invlist_union(cp_list,
+                                   has_upper_latin1_only_utf8_matches,
+                                   &cp_list);
                      SvREFCNT_dec_NN(has_upper_latin1_only_utf8_matches);
                      has_upper_latin1_only_utf8_matches = NULL;
                  }
+                else if (cp_list) {
+
+                    /* Here, 'cp_list' gives chars that always match, and
+                     * 'has_upper_latin1_only_utf8_matches' gives chars that
+                     * were specified to match only if the target string is in
+                     * UTF-8.  It may be that these overlap, so we can subtract
+                     * the unconditionally matching from the conditional ones,
+                     * to make the conditional list as small as possible,
+                     * perhaps even clearing it, in which case more
+                     * optimizations are possible later */
+                    _invlist_subtract(has_upper_latin1_only_utf8_matches,
+                                      cp_list,
+                                      &has_upper_latin1_only_utf8_matches);
+                    if (_invlist_len(has_upper_latin1_only_utf8_matches) == 0) {
+                        SvREFCNT_dec_NN(has_upper_latin1_only_utf8_matches);
+                        has_upper_latin1_only_utf8_matches = NULL;
+                    }
+                }
              }
-        }
  
-        /* Similarly, if the unconditional matches include every upper latin1
-         * character, we can clear that flag to permit later optimizations */
-        if (cp_list && MATCHES_ALL_NON_UTF8_NON_ASCII(ret)) {
-            SV* only_non_utf8_list = invlist_clone(PL_UpperLatin1);
-            _invlist_subtract(only_non_utf8_list, cp_list, &only_non_utf8_list);
-            if (_invlist_len(only_non_utf8_list) == 0) {
-                ANYOF_FLAGS(ret) &= ~ANYOF_SHARED_d_MATCHES_ALL_NON_UTF8_NON_ASCII_non_d_WARN_SUPER;
+            /* Similarly, if the unconditional matches include every upper
+             * latin1 character, we can clear that flag to permit later
+             * optimizations */
+            if (cp_list && MATCHES_ALL_NON_UTF8_NON_ASCII(ret)) {
+                SV* only_non_utf8_list = invlist_clone(PL_UpperLatin1);
+                _invlist_subtract(only_non_utf8_list, cp_list,
+                                  &only_non_utf8_list);
+                if (_invlist_len(only_non_utf8_list) == 0) {
+                    ANYOF_FLAGS(ret) &= ~ANYOF_SHARED_d_MATCHES_ALL_NON_UTF8_NON_ASCII_non_d_WARN_SUPER;
+                }
+                SvREFCNT_dec_NN(only_non_utf8_list);
+                only_non_utf8_list = NULL;;
              }
-            SvREFCNT_dec_NN(only_non_utf8_list);
-            only_non_utf8_list = NULL;;
          }
  
          /* If we haven't gotten rid of all conditional matching, we change the
@@ -17413,7 +17679,7 @@ Perl__get_regclass_nonbitmap_data(pTHX_ const regexp *prog,
                                          bool doinit,
                                          SV** listsvp,
                                          SV** only_utf8_locale_ptr,
-                                        SV*  exclude_list)
+                                        SV** output_invlist)
  
  {
      /* For internal core use only.
@@ -17425,20 +17691,32 @@ Perl__get_regclass_nonbitmap_data(pTHX_ const regexp *prog,
       *    swash exists, by calling this function with 'doinit' set to false, in
       *    which case the components that will be used to eventually create the
       *    swash are returned  (in a printable form).
-     * If <exclude_list> is not NULL, it is an inversion list of things to
-     *    exclude from what's returned in <listsvp>.
+     * If <only_utf8_locale_ptr> is not NULL, it is where this routine is to
+     *    store an inversion list of code points that should match only if the
+     *    execution-time locale is a UTF-8 one.
+     * If <output_invlist> is not NULL, it is where this routine is to store an
+     *    inversion list of the code points that would be instead returned in
+     *    <listsvp> if this were NULL.  Thus, what gets output in <listsvp>
+     *    when this parameter is used, is just the non-code point data that
+     *    will go into creating the swash.  This currently should be just
+     *    user-defined properties whose definitions were not known at compile
+     *    time.  Using this parameter allows for easier manipulation of the
+     *    swash's data by the caller.  It is illegal to call this function with
+     *    this parameter set, but not <listsvp>
+     *
       * Tied intimately to how S_set_ANYOF_arg sets up the data structure.  Note
       * that, in spite of this function's name, the swash it returns may include
       * the bitmap data as well */
  
      SV *sw  = NULL;
      SV *si  = NULL;         /* Input swash initialization string */
-    SV*  invlist = NULL;
+    SV* invlist = NULL;
  
      RXi_GET_DECL(prog,progi);
      const struct reg_data * const data = prog ? progi->data : NULL;
  
      PERL_ARGS_ASSERT__GET_REGCLASS_NONBITMAP_DATA;
+    assert(! output_invlist || listsvp);
  
      if (data && data->count) {
         const U32 n = ARG(node);
@@ -17500,7 +17778,7 @@ Perl__get_regclass_nonbitmap_data(pTHX_ const regexp *prog,
  
      /* If requested, return a printable version of what this swash matches */
      if (listsvp) {
-       SV* matches_string = newSVpvs("");
+       SV* matches_string = NULL;
  
          /* The swash should be used, if possible, to get the data, as it
           * contains the resolved data.  But this function can be called at
@@ -17510,22 +17788,124 @@ Perl__get_regclass_nonbitmap_data(pTHX_ const regexp *prog,
         if ((! sw || (invlist = _get_swash_invlist(sw)) == NULL)
              && (si && si != &PL_sv_undef))
          {
-           sv_catsv(matches_string, si);
+            /* Here, we only have 'si' (and possibly some passed-in data in
+             * 'invlist', which is handled below)  If the caller only wants
+             * 'si', use that.  */
+            if (! output_invlist) {
+                matches_string = newSVsv(si);
+            }
+            else {
+                /* But if the caller wants an inversion list of the node, we
+                 * need to parse 'si' and place as much as possible in the
+                 * desired output inversion list, making 'matches_string' only
+                 * contain the currently unresolvable things */
+                const char *si_string = SvPVX(si);
+                STRLEN remaining = SvCUR(si);
+                UV prev_cp = 0;
+                U8 count = 0;
+
+                /* Ignore everything before the first new-line */
+                while (*si_string != '\n' && remaining > 0) {
+                    si_string++;
+                    remaining--;
+                }
+                assert(remaining > 0);
+
+                si_string++;
+                remaining--;
+
+                while (remaining > 0) {
+
+                    /* The data consists of just strings defining user-defined
+                     * property names, but in prior incarnations, and perhaps
+                     * somehow from pluggable regex engines, it could still
+                     * hold hex code point definitions.  Each component of a
+                     * range would be separated by a tab, and each range by a
+                     * new-line.  If these are found, instead add them to the
+                     * inversion list */
+                    I32 grok_flags =  PERL_SCAN_SILENT_ILLDIGIT
+                                     |PERL_SCAN_SILENT_NON_PORTABLE;
+                    STRLEN len = remaining;
+                    UV cp = grok_hex(si_string, &len, &grok_flags, NULL);
+
+                    /* If the hex decode routine found something, it should go
+                     * up to the next \n */
+                    if (   *(si_string + len) == '\n') {
+                        if (count) {    /* 2nd code point on line */
+                            *output_invlist = _add_range_to_invlist(*output_invlist, prev_cp, cp);
+                        }
+                        else {
+                            *output_invlist = add_cp_to_invlist(*output_invlist, cp);
+                        }
+                        count = 0;
+                        goto prepare_for_next_iteration;
+                    }
+
+                    /* If the hex decode was instead for the lower range limit,
+                     * save it, and go parse the upper range limit */
+                    if (*(si_string + len) == '\t') {
+                        assert(count == 0);
+
+                        prev_cp = cp;
+                        count = 1;
+                      prepare_for_next_iteration:
+                        si_string += len + 1;
+                        remaining -= len + 1;
+                        continue;
+                    }
+
+                    /* Here, didn't find a legal hex number.  Just add it from
+                     * here to the next \n */
+
+                    remaining -= len;
+                    while (*(si_string + len) != '\n' && remaining > 0) {
+                        remaining--;
+                        len++;
+                    }
+                    if (*(si_string + len) == '\n') {
+                        len++;
+                        remaining--;
+                    }
+                    if (matches_string) {
+                        sv_catpvn(matches_string, si_string, len - 1);
+                    }
+                    else {
+                        matches_string = newSVpvn(si_string, len - 1);
+                    }
+                    si_string += len;
+                    sv_catpvs(matches_string, " ");
+                } /* end of loop through the text */
+
+                assert(matches_string);
+                if (SvCUR(matches_string)) {  /* Get rid of trailing blank */
+                    SvCUR_set(matches_string, SvCUR(matches_string) - 1);
+                }
+            } /* end of has an 'si' but no swash */
         }
  
-       /* Add the inversion list to whatever we have.  This may have come from
-        * the swash, or from an input parameter */
-       if (invlist) {
-            if (exclude_list) {
-                SV* clone = invlist_clone(invlist);
-                _invlist_subtract(clone, exclude_list, &clone);
-                sv_catsv(matches_string, _invlist_contents(clone));
-                SvREFCNT_dec_NN(clone);
+        /* If we have a swash in place, its equivalent inversion list was above
+         * placed into 'invlist'.  If not, this variable may contain a stored
+         * inversion list which is information beyond what is in 'si' */
+        if (invlist) {
+
+            /* Again, if the caller doesn't want the output inversion list, put
+             * everything in 'matches-string' */
+            if (! output_invlist) {
+                if ( ! matches_string) {
+                    matches_string = newSVpvs("\n");
+                }
+                sv_catsv(matches_string, invlist_contents(invlist,
+                                                  TRUE /* traditional style */
+                                                  ));
+            }
+            else if (! *output_invlist) {
+                *output_invlist = invlist_clone(invlist);
              }
              else {
-                sv_catsv(matches_string, _invlist_contents(invlist));
+                _invlist_union(*output_invlist, invlist, output_invlist);
              }
-       }
+        }
+
         *listsvp = matches_string;
      }
  
@@ -17638,14 +18018,14 @@ S_nextchar(pTHX_ RExC_state_t *pRExC_state)
      PERL_ARGS_ASSERT_NEXTCHAR;
  
      if (RExC_parse < RExC_end) {
-    assert(   ! UTF
-           || UTF8_IS_INVARIANT(*RExC_parse)
-           || UTF8_IS_START(*RExC_parse));
+        assert(   ! UTF
+               || UTF8_IS_INVARIANT(*RExC_parse)
+               || UTF8_IS_START(*RExC_parse));
  
-    RExC_parse += (UTF) ? UTF8SKIP(RExC_parse) : 1;
+        RExC_parse += (UTF) ? UTF8SKIP(RExC_parse) : 1;
  
-    skip_to_be_ignored_text(pRExC_state, &RExC_parse,
-                            FALSE /* Don't assume /x */ );
+        skip_to_be_ignored_text(pRExC_state, &RExC_parse,
+                                FALSE /* Don't assume /x */ );
      }
  }
  
@@ -18159,6 +18539,53 @@ Perl_regdump(pTHX_ const regexp *r)
  #endif /* DEBUGGING */
  }
  
+/* Should be synchronized with ANYOF_ #defines in regcomp.h */
+#ifdef DEBUGGING
+
+#  if   _CC_WORDCHAR != 0 || _CC_DIGIT != 1        || _CC_ALPHA != 2    \
+     || _CC_LOWER != 3    || _CC_UPPER != 4        || _CC_PUNCT != 5    \
+     || _CC_PRINT != 6    || _CC_ALPHANUMERIC != 7 || _CC_GRAPH != 8    \
+     || _CC_CASED != 9    || _CC_SPACE != 10       || _CC_BLANK != 11   \
+     || _CC_XDIGIT != 12  || _CC_CNTRL != 13       || _CC_ASCII != 14   \
+     || _CC_VERTSPACE != 15
+#   error Need to adjust order of anyofs[]
+#  endif
+static const char * const anyofs[] = {
+    "\\w",
+    "\\W",
+    "\\d",
+    "\\D",
+    "[:alpha:]",
+    "[:^alpha:]",
+    "[:lower:]",
+    "[:^lower:]",
+    "[:upper:]",
+    "[:^upper:]",
+    "[:punct:]",
+    "[:^punct:]",
+    "[:print:]",
+    "[:^print:]",
+    "[:alnum:]",
+    "[:^alnum:]",
+    "[:graph:]",
+    "[:^graph:]",
+    "[:cased:]",
+    "[:^cased:]",
+    "\\s",
+    "\\S",
+    "[:blank:]",
+    "[:^blank:]",
+    "[:xdigit:]",
+    "[:^xdigit:]",
+    "[:cntrl:]",
+    "[:^cntrl:]",
+    "[:ascii:]",
+    "[:^ascii:]",
+    "\\v",
+    "\\V"
+};
+#endif
+
  /*
  - regprop - printable representation of opcode, with run time support
  */
@@ -18168,49 +18595,6 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
  {
  #ifdef DEBUGGING
      int k;
-
-    /* Should be synchronized with * ANYOF_ #xdefines in regcomp.h */
-    static const char * const anyofs[] = {
-#if _CC_WORDCHAR != 0 || _CC_DIGIT != 1 || _CC_ALPHA != 2 || _CC_LOWER != 3 \
-    || _CC_UPPER != 4 || _CC_PUNCT != 5 || _CC_PRINT != 6                   \
-    || _CC_ALPHANUMERIC != 7 || _CC_GRAPH != 8 || _CC_CASED != 9            \
-    || _CC_SPACE != 10 || _CC_BLANK != 11 || _CC_XDIGIT != 12               \
-    || _CC_CNTRL != 13 || _CC_ASCII != 14 || _CC_VERTSPACE != 15
-  #error Need to adjust order of anyofs[]
-#endif
-        "\\w",
-        "\\W",
-        "\\d",
-        "\\D",
-        "[:alpha:]",
-        "[:^alpha:]",
-        "[:lower:]",
-        "[:^lower:]",
-        "[:upper:]",
-        "[:^upper:]",
-        "[:punct:]",
-        "[:^punct:]",
-        "[:print:]",
-        "[:^print:]",
-        "[:alnum:]",
-        "[:^alnum:]",
-        "[:graph:]",
-        "[:^graph:]",
-        "[:cased:]",
-        "[:^cased:]",
-        "\\s",
-        "\\S",
-        "[:blank:]",
-        "[:^blank:]",
-        "[:xdigit:]",
-        "[:^xdigit:]",
-        "[:cntrl:]",
-        "[:^cntrl:]",
-        "[:ascii:]",
-        "[:^ascii:]",
-        "\\v",
-        "\\V"
-    };
      RXi_GET_DECL(prog,progi);
      GET_RE_DEBUG_FLAGS_DECL;
  
@@ -18267,10 +18651,13 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
          if ( IS_ANYOF_TRIE(op) || trie->bitmap ) {
              sv_catpvs(sv, "[");
              (void) put_charclass_bitmap_innards(sv,
-                                                (IS_ANYOF_TRIE(op))
+                                                ((IS_ANYOF_TRIE(op))
                                                   ? ANYOF_BITMAP(o)
-                                                 : TRIE_BITMAP(trie),
-                                                NULL);
+                                                 : TRIE_BITMAP(trie)),
+                                                NULL,
+                                                NULL,
+                                                NULL
+                                               );
              sv_catpvs(sv, "]");
          }
  
@@ -18353,152 +18740,133 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
         Perl_sv_catpvf(aTHX_ sv, "[%d]", o->flags);
      else if (k == ANYOF) {
         const U8 flags = ANYOF_FLAGS(o);
-       int do_sep = 0;
-        SV* bitmap_invlist = NULL;  /* Will hold what the bit map contains */
+        bool do_sep = FALSE;    /* Do we need to separate various components of
+                                   the output? */
+        /* Set if there is still an unresolved user-defined property */
+        SV *unresolved                = NULL;
  
+        /* Things that are ignored except when the runtime locale is UTF-8 */
+        SV *only_utf8_locale_invlist = NULL;
+
+        /* Code points that don't fit in the bitmap */
+        SV *nonbitmap_invlist = NULL;
+
+        /* And things that aren't in the bitmap, but are small enough to be */
+        SV* bitmap_range_not_in_bitmap = NULL;
  
         if (OP(o) == ANYOFL) {
              if (ANYOFL_UTF8_LOCALE_REQD(flags)) {
-                sv_catpvs(sv, "{utf8-loc}");
+                sv_catpvs(sv, "{utf8-locale-reqd}");
              }
-            else {
-                sv_catpvs(sv, "{loc}");
+            if (flags & ANYOFL_FOLD) {
+                sv_catpvs(sv, "{i}");
              }
          }
-       if (flags & ANYOFL_FOLD)
-           sv_catpvs(sv, "{i}");
-       Perl_sv_catpvf(aTHX_ sv, "[%s", PL_colors[0]);
-       if (flags & ANYOF_INVERT)
-           sv_catpvs(sv, "^");
  
-        /* output what the standard cp 0-NUM_ANYOF_CODE_POINTS-1 bitmap matches
-         * */
-        do_sep = put_charclass_bitmap_innards(sv, ANYOF_BITMAP(o),
-                                                            &bitmap_invlist);
+        /* If there is stuff outside the bitmap, get it */
+        if (ARG(o) != ANYOF_ONLY_HAS_BITMAP) {
+            (void) _get_regclass_nonbitmap_data(prog, o, FALSE,
+                                                &unresolved,
+                                                &only_utf8_locale_invlist,
+                                                &nonbitmap_invlist);
+            /* The non-bitmap data may contain stuff that could fit in the
+             * bitmap.  This could come from a user-defined property being
+             * finally resolved when this call was done; or much more likely
+             * because there are matches that require UTF-8 to be valid, and so
+             * aren't in the bitmap.  This is teased apart later */
+            _invlist_intersection(nonbitmap_invlist,
+                                  PL_InBitmap,
+                                  &bitmap_range_not_in_bitmap);
+            /* Leave just the things that don't fit into the bitmap */
+            _invlist_subtract(nonbitmap_invlist,
+                              PL_InBitmap,
+                              &nonbitmap_invlist);
+        }
  
-        /* output any special charclass tests (used entirely under use
-         * locale) * */
-       if (ANYOF_POSIXL_TEST_ANY_SET(o)) {
-            int i;
-           for (i = 0; i < ANYOF_POSIXL_MAX; i++) {
-               if (ANYOF_POSIXL_TEST(o,i)) {
-                   sv_catpv(sv, anyofs[i]);
-                   do_sep = 1;
-               }
-            }
+        /* Obey this flag to add all above-the-bitmap code points */
+        if (flags & ANYOF_MATCHES_ALL_ABOVE_BITMAP) {
+            nonbitmap_invlist = _add_range_to_invlist(nonbitmap_invlist,
+                                                      NUM_ANYOF_CODE_POINTS,
+                                                      UV_MAX);
          }
  
-        if (    ARG(o) != ANYOF_ONLY_HAS_BITMAP
-           || (flags
-                & ( ANYOF_MATCHES_ALL_ABOVE_BITMAP
-                   |ANYOF_SHARED_d_UPPER_LATIN1_UTF8_STRING_MATCHES_non_d_RUNTIME_USER_PROP
-                   |ANYOFL_FOLD)))
-        {
+        /* Ready to start outputting.  First, the initial left bracket */
+       Perl_sv_catpvf(aTHX_ sv, "[%s", PL_colors[0]);
+
+        /* Then all the things that could fit in the bitmap */
+        do_sep = put_charclass_bitmap_innards(sv,
+                                              ANYOF_BITMAP(o),
+                                              bitmap_range_not_in_bitmap,
+                                              only_utf8_locale_invlist,
+                                              o);
+        SvREFCNT_dec(bitmap_range_not_in_bitmap);
+
+        /* If there are user-defined properties which haven't been defined yet,
+         * output them, in a separate [] from the bitmap range stuff */
+        if (unresolved) {
              if (do_sep) {
                  Perl_sv_catpvf(aTHX_ sv,"%s][%s",PL_colors[1],PL_colors[0]);
-                if (flags & ANYOF_INVERT)
-                    /*make sure the invert info is in each */
-                    sv_catpvs(sv, "^");
              }
-
-            if (OP(o) == ANYOFD
-                && (flags & ANYOF_SHARED_d_MATCHES_ALL_NON_UTF8_NON_ASCII_non_d_WARN_SUPER))
-            {
-                sv_catpvs(sv, "{non-utf8-latin1-all}");
+            if (flags & ANYOF_INVERT) {
+                sv_catpvs(sv, "^");
              }
+            sv_catsv(sv, unresolved);
+            do_sep = TRUE;
+            SvREFCNT_dec_NN(unresolved);
+        }
  
-            if (flags & ANYOF_MATCHES_ALL_ABOVE_BITMAP)
-                sv_catpvs(sv, "{above_bitmap_all}");
-
-            if (ARG(o) != ANYOF_ONLY_HAS_BITMAP) {
-                SV *lv; /* Set if there is something outside the bit map. */
-                bool byte_output = FALSE;   /* If something has been output */
-                SV *only_utf8_locale;
-
-                /* Get the stuff that wasn't in the bitmap.  'bitmap_invlist'
-                 * is used to guarantee that nothing in the bitmap gets
-                 * returned */
-                (void) _get_regclass_nonbitmap_data(prog, o, FALSE,
-                                                    &lv, &only_utf8_locale,
-                                                    bitmap_invlist);
-                if (lv && lv != &PL_sv_undef) {
-                    char *s = savesvpv(lv);
-                    char * const origs = s;
-
-                    while (*s && *s != '\n')
-                        s++;
-
-                    if (*s == '\n') {
-                        const char * const t = ++s;
-
-                        if (flags & ANYOF_SHARED_d_UPPER_LATIN1_UTF8_STRING_MATCHES_non_d_RUNTIME_USER_PROP) {
-                            if (OP(o) == ANYOFD) {
-                                sv_catpvs(sv, "{utf8}");
-                            }
-                            else {
-                                sv_catpvs(sv, "{outside bitmap}");
-                            }
-                        }
+        /* And, finally, add the above-the-bitmap stuff */
+        if (nonbitmap_invlist && _invlist_len(nonbitmap_invlist)) {
+            SV* contents;
  
-                        if (byte_output) {
-                            sv_catpvs(sv, " ");
-                        }
+            /* See if truncation size is overridden */
+            const STRLEN dump_len = (PL_dump_re_max_len)
+                                    ? PL_dump_re_max_len
+                                    : 256;
  
-                        while (*s) {
-                            if (*s == '\n') {
+            /* This is output in a separate [] */
+            if (do_sep) {
+                Perl_sv_catpvf(aTHX_ sv,"%s][%s",PL_colors[1],PL_colors[0]);
+            }
  
-                                /* Truncate very long output */
-                                if (s - origs > 256) {
-                                    Perl_sv_catpvf(aTHX_ sv,
-                                                "%.*s...",
-                                                (int) (s - origs - 1),
-                                                t);
-                                    goto out_dump;
-                                }
-                                *s = ' ';
-                            }
-                            else if (*s == '\t') {
-                                *s = '-';
-                            }
-                            s++;
-                        }
-                        if (s[-1] == ' ')
-                            s[-1] = 0;
+            /* And, for easy of understanding, it is always output not-shown as
+             * complemented */
+            if (flags & ANYOF_INVERT) {
+                _invlist_invert(nonbitmap_invlist);
+                _invlist_subtract(nonbitmap_invlist, PL_InBitmap, &nonbitmap_invlist);
+            }
  
-                        sv_catpv(sv, t);
-                    }
+            contents = invlist_contents(nonbitmap_invlist,
+                                        FALSE /* output suitable for catsv */
+                                       );
  
-                  out_dump:
+            /* If the output is shorter than the permissible maximum, just do it. */
+            if (SvCUR(contents) <= dump_len) {
+                sv_catsv(sv, contents);
+            }
+            else {
+                const char * contents_string = SvPVX(contents);
+                STRLEN i = dump_len;
  
-                    Safefree(origs);
-                    SvREFCNT_dec_NN(lv);
+                /* Otherwise, start at the permissible max and work back to the
+                 * first break possibility */
+                while (i > 0 && contents_string[i] != ' ') {
+                    i--;
                  }
-
-                if ((flags & ANYOFL_FOLD)
-                     && only_utf8_locale
-                     && only_utf8_locale != &PL_sv_undef)
-                {
-                    UV start, end;
-                    int max_entries = 256;
-
-                    sv_catpvs(sv, "{utf8 locale}");
-                    invlist_iterinit(only_utf8_locale);
-                    while (invlist_iternext(only_utf8_locale,
-                                            &start, &end)) {
-                        put_range(sv, start, end, FALSE);
-                        max_entries --;
-                        if (max_entries < 0) {
-                            sv_catpvs(sv, "...");
-                            break;
-                        }
-                    }
-                    invlist_iterfinish(only_utf8_locale);
+                if (i == 0) {       /* Fail-safe.  Use the max if we couldn't
+                                       find a legal break */
+                    i = dump_len;
                  }
+
+                sv_catpvn(sv, contents_string, i);
+                sv_catpvs(sv, "...");
              }
-       }
-        SvREFCNT_dec(bitmap_invlist);
  
+            SvREFCNT_dec_NN(contents);
+            SvREFCNT_dec_NN(nonbitmap_invlist);
+        }
  
+        /* And finally the matching, closing ']' */
         Perl_sv_catpvf(aTHX_ sv, "%s]", PL_colors[1]);
      }
      else if (k == POSIXD || k == NPOSIXD) {
@@ -19165,18 +19533,18 @@ S_put_code_point(pTHX_ SV *sv, UV c)
      }
      else if (isPRINT(c)) {
         const char string = (char) c;
-       if (isBACKSLASHED_PUNCT(c))
+
+        /* We use {phrase} as metanotation in the class, so also escape literal
+         * braces */
+       if (isBACKSLASHED_PUNCT(c) || c == '{' || c == '}')
             sv_catpvs(sv, "\\");
         sv_catpvn(sv, &string, 1);
      }
+    else if (isMNEMONIC_CNTRL(c)) {
+        Perl_sv_catpvf(aTHX_ sv, "%s", cntrl_to_mnemonic((U8) c));
+    }
      else {
-        const char * const mnemonic = cntrl_to_mnemonic((char) c);
-        if (mnemonic) {
-            Perl_sv_catpvf(aTHX_ sv, "%s", mnemonic);
-        }
-        else {
-            Perl_sv_catpvf(aTHX_ sv, "\\x{%02X}", (U8) c);
-        }
+        Perl_sv_catpvf(aTHX_ sv, "\\x%02X", (U8) c);
      }
  }
  
@@ -19186,8 +19554,15 @@ STATIC void
  S_put_range(pTHX_ SV *sv, UV start, const UV end, const bool allow_literals)
  {
      /* Appends to 'sv' a displayable version of the range of code points from
-     * 'start' to 'end'.  It assumes that only ASCII printables are displayable
-     * as-is (though some of these will be escaped by put_code_point()). */
+     * 'start' to 'end'.  Mnemonics (like '\r') are used for the few controls
+     * that have them, when they occur at the beginning or end of the range.
+     * It uses hex to output the remaining code points, unless 'allow_literals'
+     * is true, in which case the printable ASCII ones are output as-is (though
+     * some of these will be escaped by put_code_point()).
+     *
+     * NOTE:  This is designed only for printing ranges of code points that fit
+     *        inside an ANYOF bitmap.  Higher code points are simply suppressed
+     */
  
      const unsigned int min_range_count = 3;
  
@@ -19201,7 +19576,7 @@ S_put_range(pTHX_ SV *sv, UV start, const UV end, const bool allow_literals)
  
          if (end - start < min_range_count) {
  
-            /* Individual chars in short ranges */
+            /* Output chars individually when they occur in short ranges */
              for (; start <= end; start++) {
                  put_code_point(sv, start);
              }
@@ -19210,11 +19585,11 @@ S_put_range(pTHX_ SV *sv, UV start, const UV end, const bool allow_literals)
  
          /* If permitted by the input options, and there is a possibility that
           * this range contains a printable literal, look to see if there is
-         * one.  */
+         * one. */
          if (allow_literals && start <= MAX_PRINT_A) {
  
-            /* If the range begin isn't an ASCII printable, effectively split
-             * the range into two parts:
+            /* If the character at the beginning of the range isn't an ASCII
+             * printable, effectively split the range into two parts:
               *  1) the portion before the first such printable,
               *  2) the rest
               * and output them separately. */
@@ -19236,18 +19611,18 @@ S_put_range(pTHX_ SV *sv, UV start, const UV end, const bool allow_literals)
                      temp_end = end + 1;
                  }
  
-                /* Output the first part of the split range, the part that
-                 * doesn't have printables, with no looking for literals
-                 * (otherwise we would infinitely recurse) */
+                /* Output the first part of the split range: the part that
+                 * doesn't have printables, with the parameter set to not look
+                 * for literals (otherwise we would infinitely recurse) */
                  put_range(sv, start, temp_end - 1, FALSE);
  
                  /* The 2nd part of the range (if any) starts here. */
                  start = temp_end;
  
-                /* We continue instead of dropping down because even if the 2nd
-                 * part is non-empty, it could be so short that we want to
-                 * output it specially, as tested for at the top of this loop.
-                 * */
+                /* We do a continue, instead of dropping down, because even if
+                 * the 2nd part is non-empty, it could be so short that we want
+                 * to output it as individual characters, as tested for at the
+                 * top of this loop.  */
                  continue;
              }
  
@@ -19312,7 +19687,8 @@ S_put_range(pTHX_ SV *sv, UV start, const UV end, const bool allow_literals)
                  temp_end--;
              }
  
-            /* And separately output the range that doesn't have mnemonics */
+            /* And separately output the interior range that doesn't start or
+             * end with mnemonics */
              put_range(sv, start, temp_end, FALSE);
  
              /* Then output the mnemonic trailing controls */
@@ -19331,10 +19707,10 @@ S_put_range(pTHX_ SV *sv, UV start, const UV end, const bool allow_literals)
                      : NUM_ANYOF_CODE_POINTS - 1;
  #if NUM_ANYOF_CODE_POINTS > 256
          format = (this_end < 256)
-                 ? "\\x{%02"UVXf"}-\\x{%02"UVXf"}"
+                 ? "\\x%02"UVXf"-\\x%02"UVXf""
                   : "\\x{%04"UVXf"}-\\x{%04"UVXf"}";
  #else
-        format = "\\x{%02"UVXf"}-\\x{%02"UVXf"}";
+        format = "\\x%02"UVXf"-\\x%02"UVXf"";
  #endif
          GCC_DIAG_IGNORE(-Wformat-nonliteral);
          Perl_sv_catpvf(aTHX_ sv, format, start, this_end);
@@ -19343,55 +19719,26 @@ S_put_range(pTHX_ SV *sv, UV start, const UV end, const bool allow_literals)
      }
  }
  
-STATIC bool
-S_put_charclass_bitmap_innards(pTHX_ SV *sv, char *bitmap, SV** bitmap_invlist)
+STATIC void
+S_put_charclass_bitmap_innards_invlist(pTHX_ SV *sv, SV* invlist)
  {
-    /* Appends to 'sv' a displayable version of the innards of the bracketed
-     * character class whose bitmap is 'bitmap';  Returns 'TRUE' if it actually
-     * output anything, and bitmap_invlist, if not NULL, will point to an
-     * inversion list of what is in the bit map */
+    /* Concatenate onto the PV in 'sv' a displayable form of the inversion list
+     * 'invlist' */
  
-    int i;
      UV start, end;
-    unsigned int punct_count = 0;
-    SV* invlist;
      bool allow_literals = TRUE;
-    bool inverted_for_output = FALSE;
-
-    PERL_ARGS_ASSERT_PUT_CHARCLASS_BITMAP_INNARDS;
  
-    /* Worst case is exactly every-other code point is in the list */
-    invlist = _new_invlist(NUM_ANYOF_CODE_POINTS / 2);
-
-    /* Convert the bit map to an inversion list, keeping track of how many
-     * ASCII puncts are set, including an extra amount for the backslashed
-     * ones.  */
-    for (i = 0; i < NUM_ANYOF_CODE_POINTS; i++) {
-        if (BITMAP_TEST(bitmap, i)) {
-            invlist = add_cp_to_invlist(invlist, i);
-            if (isPUNCT_A(i)) {
-                punct_count++;
-                if isBACKSLASHED_PUNCT(i) {
-                    punct_count++;
-                }
-            }
-        }
-    }
-
-    /* Nothing to output */
-    if (_invlist_len(invlist) == 0) {
-        SvREFCNT_dec_NN(invlist);
-        return FALSE;
-    }
+    PERL_ARGS_ASSERT_PUT_CHARCLASS_BITMAP_INNARDS_INVLIST;
  
      /* Generally, it is more readable if printable characters are output as
       * literals, but if a range (nearly) spans all of them, it's best to output
       * it as a single range.  This code will use a single range if all but 2
-     * printables are in it */
+     * ASCII printables are in it */
      invlist_iterinit(invlist);
      while (invlist_iternext(invlist, &start, &end)) {
  
-        /* If range starts beyond final printable, it doesn't have any in it */
+        /* If the range starts beyond the final printable, it doesn't have any
+         * in it */
          if (start > MAX_PRINT_A) {
              break;
          }
@@ -19414,24 +19761,6 @@ S_put_charclass_bitmap_innards(pTHX_ SV *sv, char *bitmap, SV** bitmap_invlist)
      }
      invlist_iterfinish(invlist);
  
-    /* The legibility of the output depends mostly on how many punctuation
-     * characters are output.  There are 32 possible ASCII ones, and some have
-     * an additional backslash, bringing it to currently 36, so if any more
-     * than 18 are to be output, we can instead output it as its complement,
-     * yielding fewer puncts, and making it more legible.  But give some weight
-     * to the fact that outputting it as a complement is less legible than a
-     * straight output, so don't complement unless we are somewhat over the 18
-     * mark */
-    if (allow_literals && punct_count > 22) {
-        sv_catpvs(sv, "^");
-
-        /* Add everything remaining to the list, so when we invert it just
-         * below, it will be excluded */
-        _invlist_union_complement_2nd(invlist, PL_InBitmap, &invlist);
-        _invlist_invert(invlist);
-        inverted_for_output = TRUE;
-    }
-
      /* Here we have figured things out.  Output each range */
      invlist_iterinit(invlist);
      while (invlist_iternext(invlist, &start, &end)) {
@@ -19442,22 +19771,335 @@ S_put_charclass_bitmap_innards(pTHX_ SV *sv, char *bitmap, SV** bitmap_invlist)
      }
      invlist_iterfinish(invlist);
  
-    if (bitmap_invlist) {
+    return;
+}
+
+STATIC SV*
+S_put_charclass_bitmap_innards_common(pTHX_
+        SV* invlist,            /* The bitmap */
+        SV* posixes,            /* Under /l, things like [:word:], \S */
+        SV* only_utf8,          /* Under /d, matches iff the target is UTF-8 */
+        SV* not_utf8,           /* /d, matches iff the target isn't UTF-8 */
+        SV* only_utf8_locale,   /* Under /l, matches if the locale is UTF-8 */
+        const bool invert       /* Is the result to be inverted? */
+)
+{
+    /* Create and return an SV containing a displayable version of the bitmap
+     * and associated information determined by the input parameters. */
  
-        /* Here, wants the inversion list returned.  If we inverted it, we have
-         * to restore it to the original */
-        if (inverted_for_output) {
-            _invlist_invert(invlist);
-            _invlist_intersection(invlist, PL_InBitmap, &invlist);
-        }
+    SV * output;
+
+    PERL_ARGS_ASSERT_PUT_CHARCLASS_BITMAP_INNARDS_COMMON;
  
-        *bitmap_invlist = invlist;
+    if (invert) {
+        output = newSVpvs("^");
      }
      else {
-        SvREFCNT_dec_NN(invlist);
+        output = newSVpvs("");
      }
  
-    return TRUE;
+    /* First, the code points in the bitmap that are unconditionally there */
+    put_charclass_bitmap_innards_invlist(output, invlist);
+
+    /* Traditionally, these have been placed after the main code points */
+    if (posixes) {
+        sv_catsv(output, posixes);
+    }
+
+    if (only_utf8 && _invlist_len(only_utf8)) {
+        Perl_sv_catpvf(aTHX_ output, "%s{utf8}%s", PL_colors[1], PL_colors[0]);
+        put_charclass_bitmap_innards_invlist(output, only_utf8);
+    }
+
+    if (not_utf8 && _invlist_len(not_utf8)) {
+        Perl_sv_catpvf(aTHX_ output, "%s{not utf8}%s", PL_colors[1], PL_colors[0]);
+        put_charclass_bitmap_innards_invlist(output, not_utf8);
+    }
+
+    if (only_utf8_locale && _invlist_len(only_utf8_locale)) {
+        Perl_sv_catpvf(aTHX_ output, "%s{utf8 locale}%s", PL_colors[1], PL_colors[0]);
+        put_charclass_bitmap_innards_invlist(output, only_utf8_locale);
+
+        /* This is the only list in this routine that can legally contain code
+         * points outside the bitmap range.  The call just above to
+         * 'put_charclass_bitmap_innards_invlist' will simply suppress them, so
+         * output them here.  There's about a half-dozen possible, and none in
+         * contiguous ranges longer than 2 */
+        if (invlist_highest(only_utf8_locale) >= NUM_ANYOF_CODE_POINTS) {
+            UV start, end;
+            SV* above_bitmap = NULL;
+
+            _invlist_subtract(only_utf8_locale, PL_InBitmap, &above_bitmap);
+
+            invlist_iterinit(above_bitmap);
+            while (invlist_iternext(above_bitmap, &start, &end)) {
+                UV i;
+
+                for (i = start; i <= end; i++) {
+                    put_code_point(output, i);
+                }
+            }
+            invlist_iterfinish(above_bitmap);
+            SvREFCNT_dec_NN(above_bitmap);
+        }
+    }
+
+    /* If the only thing we output is the '^', clear it */
+    if (invert && SvCUR(output) == 1) {
+        SvCUR_set(output, 0);
+    }
+
+    return output;
+}
+
+STATIC bool
+S_put_charclass_bitmap_innards(pTHX_ SV *sv,
+                                     char *bitmap,
+                                     SV *nonbitmap_invlist,
+                                     SV *only_utf8_locale_invlist,
+                                     const regnode * const node)
+{
+    /* Appends to 'sv' a displayable version of the innards of the bracketed
+     * character class defined by the other arguments:
+     *  'bitmap' points to the bitmap.
+     *  'nonbitmap_invlist' is an inversion list of the code points that are in
+     *      the bitmap range, but for some reason aren't in the bitmap; NULL if
+     *      none.  The reasons for this could be that they require some
+     *      condition such as the target string being or not being in UTF-8
+     *      (under /d), or because they came from a user-defined property that
+     *      was not resolved at the time of the regex compilation (under /u)
+     *  'only_utf8_locale_invlist' is an inversion list of the code points that
+     *      are valid only if the runtime locale is a UTF-8 one; NULL if none
+     *  'node' is the regex pattern node.  It is needed only when the above two
+     *      parameters are not null, and is passed so that this routine can
+     *      tease apart the various reasons for them.
+     *
+     * It returns TRUE if there was actually something output.  (It may be that
+     * the bitmap, etc is empty.)
+     *
+     * When called for outputting the bitmap of a non-ANYOF node, just pass the
+     * bitmap, with the succeeding parameters set to NULL.
+     *
+     */
+
+    /* In general, it tries to display the 'cleanest' representation of the
+     * innards, choosing whether to display them inverted or not, regardless of
+     * whether the class itself is to be inverted.  However,  there are some
+     * cases where it can't try inverting, as what actually matches isn't known
+     * until runtime, and hence the inversion isn't either. */
+    bool inverting_allowed = TRUE;
+
+    int i;
+    STRLEN orig_sv_cur = SvCUR(sv);
+
+    SV* invlist;            /* Inversion list we accumulate of code points that
+                               are unconditionally matched */
+    SV* only_utf8 = NULL;   /* Under /d, list of matches iff the target is
+                               UTF-8 */
+    SV* not_utf8 =  NULL;   /* /d, list of matches iff the target isn't UTF-8
+                             */
+    SV* posixes = NULL;     /* Under /l, string of things like [:word:], \D */
+    SV* only_utf8_locale = NULL;    /* Under /l, list of matches if the locale
+                                       is UTF-8 */
+
+    SV* as_is_display;      /* The output string when we take the inputs
+                              literally */
+    SV* inverted_display;   /* The output string when we invert the inputs */
+
+    U8 flags = (node) ? ANYOF_FLAGS(node) : 0;
+
+    bool invert = cBOOL(flags & ANYOF_INVERT);  /* Is the input to be inverted
+                                                   to match? */
+    /* We are biased in favor of displaying things without them being inverted,
+     * as that is generally easier to understand */
+    const int bias = 5;
+
+    PERL_ARGS_ASSERT_PUT_CHARCLASS_BITMAP_INNARDS;
+
+    /* Start off with whatever code points are passed in.  (We clone, so we
+     * don't change the caller's list) */
+    if (nonbitmap_invlist) {
+        assert(invlist_highest(nonbitmap_invlist) < NUM_ANYOF_CODE_POINTS);
+        invlist = invlist_clone(nonbitmap_invlist);
+    }
+    else {  /* Worst case size is every other code point is matched */
+        invlist = _new_invlist(NUM_ANYOF_CODE_POINTS / 2);
+    }
+
+    if (flags) {
+        if (OP(node) == ANYOFD) {
+
+            /* This flag indicates that the code points below 0x100 in the
+             * nonbitmap list are precisely the ones that match only when the
+             * target is UTF-8 (they should all be non-ASCII). */
+            if (flags & ANYOF_SHARED_d_UPPER_LATIN1_UTF8_STRING_MATCHES_non_d_RUNTIME_USER_PROP)
+            {
+                _invlist_intersection(invlist, PL_UpperLatin1, &only_utf8);
+                _invlist_subtract(invlist, only_utf8, &invlist);
+            }
+
+            /* And this flag for matching all non-ASCII 0xFF and below */
+            if (flags & ANYOF_SHARED_d_MATCHES_ALL_NON_UTF8_NON_ASCII_non_d_WARN_SUPER)
+            {
+                if (invert) {
+                    not_utf8 = _new_invlist(0);
+                }
+                else {
+                    not_utf8 = invlist_clone(PL_UpperLatin1);
+                }
+                inverting_allowed = FALSE;  /* XXX needs more work to be able
+                                               to allow this */
+            }
+        }
+        else if (OP(node) == ANYOFL) {
+
+            /* If either of these flags are set, what matches isn't
+             * determinable except during execution, so don't know enough here
+             * to invert */
+            if (flags & (ANYOFL_FOLD|ANYOF_MATCHES_POSIXL)) {
+                inverting_allowed = FALSE;
+            }
+
+            /* What the posix classes match also varies at runtime, so these
+             * will be output symbolically. */
+            if (ANYOF_POSIXL_TEST_ANY_SET(node)) {
+                int i;
+
+                posixes = newSVpvs("");
+                for (i = 0; i < ANYOF_POSIXL_MAX; i++) {
+                    if (ANYOF_POSIXL_TEST(node,i)) {
+                        sv_catpv(posixes, anyofs[i]);
+                    }
+                }
+            }
+        }
+    }
+
+    /* Accumulate the bit map into the unconditional match list */
+    for (i = 0; i < NUM_ANYOF_CODE_POINTS; i++) {
+        if (BITMAP_TEST(bitmap, i)) {
+            int start = i++;
+            for (; i < NUM_ANYOF_CODE_POINTS && BITMAP_TEST(bitmap, i); i++) {
+                /* empty */
+            }
+            invlist = _add_range_to_invlist(invlist, start, i-1);
+        }
+    }
+
+    /* Make sure that the conditional match lists don't have anything in them
+     * that match unconditionally; otherwise the output is quite confusing.
+     * This could happen if the code that populates these misses some
+     * duplication. */
+    if (only_utf8) {
+        _invlist_subtract(only_utf8, invlist, &only_utf8);
+    }
+    if (not_utf8) {
+        _invlist_subtract(not_utf8, invlist, &not_utf8);
+    }
+
+    if (only_utf8_locale_invlist) {
+
+        /* Since this list is passed in, we have to make a copy before
+         * modifying it */
+        only_utf8_locale = invlist_clone(only_utf8_locale_invlist);
+
+        _invlist_subtract(only_utf8_locale, invlist, &only_utf8_locale);
+
+        /* And, it can get really weird for us to try outputting an inverted
+         * form of this list when it has things above the bitmap, so don't even
+         * try */
+        if (invlist_highest(only_utf8_locale) >= NUM_ANYOF_CODE_POINTS) {
+            inverting_allowed = FALSE;
+        }
+    }
+
+    /* Calculate what the output would be if we take the input as-is */
+    as_is_display = put_charclass_bitmap_innards_common(invlist,
+                                                    posixes,
+                                                    only_utf8,
+                                                    not_utf8,
+                                                    only_utf8_locale,
+                                                    invert);
+
+    /* If have to take the output as-is, just do that */
+    if (! inverting_allowed) {
+        sv_catsv(sv, as_is_display);
+    }
+    else { /* But otherwise, create the output again on the inverted input, and
+              use whichever version is shorter */
+
+        int inverted_bias, as_is_bias;
+
+        /* We will apply our bias to whichever of the the results doesn't have
+         * the '^' */
+        if (invert) {
+            invert = FALSE;
+            as_is_bias = bias;
+            inverted_bias = 0;
+        }
+        else {
+            invert = TRUE;
+            as_is_bias = 0;
+            inverted_bias = bias;
+        }
+
+        /* Now invert each of the lists that contribute to the output,
+         * excluding from the result things outside the possible range */
+
+        /* For the unconditional inversion list, we have to add in all the
+         * conditional code points, so that when inverted, they will be gone
+         * from it */
+        _invlist_union(only_utf8, invlist, &invlist);
+        _invlist_union(only_utf8_locale, invlist, &invlist);
+        _invlist_invert(invlist);
+        _invlist_intersection(invlist, PL_InBitmap, &invlist);
+
+        if (only_utf8) {
+            _invlist_invert(only_utf8);
+            _invlist_intersection(only_utf8, PL_UpperLatin1, &only_utf8);
+        }
+
+        if (not_utf8) {
+            _invlist_invert(not_utf8);
+            _invlist_intersection(not_utf8, PL_UpperLatin1, &not_utf8);
+        }
+
+        if (only_utf8_locale) {
+            _invlist_invert(only_utf8_locale);
+            _invlist_intersection(only_utf8_locale,
+                                  PL_InBitmap,
+                                  &only_utf8_locale);
+        }
+
+        inverted_display = put_charclass_bitmap_innards_common(
+                                            invlist,
+                                            posixes,
+                                            only_utf8,
+                                            not_utf8,
+                                            only_utf8_locale, invert);
+
+        /* Use the shortest representation, taking into account our bias
+         * against showing it inverted */
+        if (SvCUR(inverted_display) + inverted_bias
+            < SvCUR(as_is_display) + as_is_bias)
+        {
+           sv_catsv(sv, inverted_display);
+        }
+        else {
+           sv_catsv(sv, as_is_display);
+        }
+
+        SvREFCNT_dec_NN(as_is_display);
+        SvREFCNT_dec_NN(inverted_display);
+    }
+
+    SvREFCNT_dec_NN(invlist);
+    SvREFCNT_dec(only_utf8);
+    SvREFCNT_dec(not_utf8);
+    SvREFCNT_dec(posixes);
+    SvREFCNT_dec(only_utf8_locale);
+
+    return SvCUR(sv) > orig_sv_cur;
  }
  
  #define CLEAR_OPTSTART \