concise.t: work with PERL_UNICODE=""

[perl5.git] / regcomp.c
diff --git a/regcomp.c b/regcomp.c

index 5dff17c..bba5a2b 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
@@ -199,6 +199,7 @@ struct RExC_state_t {
      scan_frame *frame_head;
      scan_frame *frame_last;
      U32         frame_count;
      scan_frame *frame_head;
      scan_frame *frame_last;
      U32         frame_count;
+    AV         *warn_text;
  #ifdef ADD_TO_REGEXEC
      char       *starttry;              /* -Dr: where regtry was called. */
  #define RExC_starttry  (pRExC_state->starttry)
  #ifdef ADD_TO_REGEXEC
      char       *starttry;              /* -Dr: where regtry was called. */
  #define RExC_starttry  (pRExC_state->starttry)
@@ -222,6 +223,7 @@ struct RExC_state_t {
  #endif
      bool        seen_unfolded_sharp_s;
      bool        strict;
  #endif
      bool        seen_unfolded_sharp_s;
      bool        strict;
+    bool        study_started;
  };
  
  #define RExC_flags     (pRExC_state->flags)
  };
  
  #define RExC_flags     (pRExC_state->flags)
@@ -288,6 +290,8 @@ struct RExC_state_t {
  #define RExC_frame_last (pRExC_state->frame_last)
  #define RExC_frame_count (pRExC_state->frame_count)
  #define RExC_strict (pRExC_state->strict)
  #define RExC_frame_last (pRExC_state->frame_last)
  #define RExC_frame_count (pRExC_state->frame_count)
  #define RExC_strict (pRExC_state->strict)
+#define RExC_study_started      (pRExC_state->study_started)
+#define RExC_warn_text (pRExC_state->warn_text)
  
  /* Heuristic check on the complexity of the pattern: if TOO_NAUGHTY, we set
   * a flag to disable back-off on the fixed/floating substrings - if it's
  
  /* Heuristic check on the complexity of the pattern: if TOO_NAUGHTY, we set
   * a flag to disable back-off on the fixed/floating substrings - if it's
@@ -502,7 +506,20 @@ static const scan_data_t zero_scan_data =
  #define SF_HAS_PAR             0x0080
  #define SF_IN_PAR              0x0100
  #define SF_HAS_EVAL            0x0200
  #define SF_HAS_PAR             0x0080
  #define SF_IN_PAR              0x0100
  #define SF_HAS_EVAL            0x0200
+
+
+/* SCF_DO_SUBSTR is the flag that tells the regexp analyzer to track the
+ * longest substring in the pattern. When it is not set the optimiser keeps
+ * track of position, but does not keep track of the actual strings seen.
+ *
+ * So for instance /foo/ will be parsed with SCF_DO_SUBSTR being true, but
+ * /foo/i will not.
+ *
+ * Similarly, /foo.*(blah|erm|huh).*fnorble/ will have "foo" and "fnorble"
+ * parsed with SCF_DO_SUBSTR on, but while processing the (...) it will be
+ * turned off because of the alternation (BRANCH). */
  #define SCF_DO_SUBSTR          0x0400
  #define SCF_DO_SUBSTR          0x0400
+
  #define SCF_DO_STCLASS_AND     0x0800
  #define SCF_DO_STCLASS_OR      0x1000
  #define SCF_DO_STCLASS         (SCF_DO_STCLASS_AND|SCF_DO_STCLASS_OR)
  #define SCF_DO_STCLASS_AND     0x0800
  #define SCF_DO_STCLASS_OR      0x1000
  #define SCF_DO_STCLASS         (SCF_DO_STCLASS_AND|SCF_DO_STCLASS_OR)
@@ -920,7 +937,7 @@ Perl_re_indentf(pTHX_ const char *fmt, U32 depth, ...)
      PerlIO *f= Perl_debug_log;
      PERL_ARGS_ASSERT_RE_INDENTF;
      va_start(ap, depth);
      PerlIO *f= Perl_debug_log;
      PERL_ARGS_ASSERT_RE_INDENTF;
      va_start(ap, depth);
-    PerlIO_printf(f, "%*s", ( depth % 20 ) * 2, "");
+    PerlIO_printf(f, "%*s", ( (int)depth % 20 ) * 2, "");
      result = PerlIO_vprintf(f, fmt, ap);
      va_end(ap);
      return result;
      result = PerlIO_vprintf(f, fmt, ap);
      va_end(ap);
      return result;
@@ -1266,8 +1283,8 @@ S_ssc_anything(pTHX_ regnode_ssc *ssc)
  
      assert(is_ANYOF_SYNTHETIC(ssc));
  
  
      assert(is_ANYOF_SYNTHETIC(ssc));
  
-    ssc->invlist = sv_2mortal(_new_invlist(2)); /* mortalize so won't leak */
-    _append_range_to_invlist(ssc->invlist, 0, UV_MAX);
+    /* mortalize so won't leak */
+    ssc->invlist = sv_2mortal(_add_range_to_invlist(NULL, 0, UV_MAX));
      ANYOF_FLAGS(ssc) |= SSC_MATCHES_EMPTY_STRING;  /* Plus matches empty */
  }
  
      ANYOF_FLAGS(ssc) |= SSC_MATCHES_EMPTY_STRING;  /* Plus matches empty */
  }
  
@@ -2153,7 +2170,8 @@ S_dump_trie_interim_table(pTHX_ const struct _reg_trie_data *trie,
          }
      }
  
          }
      }
  
-    Perl_re_printf( aTHX_  "\n%*sState+-",depth+1 );
+    Perl_re_printf( aTHX_ "\n");
+    Perl_re_indentf( aTHX_  "State+-", depth+1 );
  
      for( charid=0 ; charid < trie->uniquecharcount ; charid++ ) {
          Perl_re_printf( aTHX_  "%.*s", colwidth,"--------");
  
      for( charid=0 ; charid < trie->uniquecharcount ; charid++ ) {
          Perl_re_printf( aTHX_  "%.*s", colwidth,"--------");
@@ -4088,6 +4106,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
      GET_RE_DEBUG_FLAGS_DECL;
  
      PERL_ARGS_ASSERT_STUDY_CHUNK;
      GET_RE_DEBUG_FLAGS_DECL;
  
      PERL_ARGS_ASSERT_STUDY_CHUNK;
+    RExC_study_started= 1;
  
  
      if ( depth == 0 ) {
  
  
      if ( depth == 0 ) {
@@ -4144,9 +4163,10 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
          DEBUG_PEEP("Peep", scan, depth);
  
  
          DEBUG_PEEP("Peep", scan, depth);
  
  
-        /* The reason we do this here we need to deal with things like /(?:f)(?:o)(?:o)/
-         * which cant be dealt with by the normal EXACT parsing code, as each (?:..) is handled
-         * by a different invocation of reg() -- Yves
+        /* The reason we do this here is that we need to deal with things like
+         * /(?:f)(?:o)(?:o)/ which can't be dealt with by the normal EXACT
+         * parsing code, as each (?:..) is handled by a different invocation of
+         * reg() -- Yves
           */
          JOIN_EXACT(scan,&min_subtract, &unfolded_multi_char, 0);
  
           */
          JOIN_EXACT(scan,&min_subtract, &unfolded_multi_char, 0);
  
@@ -5884,15 +5904,10 @@ Perl_re_printf( aTHX_  "LHS=%"UVuf" RHS=%"UVuf"\n",
         /* Else: zero-length, ignore. */
         scan = regnext(scan);
      }
         /* Else: zero-length, ignore. */
         scan = regnext(scan);
      }
-    /* If we are exiting a recursion we can unset its recursed bit
-     * and allow ourselves to enter it again - no danger of an
-     * infinite loop there.
-    if (stopparen > -1 && recursed) {
-       DEBUG_STUDYDATA("unset:", data,depth);
-        PAREN_UNSET( recursed, stopparen);
-    }
-    */
+
+  finish:
      if (frame) {
      if (frame) {
+        /* we need to unwind recursion. */
          depth = depth - 1;
  
          DEBUG_STUDYDATA("frame-end:",data,depth);
          depth = depth - 1;
  
          DEBUG_STUDYDATA("frame-end:",data,depth);
@@ -5909,7 +5924,6 @@ Perl_re_printf( aTHX_  "LHS=%"UVuf" RHS=%"UVuf"\n",
          goto fake_study_recurse;
      }
  
          goto fake_study_recurse;
      }
  
-  finish:
      assert(!frame);
      DEBUG_STUDYDATA("pre-fin:",data,depth);
  
      assert(!frame);
      DEBUG_STUDYDATA("pre-fin:",data,depth);
  
@@ -6752,6 +6766,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
  #endif
      }
  
  #endif
      }
  
+    pRExC_state->warn_text = NULL;
      pRExC_state->code_blocks = NULL;
      pRExC_state->num_code_blocks = 0;
  
      pRExC_state->code_blocks = NULL;
      pRExC_state->num_code_blocks = 0;
  
@@ -6868,6 +6883,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
      RExC_contains_locale = 0;
      RExC_contains_i = 0;
      RExC_strict = cBOOL(pm_flags & RXf_PMf_STRICT);
      RExC_contains_locale = 0;
      RExC_contains_i = 0;
      RExC_strict = cBOOL(pm_flags & RXf_PMf_STRICT);
+    RExC_study_started = 0;
      pRExC_state->runtime_code_qr = NULL;
      RExC_frame_head= NULL;
      RExC_frame_last= NULL;
      pRExC_state->runtime_code_qr = NULL;
      RExC_frame_head= NULL;
      RExC_frame_last= NULL;
@@ -8308,33 +8324,47 @@ S_reg_scan_name(pTHX_ RExC_state_t *pRExC_state, U32 flags)
   * as an SVt_INVLIST scalar.
   *
   * An inversion list for Unicode is an array of code points, sorted by ordinal
   * as an SVt_INVLIST scalar.
   *
   * An inversion list for Unicode is an array of code points, sorted by ordinal
- * number.  The zeroth element is the first code point in the list.  The 1th
- * element is the first element beyond that not in the list.  In other words,
- * the first range is
- *  invlist[0]..(invlist[1]-1)
- * The other ranges follow.  Thus every element whose index is divisible by two
- * marks the beginning of a range that is in the list, and every element not
- * divisible by two marks the beginning of a range not in the list.  A single
- * element inversion list that contains the single code point N generally
- * consists of two elements
- *  invlist[0] == N
- *  invlist[1] == N+1
- * (The exception is when N is the highest representable value on the
- * machine, in which case the list containing just it would be a single
- * element, itself.  By extension, if the last range in the list extends to
- * infinity, then the first element of that range will be in the inversion list
- * at a position that is divisible by two, and is the final element in the
- * list.)
+ * number.  Each element gives the code point that begins a range that extends
+ * up-to but not including the code point given by the next element.  The final
+ * element gives the first code point of a range that extends to the platform's
+ * infinity.  The even-numbered elements (invlist[0], invlist[2], invlist[4],
+ * ...) give ranges whose code points are all in the inversion list.  We say
+ * that those ranges are in the set.  The odd-numbered elements give ranges
+ * whose code points are not in the inversion list, and hence not in the set.
+ * Thus, element [0] is the first code point in the list.  Element [1]
+ * is the first code point beyond that not in the list; and element [2] is the
+ * first code point beyond that that is in the list.  In other words, the first
+ * range is invlist[0]..(invlist[1]-1), and all code points in that range are
+ * in the inversion list.  The second range is invlist[1]..(invlist[2]-1), and
+ * all code points in that range are not in the inversion list.  The third
+ * range invlist[2]..(invlist[3]-1) gives code points that are in the inversion
+ * list, and so forth.  Thus every element whose index is divisible by two
+ * gives the beginning of a range that is in the list, and every element whose
+ * index is not divisible by two gives the beginning of a range not in the
+ * list.  If the final element's index is divisible by two, the inversion list
+ * extends to the platform's infinity; otherwise the highest code point in the
+ * inversion list is the contents of that element minus 1.
+ *
+ * A range that contains just a single code point N will look like
+ *  invlist[i]   == N
+ *  invlist[i+1] == N+1
+ *
+ * If N is UV_MAX (the highest representable code point on the machine), N+1 is
+ * impossible to represent, so element [i+1] is omitted.  The single element
+ * inversion list
+ *  invlist[0] == UV_MAX
+ * contains just UV_MAX, but is interpreted as matching to infinity.
+ *
   * Taking the complement (inverting) an inversion list is quite simple, if the
   * first element is 0, remove it; otherwise add a 0 element at the beginning.
   * This implementation reserves an element at the beginning of each inversion
   * list to always contain 0; there is an additional flag in the header which
   * indicates if the list begins at the 0, or is offset to begin at the next
   * Taking the complement (inverting) an inversion list is quite simple, if the
   * first element is 0, remove it; otherwise add a 0 element at the beginning.
   * This implementation reserves an element at the beginning of each inversion
   * list to always contain 0; there is an additional flag in the header which
   * indicates if the list begins at the 0, or is offset to begin at the next
- * element.
+ * element.  This means that the inversion list can be inverted without any
+ * copying; just flip the flag.
   *
   * More about inversion lists can be found in "Unicode Demystified"
   * Chapter 13 by Richard Gillam, published by Addison-Wesley.
   *
   * More about inversion lists can be found in "Unicode Demystified"
   * Chapter 13 by Richard Gillam, published by Addison-Wesley.
- * More will be coming when functionality is added later.
   *
   * The inversion list data structure is currently implemented as an SV pointing
   * to an array of UVs that the SV thinks are bytes.  This allows us to have an
   *
   * The inversion list data structure is currently implemented as an SV pointing
   * to an array of UVs that the SV thinks are bytes.  This allows us to have an
@@ -8346,6 +8376,8 @@ S_reg_scan_name(pTHX_ RExC_state_t *pRExC_state, U32 flags)
  
  /* The header definitions are in F<invlist_inline.h> */
  
  
  /* The header definitions are in F<invlist_inline.h> */
  
+#ifndef PERL_IN_XSUB_RE
+
  PERL_STATIC_INLINE UV*
  S__invlist_array_init(SV* const invlist, const bool will_have_0)
  {
  PERL_STATIC_INLINE UV*
  S__invlist_array_init(SV* const invlist, const bool will_have_0)
  {
@@ -8372,6 +8404,8 @@ S__invlist_array_init(SV* const invlist, const bool will_have_0)
      return zero_addr + *offset;
  }
  
      return zero_addr + *offset;
  }
  
+#endif
+
  PERL_STATIC_INLINE void
  S_invlist_set_len(pTHX_ SV* const invlist, const UV len, const bool offset)
  {
  PERL_STATIC_INLINE void
  S_invlist_set_len(pTHX_ SV* const invlist, const UV len, const bool offset)
  {
@@ -8508,6 +8542,8 @@ S_invlist_is_iterating(SV* const invlist)
      return *(get_invlist_iter_addr(invlist)) < (STRLEN) UV_MAX;
  }
  
      return *(get_invlist_iter_addr(invlist)) < (STRLEN) UV_MAX;
  }
  
+#ifndef PERL_IN_XSUB_RE
+
  PERL_STATIC_INLINE UV
  S_invlist_max(SV* const invlist)
  {
  PERL_STATIC_INLINE UV
  S_invlist_max(SV* const invlist)
  {
@@ -8524,8 +8560,6 @@ S_invlist_max(SV* const invlist)
             ? FROM_INTERNAL_SIZE(SvCUR(invlist)) - 1
             : FROM_INTERNAL_SIZE(SvLEN(invlist)) - 1;
  }
             ? FROM_INTERNAL_SIZE(SvCUR(invlist)) - 1
             : FROM_INTERNAL_SIZE(SvLEN(invlist)) - 1;
  }
-
-#ifndef PERL_IN_XSUB_RE
  SV*
  Perl__new_invlist(pTHX_ IV initial_size)
  {
  SV*
  Perl__new_invlist(pTHX_ IV initial_size)
  {
@@ -8611,7 +8645,6 @@ Perl__new_invlist_C_array(pTHX_ const UV* const list)
  
      return invlist;
  }
  
      return invlist;
  }
-#endif /* ifndef PERL_IN_XSUB_RE */
  
  STATIC void
  S_invlist_extend(pTHX_ SV* const invlist, const UV new_max)
  
  STATIC void
  S_invlist_extend(pTHX_ SV* const invlist, const UV new_max)
@@ -8655,7 +8688,7 @@ S__append_range_to_invlist(pTHX_ SV* const invlist,
  
         UV final_element = len - 1;
         array = invlist_array(invlist);
  
         UV final_element = len - 1;
         array = invlist_array(invlist);
-       if (array[final_element] > start
+       if (   array[final_element] > start
             || ELEMENT_RANGE_MATCHES_INVLIST(final_element))
         {
             Perl_croak(aTHX_ "panic: attempting to append to an inversion list, but wasn't at the end of the list, final=%"UVuf", start=%"UVuf", match=%c",
             || ELEMENT_RANGE_MATCHES_INVLIST(final_element))
         {
             Perl_croak(aTHX_ "panic: attempting to append to an inversion list, but wasn't at the end of the list, final=%"UVuf", start=%"UVuf", match=%c",
@@ -8663,10 +8696,10 @@ S__append_range_to_invlist(pTHX_ SV* const invlist,
                      ELEMENT_RANGE_MATCHES_INVLIST(final_element) ? 't' : 'f');
         }
  
                      ELEMENT_RANGE_MATCHES_INVLIST(final_element) ? 't' : 'f');
         }
  
-       /* Here, it is a legal append.  If the new range begins with the first
-        * value not in the set, it is extending the set, so the new first
-        * value not in the set is one greater than the newly extended range.
-        * */
+        /* Here, it is a legal append.  If the new range begins 1 above the end
+         * of the range below it, it is extending the range below it, so the
+         * new first value not in the set is one greater than the newly
+         * extended range.  */
          offset = *get_invlist_offset_addr(invlist);
         if (array[final_element] == start) {
             if (end != UV_MAX) {
          offset = *get_invlist_offset_addr(invlist);
         if (array[final_element] == start) {
             if (end != UV_MAX) {
@@ -8674,7 +8707,8 @@ S__append_range_to_invlist(pTHX_ SV* const invlist,
             }
             else {
                 /* But if the end is the maximum representable on the machine,
             }
             else {
                 /* But if the end is the maximum representable on the machine,
-                * just let the range that this would extend to have no end */
+                 * assume that infinity was actually what was meant.  Just let
+                 * the range that this would extend to have no end */
                 invlist_set_len(invlist, len - 1, offset);
             }
             return;
                 invlist_set_len(invlist, len - 1, offset);
             }
             return;
@@ -8712,9 +8746,7 @@ S__append_range_to_invlist(pTHX_ SV* const invlist,
      }
  }
  
      }
  }
  
-#ifndef PERL_IN_XSUB_RE
-
-IV
+SSize_t
  Perl__invlist_search(SV* const invlist, const UV cp)
  {
      /* Searches the inversion list for the entry that contains the input code
  Perl__invlist_search(SV* const invlist, const UV cp)
  {
      /* Searches the inversion list for the entry that contains the input code
@@ -8918,13 +8950,7 @@ Perl__invlist_union_maybe_complement_2nd(pTHX_ SV* const a, SV* const b,
       * length there.  The preface says to incorporate its examples into your
       * code at your own risk.
       *
       * length there.  The preface says to incorporate its examples into your
       * code at your own risk.
       *
-     * The algorithm is like a merge sort.
-     *
-     * XXX A potential performance improvement is to keep track as we go along
-     * if only one of the inputs contributes to the result, meaning the other
-     * is a subset of that one.  In that case, we can skip the final copy and
-     * return the larger of the input lists, but then outside code might need
-     * to keep track of whether to free the input list or not */
+     * The algorithm is like a merge sort. */
  
      const UV* array_a;    /* a's array */
      const UV* array_b;
  
      const UV* array_a;    /* a's array */
      const UV* array_b;
@@ -8941,10 +8967,12 @@ Perl__invlist_union_maybe_complement_2nd(pTHX_ SV* const a, SV* const b,
  
      /* running count, as explained in the algorithm source book; items are
       * stopped accumulating and are output when the count changes to/from 0.
  
      /* running count, as explained in the algorithm source book; items are
       * stopped accumulating and are output when the count changes to/from 0.
-     * The count is incremented when we start a range that's in the set, and
-     * decremented when we start a range that's not in the set.  So its range
-     * is 0 to 2.  Only when the count is zero is something not in the set.
-     */
+     * The count is incremented when we start a range that's in an input's set,
+     * and decremented when we start a range that's not in a set.  So this
+     * variable can be 0, 1, or 2.  When it is 0 neither input is in its set,
+     * and hence nothing goes into the union; 1, just one of the inputs is in
+     * its set (and its current range gets added to the union); and 2 when both
+     * inputs are in their sets.  */
      UV count = 0;
  
      PERL_ARGS_ASSERT__INVLIST_UNION_MAYBE_COMPLEMENT_2ND;
      UV count = 0;
  
      PERL_ARGS_ASSERT__INVLIST_UNION_MAYBE_COMPLEMENT_2ND;
@@ -8958,8 +8986,7 @@ Perl__invlist_union_maybe_complement_2nd(pTHX_ SV* const a, SV* const b,
           * It's easiest to create a new inversion list that matches everything.
           * */
          if (complement_b) {
           * It's easiest to create a new inversion list that matches everything.
           * */
          if (complement_b) {
-            SV* everything = _new_invlist(1);
-            _append_range_to_invlist(everything, 0, UV_MAX);
+            SV* everything = _add_range_to_invlist(NULL, 0, UV_MAX);
  
              /* If the output didn't exist, just point it at the new list */
              if (*output == NULL) {
  
              /* If the output didn't exist, just point it at the new list */
              if (*output == NULL) {
@@ -9007,7 +9034,7 @@ Perl__invlist_union_maybe_complement_2nd(pTHX_ SV* const a, SV* const b,
          invlist_replace_list_destroys_src(*output, u);
          SvREFCNT_dec_NN(u);
  
          invlist_replace_list_destroys_src(*output, u);
          SvREFCNT_dec_NN(u);
  
-       return;
+        return;
      }
  
      if (a == NULL || ((len_a = _invlist_len(a)) == 0)) {
      }
  
      if (a == NULL || ((len_a = _invlist_len(a)) == 0)) {
@@ -9078,10 +9105,10 @@ Perl__invlist_union_maybe_complement_2nd(pTHX_ SV* const a, SV* const b,
      u = _new_invlist(len_a + len_b);
  
      /* Will contain U+0000 if either component does */
      u = _new_invlist(len_a + len_b);
  
      /* Will contain U+0000 if either component does */
-    array_u = _invlist_array_init(u, (len_a > 0 && array_a[0] == 0)
-                                     || (len_b > 0 && array_b[0] == 0));
+    array_u = _invlist_array_init(u, (    len_a > 0 && array_a[0] == 0)
+                                      || (len_b > 0 && array_b[0] == 0));
  
  
-    /* Go through each list item by item, stopping when exhausted one of
+    /* Go through each input list item by item, stopping when exhausted one of
       * them */
      while (i_a < len_a && i_b < len_b) {
         UV cp;      /* The element to potentially add to the union's array */
       * them */
      while (i_a < len_a && i_b < len_b) {
         UV cp;      /* The element to potentially add to the union's array */
@@ -9089,21 +9116,21 @@ Perl__invlist_union_maybe_complement_2nd(pTHX_ SV* const a, SV* const b,
  
         /* We need to take one or the other of the two inputs for the union.
          * Since we are merging two sorted lists, we take the smaller of the
  
         /* We need to take one or the other of the two inputs for the union.
          * Since we are merging two sorted lists, we take the smaller of the
-        * next items.  In case of a tie, we take the one that is in its set
-        * first.  If we took one not in the set first, it would decrement the
-        * count, possibly to 0 which would cause it to be output as ending the
-        * range, and the next time through we would take the same number, and
-        * output it again as beginning the next range.  By doing it the
-        * opposite way, there is no possibility that the count will be
-        * momentarily decremented to 0, and thus the two adjoining ranges will
-        * be seamlessly merged.  (In a tie and both are in the set or both not
-        * in the set, it doesn't matter which we take first.) */
-       if (array_a[i_a] < array_b[i_b]
-           || (array_a[i_a] == array_b[i_b]
+         * next items.  In case of a tie, we take first the one that is in its
+         * set.  If we first took the one not in its set, it would decrement
+         * the count, possibly to 0 which would cause it to be output as ending
+         * the range, and the next time through we would take the same number,
+         * and output it again as beginning the next range.  By doing it the
+         * opposite way, there is no possibility that the count will be
+         * momentarily decremented to 0, and thus the two adjoining ranges will
+         * be seamlessly merged.  (In a tie and both are in the set or both not
+         * in the set, it doesn't matter which we take first.) */
+       if (       array_a[i_a] < array_b[i_b]
+           || (   array_a[i_a] == array_b[i_b]
                 && ELEMENT_RANGE_MATCHES_INVLIST(i_a)))
         {
             cp_in_set = ELEMENT_RANGE_MATCHES_INVLIST(i_a);
                 && ELEMENT_RANGE_MATCHES_INVLIST(i_a)))
         {
             cp_in_set = ELEMENT_RANGE_MATCHES_INVLIST(i_a);
-           cp= array_a[i_a++];
+           cp = array_a[i_a++];
         }
         else {
             cp_in_set = ELEMENT_RANGE_MATCHES_INVLIST(i_b);
         }
         else {
             cp_in_set = ELEMENT_RANGE_MATCHES_INVLIST(i_b);
@@ -9127,39 +9154,53 @@ Perl__invlist_union_maybe_complement_2nd(pTHX_ SV* const a, SV* const b,
         }
      }
  
         }
      }
  
-    /* Here, we are finished going through at least one of the lists, which
-     * means there is something remaining in at most one.  We check if the list
-     * that hasn't been exhausted is positioned such that we are in the middle
-     * of a range in its set or not.  (i_a and i_b point to the element beyond
-     * the one we care about.) If in the set, we decrement 'count'; if 0, there
-     * is potentially more to output.
-     * There are four cases:
-     * 1) Both weren't in their sets, count is 0, and remains 0.  What's left
-     *    in the union is entirely from the non-exhausted set.
-     * 2) Both were in their sets, count is 2.  Nothing further should
-     *    be output, as everything that remains will be in the exhausted
-     *    list's set, hence in the union; decrementing to 1 but not 0 insures
-     *    that
-     * 3) the exhausted was in its set, non-exhausted isn't, count is 1.
-     *    Nothing further should be output because the union includes
-     *    everything from the exhausted set.  Not decrementing ensures that.
-     * 4) the exhausted wasn't in its set, non-exhausted is, count is 1;
-     *    decrementing to 0 insures that we look at the remainder of the
-     *    non-exhausted set */
+
+    /* The loop above increments the index into exactly one of the input lists
+     * each iteration, and ends when either index gets to its list end.  That
+     * means the other index is lower than its end, and so something is
+     * remaining in that one.  We decrement 'count', as explained below, if
+     * that list is in its set.  (i_a and i_b each currently index the element
+     * beyond the one we care about.) */
      if (   (i_a != len_a && PREV_RANGE_MATCHES_INVLIST(i_a))
         || (i_b != len_b && PREV_RANGE_MATCHES_INVLIST(i_b)))
      {
         count--;
      }
  
      if (   (i_a != len_a && PREV_RANGE_MATCHES_INVLIST(i_a))
         || (i_b != len_b && PREV_RANGE_MATCHES_INVLIST(i_b)))
      {
         count--;
      }
  
-    /* The final length is what we've output so far, plus what else is about to
-     * be output.  (If 'count' is non-zero, then the input list we exhausted
-     * has everything remaining up to the machine's limit in its set, and hence
-     * in the union, so there will be no further output. */
-    len_u = i_u;
-    if (count == 0) {
-       /* At most one of the subexpressions will be non-zero */
-       len_u += (len_a - i_a) + (len_b - i_b);
+    /* Above we decremented 'count' if the list that had unexamined elements in
+     * it was in its set.  This has made it so that 'count' being non-zero
+     * means there isn't anything left to output; and 'count' equal to 0 means
+     * that what is left to output is precisely that which is left in the
+     * non-exhausted input list.
+     *
+     * To see why, note first that the exhausted input obviously has nothing
+     * left to add to the union.  If it was in its set at its end, that means
+     * the set extends from here to the platform's infinity, and hence so does
+     * the union and the non-exhausted set is irrelevant.  The exhausted set
+     * also contributed 1 to 'count'.  If 'count' was 2, it got decremented to
+     * 1, but if it was 1, the non-exhausted set wasn't in its set, and so
+     * 'count' remains at 1.  This is consistent with the decremented 'count'
+     * != 0 meaning there's nothing left to add to the union.
+     *
+     * But if the exhausted input wasn't in its set, it contributed 0 to
+     * 'count', and the rest of the union will be whatever the other input is.
+     * If 'count' was 0, neither list was in its set, and 'count' remains 0;
+     * otherwise it gets decremented to 0.  This is consistent with 'count'
+     * == 0 meaning the remainder of the union is whatever is left in the
+     * non-exhausted list. */
+    if (count != 0) {
+        len_u = i_u;
+    }
+    else {
+        IV copy_count = len_a - i_a;
+        if (copy_count > 0) {   /* The non-exhausted input is 'a' */
+           Copy(array_a + i_a, array_u + i_u, copy_count, UV);
+        }
+        else { /* The non-exhausted input is b */
+            copy_count = len_b - i_b;
+           Copy(array_b + i_b, array_u + i_u, copy_count, UV);
+        }
+        len_u = i_u + copy_count;
      }
  
      /* Set the result to the final length, which can change the pointer to
      }
  
      /* Set the result to the final length, which can change the pointer to
@@ -9171,22 +9212,6 @@ Perl__invlist_union_maybe_complement_2nd(pTHX_ SV* const a, SV* const b,
         array_u = invlist_array(u);
      }
  
         array_u = invlist_array(u);
      }
  
-    /* When 'count' is 0, the list that was exhausted (if one was shorter than
-     * the other) ended with everything above it not in its set.  That means
-     * that the remaining part of the union is precisely the same as the
-     * non-exhausted list, so can just copy it unchanged.  (If both lists were
-     * exhausted at the same time, then the operations below will be both 0.)
-     */
-    if (count == 0) {
-       IV copy_count; /* At most one will have a non-zero copy count */
-       if ((copy_count = len_a - i_a) > 0) {
-           Copy(array_a + i_a, array_u + i_u, copy_count, UV);
-       }
-       else if ((copy_count = len_b - i_b) > 0) {
-           Copy(array_b + i_b, array_u + i_u, copy_count, UV);
-       }
-    }
-
      /* If the output is not to overwrite either of the inputs, just return the
       * calculated union */
      if (a != *output && b != *output) {
      /* If the output is not to overwrite either of the inputs, just return the
       * calculated union */
      if (a != *output && b != *output) {
@@ -9199,7 +9224,7 @@ Perl__invlist_union_maybe_complement_2nd(pTHX_ SV* const a, SV* const b,
           *  shown [perl #127392] that if the input is a mortal, we can get a
           *  huge build-up of these during regex compilation before they get
           *  freed.  So for that case, replace just the input's interior with
           *  shown [perl #127392] that if the input is a mortal, we can get a
           *  huge build-up of these during regex compilation before they get
           *  freed.  So for that case, replace just the input's interior with
-         *  the output's, and then free the output */
+         *  the union's, and then free the union */
  
          assert(! invlist_is_iterating(*output));
  
  
          assert(! invlist_is_iterating(*output));
  
@@ -9251,12 +9276,12 @@ Perl__invlist_intersection_maybe_complement_2nd(pTHX_ SV* const a, SV* const b,
      UV i_b = 0;
      UV i_r = 0;
  
      UV i_b = 0;
      UV i_r = 0;
  
-    /* running count, as explained in the algorithm source book; items are
-     * stopped accumulating and are output when the count changes to/from 2.
-     * The count is incremented when we start a range that's in the set, and
-     * decremented when we start a range that's not in the set.  So its range
-     * is 0 to 2.  Only when the count is 2 is something in the intersection.
-     */
+    /* running count of how many of the two inputs are positioned at ranges
+     * that are in their sets.  As explained in the algorithm source book,
+     * items are stopped accumulating and are output when the count changes
+     * to/from 2.  The count is incremented when we start a range that's in an
+     * input's set, and decremented when we start a range that's not in a set.
+     * Only when it is 2 are we in the intersection. */
      UV count = 0;
  
      PERL_ARGS_ASSERT__INVLIST_INTERSECTION_MAYBE_COMPLEMENT_2ND;
      UV count = 0;
  
      PERL_ARGS_ASSERT__INVLIST_INTERSECTION_MAYBE_COMPLEMENT_2ND;
@@ -9328,8 +9353,8 @@ Perl__invlist_intersection_maybe_complement_2nd(pTHX_ SV* const a, SV* const b,
      r= _new_invlist(len_a + len_b);
  
      /* Will contain U+0000 iff both components do */
      r= _new_invlist(len_a + len_b);
  
      /* Will contain U+0000 iff both components do */
-    array_r = _invlist_array_init(r, len_a > 0 && array_a[0] == 0
-                                    && len_b > 0 && array_b[0] == 0);
+    array_r = _invlist_array_init(r,    len_a > 0 && array_a[0] == 0
+                                     && len_b > 0 && array_b[0] == 0);
  
      /* Go through each list item by item, stopping when exhausted one of
       * them */
  
      /* Go through each list item by item, stopping when exhausted one of
       * them */
@@ -9340,21 +9365,21 @@ Perl__invlist_intersection_maybe_complement_2nd(pTHX_ SV* const a, SV* const b,
  
         /* We need to take one or the other of the two inputs for the
          * intersection.  Since we are merging two sorted lists, we take the
  
         /* We need to take one or the other of the two inputs for the
          * intersection.  Since we are merging two sorted lists, we take the
-        * smaller of the next items.  In case of a tie, we take the one that
-        * is not in its set first (a difference from the union algorithm).  If
-        * we took one in the set first, it would increment the count, possibly
-        * to 2 which would cause it to be output as starting a range in the
-        * intersection, and the next time through we would take that same
-        * number, and output it again as ending the set.  By doing it the
-        * opposite of this, there is no possibility that the count will be
-        * momentarily incremented to 2.  (In a tie and both are in the set or
-        * both not in the set, it doesn't matter which we take first.) */
-       if (array_a[i_a] < array_b[i_b]
-           || (array_a[i_a] == array_b[i_b]
+         * smaller of the next items.  In case of a tie, we take first the one
+         * that is not in its set (a difference from the union algorithm).  If
+         * we first took the one in its set, it would increment the count,
+         * possibly to 2 which would cause it to be output as starting a range
+         * in the intersection, and the next time through we would take that
+         * same number, and output it again as ending the set.  By doing the
+         * opposite of this, there is no possibility that the count will be
+         * momentarily incremented to 2.  (In a tie and both are in the set or
+         * both not in the set, it doesn't matter which we take first.) */
+       if (       array_a[i_a] < array_b[i_b]
+           || (   array_a[i_a] == array_b[i_b]
                 && ! ELEMENT_RANGE_MATCHES_INVLIST(i_a)))
         {
             cp_in_set = ELEMENT_RANGE_MATCHES_INVLIST(i_a);
                 && ! ELEMENT_RANGE_MATCHES_INVLIST(i_a)))
         {
             cp_in_set = ELEMENT_RANGE_MATCHES_INVLIST(i_a);
-           cp= array_a[i_a++];
+           cp = array_a[i_a++];
         }
         else {
             cp_in_set = ELEMENT_RANGE_MATCHES_INVLIST(i_b);
         }
         else {
             cp_in_set = ELEMENT_RANGE_MATCHES_INVLIST(i_b);
@@ -9376,36 +9401,55 @@ Perl__invlist_intersection_maybe_complement_2nd(pTHX_ SV* const a, SV* const b,
             }
             count--;
         }
             }
             count--;
         }
+
      }
  
      }
  
-    /* Here, we are finished going through at least one of the lists, which
-     * means there is something remaining in at most one.  We check if the list
-     * that has been exhausted is positioned such that we are in the middle
-     * of a range in its set or not.  (i_a and i_b point to elements 1 beyond
-     * the ones we care about.)  There are four cases:
-     * 1) Both weren't in their sets, count is 0, and remains 0.  There's
-     *    nothing left in the intersection.
-     * 2) Both were in their sets, count is 2 and perhaps is incremented to
-     *    above 2.  What should be output is exactly that which is in the
-     *    non-exhausted set, as everything it has is also in the intersection
-     *    set, and everything it doesn't have can't be in the intersection
-     * 3) The exhausted was in its set, non-exhausted isn't, count is 1, and
-     *    gets incremented to 2.  Like the previous case, the intersection is
-     *    everything that remains in the non-exhausted set.
-     * 4) the exhausted wasn't in its set, non-exhausted is, count is 1, and
-     *    remains 1.  And the intersection has nothing more. */
+    /* The loop above increments the index into exactly one of the input lists
+     * each iteration, and ends when either index gets to its list end.  That
+     * means the other index is lower than its end, and so something is
+     * remaining in that one.  We increment 'count', as explained below, if the
+     * exhausted list was in its set.  (i_a and i_b each currently index the
+     * element beyond the one we care about.) */
      if (   (i_a == len_a && PREV_RANGE_MATCHES_INVLIST(i_a))
      if (   (i_a == len_a && PREV_RANGE_MATCHES_INVLIST(i_a))
-       || (i_b == len_b && PREV_RANGE_MATCHES_INVLIST(i_b)))
+        || (i_b == len_b && PREV_RANGE_MATCHES_INVLIST(i_b)))
      {
         count++;
      }
  
      {
         count++;
      }
  
-    /* The final length is what we've output so far plus what else is in the
-     * intersection.  At most one of the subexpressions below will be non-zero
-     * */
-    len_r = i_r;
-    if (count >= 2) {
-       len_r += (len_a - i_a) + (len_b - i_b);
+    /* Above we incremented 'count' if the exhausted list was in its set.  This
+     * has made it so that 'count' being below 2 means there is nothing left to
+     * output; otherwise what's left to add to the intersection is precisely
+     * that which is left in the non-exhausted input list.
+     *
+     * To see why, note first that the exhausted input obviously has nothing
+     * left to affect the intersection.  If it was in its set at its end, that
+     * means the set extends from here to the platform's infinity, and hence
+     * anything in the non-exhausted's list will be in the intersection, and
+     * anything not in it won't be.  Hence, the rest of the intersection is
+     * precisely what's in the non-exhausted list.  The exhausted set also
+     * contributed 1 to 'count', meaning 'count' was at least 1.  Incrementing
+     * it means 'count' is now at least 2.  This is consistent with an
+     * incremented 'count' of >= 2 meaning to add the non-exhausted list to
+     * the intersection.
+     *
+     * But if the exhausted input wasn't in its set, it contributed 0 to
+     * 'count', and the intersection can't include anything further; the
+     * non-exhausted set is irrelevant.  'count' was at most 1, and doesn't get
+     * incremented.  This is consistent with 'count' being < 2 meaning nothing
+     * further to add to the intersection. */
+    if (count < 2) { /* Nothing left to put in the intersection. */
+        len_r = i_r;
+    }
+    else { /* copy the non-exhausted list, unchanged. */
+        IV copy_count = len_a - i_a;
+        if (copy_count > 0) {   /* a is the one with stuff left */
+           Copy(array_a + i_a, array_r + i_r, copy_count, UV);
+        }
+        else {  /* b is the one with stuff left */
+            copy_count = len_b - i_b;
+           Copy(array_b + i_b, array_r + i_r, copy_count, UV);
+        }
+        len_r = i_r + copy_count;
      }
  
      /* Set the result to the final length, which can change the pointer to
      }
  
      /* Set the result to the final length, which can change the pointer to
@@ -9464,50 +9508,261 @@ Perl__invlist_intersection_maybe_complement_2nd(pTHX_ SV* const a, SV* const b,
  }
  
  SV*
  }
  
  SV*
-Perl__add_range_to_invlist(pTHX_ SV* invlist, const UV start, const UV end)
+Perl__add_range_to_invlist(pTHX_ SV* invlist, UV start, UV end)
  {
      /* Add the range from 'start' to 'end' inclusive to the inversion list's
       * set.  A pointer to the inversion list is returned.  This may actually be
       * a new list, in which case the passed in one has been destroyed.  The
       * passed-in inversion list can be NULL, in which case a new one is created
  {
      /* Add the range from 'start' to 'end' inclusive to the inversion list's
       * set.  A pointer to the inversion list is returned.  This may actually be
       * a new list, in which case the passed in one has been destroyed.  The
       * passed-in inversion list can be NULL, in which case a new one is created
-     * with just the one range in it */
-
-    SV* range_invlist;
-    UV len;
-
+     * with just the one range in it.  The new list is not necessarily
+     * NUL-terminated.  Space is not freed if the inversion list shrinks as a
+     * result of this function.  The gain would not be large, and in many
+     * cases, this is called multiple times on a single inversion list, so
+     * anything freed may almost immediately be needed again.
+     *
+     * This used to mostly call the 'union' routine, but that is much more
+     * heavyweight than really needed for a single range addition */
+
+    UV* array;              /* The array implementing the inversion list */
+    UV len;                 /* How many elements in 'array' */
+    SSize_t i_s;            /* index into the invlist array where 'start'
+                               should go */
+    SSize_t i_e = 0;        /* And the index where 'end' should go */
+    UV cur_highest;         /* The highest code point in the inversion list
+                               upon entry to this function */
+
+    /* This range becomes the whole inversion list if none already existed */
      if (invlist == NULL) {
         invlist = _new_invlist(2);
      if (invlist == NULL) {
         invlist = _new_invlist(2);
-       len = 0;
+        _append_range_to_invlist(invlist, start, end);
+        return invlist;
      }
      }
-    else {
-       len = _invlist_len(invlist);
+
+    /* Likewise, if the inversion list is currently empty */
+    len = _invlist_len(invlist);
+    if (len == 0) {
+        _append_range_to_invlist(invlist, start, end);
+        return invlist;
      }
  
      }
  
-    /* If comes after the final entry actually in the list, can just append it
-     * to the end, */
-    if (len == 0
-       || (! ELEMENT_RANGE_MATCHES_INVLIST(len - 1)
-            && start >= invlist_array(invlist)[len - 1]))
-    {
-       _append_range_to_invlist(invlist, start, end);
-       return invlist;
+    /* Starting here, we have to know the internals of the list */
+    array = invlist_array(invlist);
+
+    /* If the new range ends higher than the current highest ... */
+    cur_highest = invlist_highest(invlist);
+    if (end > cur_highest) {
+
+        /* If the whole range is higher, we can just append it */
+        if (start > cur_highest) {
+            _append_range_to_invlist(invlist, start, end);
+            return invlist;
+        }
+
+        /* Otherwise, add the portion that is higher ... */
+        _append_range_to_invlist(invlist, cur_highest + 1, end);
+
+        /* ... and continue on below to handle the rest.  As a result of the
+         * above append, we know that the index of the end of the range is the
+         * final even numbered one of the array.  Recall that the final element
+         * always starts a range that extends to infinity.  If that range is in
+         * the set (meaning the set goes from here to infinity), it will be an
+         * even index, but if it isn't in the set, it's odd, and the final
+         * range in the set is one less, which is even. */
+        if (end == UV_MAX) {
+            i_e = len;
+        }
+        else {
+            i_e = len - 2;
+        }
+    }
+
+    /* We have dealt with appending, now see about prepending.  If the new
+     * range starts lower than the current lowest ... */
+    if (start < array[0]) {
+
+        /* Adding something which has 0 in it is somewhat tricky, and uncommon.
+         * Let the union code handle it, rather than having to know the
+         * trickiness in two code places.  */
+        if (UNLIKELY(start == 0)) {
+            SV* range_invlist;
+
+            range_invlist = _new_invlist(2);
+            _append_range_to_invlist(range_invlist, start, end);
+
+            _invlist_union(invlist, range_invlist, &invlist);
+
+            SvREFCNT_dec_NN(range_invlist);
+
+            return invlist;
+        }
+
+        /* If the whole new range comes before the first entry, and doesn't
+         * extend it, we have to insert it as an additional range */
+        if (end < array[0] - 1) {
+            i_s = i_e = -1;
+            goto splice_in_new_range;
+        }
+
+        /* Here the new range adjoins the existing first range, extending it
+         * downwards. */
+        array[0] = start;
+
+        /* And continue on below to handle the rest.  We know that the index of
+         * the beginning of the range is the first one of the array */
+        i_s = 0;
+    }
+    else { /* Not prepending any part of the new range to the existing list.
+            * Find where in the list it should go.  This finds i_s, such that:
+            *     array[i_s] <= start < array[i_s+1]
+            */
+        i_s = _invlist_search(invlist, start);
+    }
+
+    /* At this point, any extending before the beginning of the inversion list
+     * and/or after the end has been done.  This has made it so that, in the
+     * code below, each endpoint of the new range is either in a range that is
+     * in the set, or is in a gap between two ranges that are.  This means we
+     * don't have to worry about exceeding the array bounds.
+     *
+     * Find where in the list the new range ends (but we can skip this if we
+     * have already determined what it is, or if it will be the same as i_s,
+     * which we already have computed) */
+    if (i_e == 0) {
+        i_e = (start == end)
+              ? i_s
+              : _invlist_search(invlist, end);
+    }
+
+    /* Here generally array[i_e] <= end < array[i_e+1].  But if array[i_e]
+     * is a range that goes to infinity there is no element at array[i_e+1],
+     * so only the first relation holds. */
+
+    if ( ! ELEMENT_RANGE_MATCHES_INVLIST(i_s)) {
+
+        /* Here, the ranges on either side of the beginning of the new range
+         * are in the set, and this range starts in the gap between them.
+         *
+         * The new range extends the range above it downwards if the new range
+         * ends at or above that range's start */
+        const bool extends_the_range_above = (   end == UV_MAX
+                                              || end + 1 >= array[i_s+1]);
+
+        /* The new range extends the range below it upwards if it begins just
+         * after where that range ends */
+        if (start == array[i_s]) {
+
+            /* If the new range fills the entire gap between the other ranges,
+             * they will get merged together.  Other ranges may also get
+             * merged, depending on how many of them the new range spans.  In
+             * the general case, we do the merge later, just once, after we
+             * figure out how many to merge.  But in the case where the new
+             * range exactly spans just this one gap (possibly extending into
+             * the one above), we do the merge here, and an early exit.  This
+             * is done here to avoid having to special case later. */
+            if (i_e - i_s <= 1) {
+
+                /* If i_e - i_s == 1, it means that the new range terminates
+                 * within the range above, and hence 'extends_the_range_above'
+                 * must be true.  (If the range above it extends to infinity,
+                 * 'i_s+2' will be above the array's limit, but 'len-i_s-2'
+                 * will be 0, so no harm done.) */
+                if (extends_the_range_above) {
+                    Move(array + i_s + 2, array + i_s, len - i_s - 2, UV);
+                    invlist_set_len(invlist,
+                                    len - 2,
+                                    *(get_invlist_offset_addr(invlist)));
+                    return invlist;
+                }
+
+                /* Here, i_e must == i_s.  We keep them in sync, as they apply
+                 * to the same range, and below we are about to decrement i_s
+                 * */
+                i_e--;
+            }
+
+            /* Here, the new range is adjacent to the one below.  (It may also
+             * span beyond the range above, but that will get resolved later.)
+             * Extend the range below to include this one. */
+            array[i_s] = (end == UV_MAX) ? UV_MAX : end + 1;
+            i_s--;
+            start = array[i_s];
+        }
+        else if (extends_the_range_above) {
+
+            /* Here the new range only extends the range above it, but not the
+             * one below.  It merges with the one above.  Again, we keep i_e
+             * and i_s in sync if they point to the same range */
+            if (i_e == i_s) {
+                i_e++;
+            }
+            i_s++;
+            array[i_s] = start;
+        }
+    }
+
+    /* Here, we've dealt with the new range start extending any adjoining
+     * existing ranges.
+     *
+     * If the new range extends to infinity, it is now the final one,
+     * regardless of what was there before */
+    if (UNLIKELY(end == UV_MAX)) {
+        invlist_set_len(invlist, i_s + 1, *(get_invlist_offset_addr(invlist)));
+        return invlist;
      }
  
      }
  
-    /* Here, can't just append things, create and return a new inversion list
-     * which is the union of this range and the existing inversion list.  (If
-     * the new range is well-behaved wrt to the old one, we could just insert
-     * it, doing a Move() down on the tail of the old one (potentially growing
-     * it first).  But to determine that means we would have the extra
-     * (possibly throw-away) work of first finding where the new one goes and
-     * whether it disrupts (splits) an existing range, so it doesn't appear to
-     * me (khw) that it's worth it) */
-    range_invlist = _new_invlist(2);
-    _append_range_to_invlist(range_invlist, start, end);
+    /* If i_e started as == i_s, it has also been dealt with,
+     * and been updated to the new i_s, which will fail the following if */
+    if (! ELEMENT_RANGE_MATCHES_INVLIST(i_e)) {
  
  
-    _invlist_union(invlist, range_invlist, &invlist);
+        /* Here, the ranges on either side of the end of the new range are in
+         * the set, and this range ends in the gap between them.
+         *
+         * If this range is adjacent to (hence extends) the range above it, it
+         * becomes part of that range; likewise if it extends the range below,
+         * it becomes part of that range */
+        if (end + 1 == array[i_e+1]) {
+            i_e++;
+            array[i_e] = start;
+        }
+        else if (start <= array[i_e]) {
+            array[i_e] = end + 1;
+            i_e--;
+        }
+    }
+
+    if (i_s == i_e) {
+
+        /* If the range fits entirely in an existing range (as possibly already
+         * extended above), it doesn't add anything new */
+        if (ELEMENT_RANGE_MATCHES_INVLIST(i_s)) {
+            return invlist;
+        }
  
  
-    /* The temporary can be freed */
-    SvREFCNT_dec_NN(range_invlist);
+        /* Here, no part of the range is in the list.  Must add it.  It will
+         * occupy 2 more slots */
+      splice_in_new_range:
+
+        invlist_extend(invlist, len + 2);
+        array = invlist_array(invlist);
+        /* Move the rest of the array down two slots. Don't include any
+         * trailing NUL */
+        Move(array + i_e + 1, array + i_e + 3, len - i_e - 1, UV);
+
+        /* Do the actual splice */
+        array[i_e+1] = start;
+        array[i_e+2] = end + 1;
+        invlist_set_len(invlist, len + 2, *(get_invlist_offset_addr(invlist)));
+        return invlist;
+    }
+
+    /* Here the new range crossed the boundaries of a pre-existing range.  The
+     * code above has adjusted things so that both ends are in ranges that are
+     * in the set.  This means everything in between must also be in the set.
+     * Just squash things together */
+    Move(array + i_e + 1, array + i_s + 1, len - i_e - 1, UV);
+    invlist_set_len(invlist,
+                    len - i_e + i_s,
+                    *(get_invlist_offset_addr(invlist)));
  
      return invlist;
  }
  
      return invlist;
  }
@@ -9533,7 +9788,7 @@ Perl__setup_canned_invlist(pTHX_ const STRLEN size, const UV element0,
  
      PERL_ARGS_ASSERT__SETUP_CANNED_INVLIST;
  
  
      PERL_ARGS_ASSERT__SETUP_CANNED_INVLIST;
  
-    _append_range_to_invlist(invlist, element0, element0);
+    invlist = add_cp_to_invlist(invlist, element0);
      offset = *get_invlist_offset_addr(invlist);
  
      invlist_set_len(invlist, size, offset);
      offset = *get_invlist_offset_addr(invlist);
  
      invlist_set_len(invlist, size, offset);
@@ -10218,8 +10473,9 @@ S_parse_lparen_question_flags(pTHX_ RExC_state_t *pRExC_state)
                  if (RExC_flags & RXf_PMf_FOLD) {
                      RExC_contains_i = 1;
                  }
                  if (RExC_flags & RXf_PMf_FOLD) {
                      RExC_contains_i = 1;
                  }
-                if (PASS2) {
-                    STD_PMMOD_FLAGS_PARSE_X_WARN(x_mod_count);
+
+                if (UNLIKELY((x_mod_count) > 1)) {
+                    vFAIL("Only one /x regex modifier is allowed");
                  }
                  return;
                  /*NOTREACHED*/
                  }
                  return;
                  /*NOTREACHED*/
@@ -10604,7 +10860,10 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                  RExC_seen |= REG_LOOKBEHIND_SEEN;
                 RExC_in_lookbehind++;
                 RExC_parse++;
                  RExC_seen |= REG_LOOKBEHIND_SEEN;
                 RExC_in_lookbehind++;
                 RExC_parse++;
-                assert(RExC_parse < RExC_end);
+                if (RExC_parse >= RExC_end) {
+                    vFAIL("Sequence (?... not terminated");
+                }
+
                  /* FALLTHROUGH */
             case '=':           /* (?=...) */
                 RExC_seen_zerolen++;
                  /* FALLTHROUGH */
             case '=':           /* (?=...) */
                 RExC_seen_zerolen++;
@@ -11340,7 +11599,7 @@ S_regbranch(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, I32 first, U32 depth)
              FAIL2("panic: regpiece returned NULL, flags=%#"UVxf"", (UV) flags);
         }
         else if (ret == NULL)
              FAIL2("panic: regpiece returned NULL, flags=%#"UVxf"", (UV) flags);
         }
         else if (ret == NULL)
-           ret = latest;
+            ret = latest;
         *flagp |= flags&(HASWIDTH|POSTPONED);
         if (chain == NULL)      /* First piece. */
             *flagp |= flags&SPSTART;
         *flagp |= flags&(HASWIDTH|POSTPONED);
         if (chain == NULL)      /* First piece. */
             *flagp |= flags&SPSTART;
@@ -11950,39 +12209,6 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state,
  }
  
  
  }
  
  
-/*
- * reg_recode
- *
- * It returns the code point in utf8 for the value in *encp.
- *    value: a code value in the source encoding
- *    encp:  a pointer to an Encode object
- *
- * If the result from Encode is not a single character,
- * it returns U+FFFD (Replacement character) and sets *encp to NULL.
- */
-STATIC UV
-S_reg_recode(pTHX_ const U8 value, SV **encp)
-{
-    STRLEN numlen = 1;
-    SV * const sv = newSVpvn_flags((const char *) &value, numlen, SVs_TEMP);
-    const char * const s = *encp ? sv_recode_to_utf8(sv, *encp) : SvPVX(sv);
-    const STRLEN newlen = SvCUR(sv);
-    UV uv = UNICODE_REPLACEMENT;
-
-    PERL_ARGS_ASSERT_REG_RECODE;
-
-    if (newlen)
-       uv = SvUTF8(sv)
-            ? utf8n_to_uvchr((U8*)s, newlen, &numlen, UTF8_ALLOW_DEFAULT)
-            : *(U8*)s;
-
-    if (!newlen || numlen != newlen) {
-       uv = UNICODE_REPLACEMENT;
-       *encp = NULL;
-    }
-    return uv;
-}
-
  PERL_STATIC_INLINE U8
  S_compute_EXACTish(RExC_state_t *pRExC_state)
  {
  PERL_STATIC_INLINE U8
  S_compute_EXACTish(RExC_state_t *pRExC_state)
  {
@@ -12196,13 +12422,15 @@ S_backref_value(char *p)
  /*
   - regatom - the lowest level
  
  /*
   - regatom - the lowest level
  
-   Try to identify anything special at the start of the pattern. If there
-   is, then handle it as required. This may involve generating a single regop,
-   such as for an assertion; or it may involve recursing, such as to
-   handle a () structure.
+   Try to identify anything special at the start of the current parse position.
+   If there is, then handle it as required. This may involve generating a
+   single regop, such as for an assertion; or it may involve recursing, such as
+   to handle a () structure.
  
     If the string doesn't start with something special then we gobble up
  
     If the string doesn't start with something special then we gobble up
-   as much literal text as we can.
+   as much literal text as we can.  If we encounter a quantifier, we have to
+   back off the final literal character, as that quantifier applies to just it
+   and not to the whole string of literals.
  
     Once we have been able to handle whatever type of thing started the
     sequence, we return.
  
     Once we have been able to handle whatever type of thing started the
     sequence, we return.
@@ -12917,6 +13145,9 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                     || UTF8_IS_INVARIANT(UCHARAT(RExC_parse))
                     || UTF8_IS_START(UCHARAT(RExC_parse)));
  
                     || UTF8_IS_INVARIANT(UCHARAT(RExC_parse))
                     || UTF8_IS_START(UCHARAT(RExC_parse)));
  
+            /* Here, we have a literal character.  Find the maximal string of
+             * them in the input that we can fit into a single EXACTish node.
+             * We quit at the first non-literal or when the node gets full */
             for (p = RExC_parse;
                  len < upper_parse && p < RExC_end;
                  len++)
             for (p = RExC_parse;
                  len < upper_parse && p < RExC_end;
                  len++)
@@ -13047,9 +13278,6 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                                 vFAIL(error_msg);
                             }
                              ender = result;
                                 vFAIL(error_msg);
                             }
                              ender = result;
-                           if (IN_ENCODING && ender < 0x100) {
-                               goto recode_encoding;
-                           }
                             if (ender > 0xff) {
                                 REQUIRE_UTF8(flagp);
                             }
                             if (ender > 0xff) {
                                 REQUIRE_UTF8(flagp);
                             }
@@ -13082,11 +13310,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                                  if (RExC_recode_x_to_native) {
                                      ender = LATIN1_TO_NATIVE(ender);
                                  }
                                  if (RExC_recode_x_to_native) {
                                      ender = LATIN1_TO_NATIVE(ender);
                                  }
-                                else
  #endif
  #endif
-                                if (IN_ENCODING) {
-                                    goto recode_encoding;
-                                }
                             }
                              else {
                                 REQUIRE_UTF8(flagp);
                             }
                              else {
                                 REQUIRE_UTF8(flagp);
@@ -13146,17 +13370,6 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                                           form_short_octal_warning(p, numlen));
                              }
                         }
                                           form_short_octal_warning(p, numlen));
                              }
                         }
-                       if (IN_ENCODING && ender < 0x100)
-                           goto recode_encoding;
-                       break;
-                     recode_encoding:
-                       if (! RExC_override_recoding) {
-                           SV* enc = _get_encoding();
-                           ender = reg_recode((U8)ender, &enc);
-                           if (!enc && PASS2)
-                               ckWARNreg(p, "Invalid escape in the specified encoding");
-                           REQUIRE_UTF8(flagp);
-                       }
                         break;
                     case '\0':
                         if (p >= RExC_end)
                         break;
                     case '\0':
                         if (p >= RExC_end)
@@ -13174,14 +13387,13 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                     } /* End of switch on '\' */
                     break;
                 case '{':
                     } /* End of switch on '\' */
                     break;
                 case '{':
-                   /* Currently we don't warn when the lbrace is at the start
+                   /* Currently we don't care if the lbrace is at the start
                      * of a construct.  This catches it in the middle of a
                      * literal string, or when it's the first thing after
                      * something like "\b" */
                      * of a construct.  This catches it in the middle of a
                      * literal string, or when it's the first thing after
                      * something like "\b" */
-                   if (! SIZE_ONLY
-                       && (len || (p > RExC_start && isALPHA_A(*(p -1)))))
-                   {
-                       ckWARNregdep(p + 1, "Unescaped left brace in regex is deprecated, passed through");
+                   if (len || (p > RExC_start && isALPHA_A(*(p -1)))) {
+                        RExC_parse = p + 1;
+                       vFAIL("Unescaped left brace in regex is illegal here");
                     }
                     /*FALLTHROUGH*/
                 default:    /* A literal character */
                     }
                     /*FALLTHROUGH*/
                 default:    /* A literal character */
@@ -13213,6 +13425,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                   * the node, close the node with just them, and set up to do
                   * this character again next time through, when it will be the
                   * only thing in its new node */
                   * the node, close the node with just them, and set up to do
                   * this character again next time through, when it will be the
                   * only thing in its new node */
+
                  if ((next_is_quantifier = (   LIKELY(p < RExC_end)
                                             && UNLIKELY(ISMULT2(p))))
                      && LIKELY(len))
                  if ((next_is_quantifier = (   LIKELY(p < RExC_end)
                                             && UNLIKELY(ISMULT2(p))))
                      && LIKELY(len))
@@ -13586,8 +13799,6 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
             RExC_parse = p - 1;
              Set_Node_Cur_Length(ret, parse_start);
             RExC_parse = p;
             RExC_parse = p - 1;
              Set_Node_Cur_Length(ret, parse_start);
             RExC_parse = p;
-            skip_to_be_ignored_text(pRExC_state, &RExC_parse,
-                                    FALSE /* Don't force to /x */ );
             {
                 /* len is STRLEN which is unsigned, need to copy to signed */
                 IV iv = len;
             {
                 /* len is STRLEN which is unsigned, need to copy to signed */
                 IV iv = len;
@@ -13599,6 +13810,13 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
         break;
      } /* End of giant switch on input character */
  
         break;
      } /* End of giant switch on input character */
  
+    /* Position parse to next real character */
+    skip_to_be_ignored_text(pRExC_state, &RExC_parse,
+                                            FALSE /* Don't force to /x */ );
+    if (PASS2 && *RExC_parse == '{' && OP(ret) != SBOL && ! regcurly(RExC_parse)) {
+        ckWARNregdep(RExC_parse + 1, "Unescaped left brace in regex is deprecated here, passed through");
+    }
+
      return(ret);
  }
  
      return(ret);
  }
  
@@ -13689,8 +13907,8 @@ S_populate_ANYOF_from_invlist(pTHX_ regnode *node, SV** invlist_ptr)
   * routine. q.v. */
  #define ADD_POSIX_WARNING(p, text)  STMT_START {                            \
          if (posix_warnings) {                                               \
   * routine. q.v. */
  #define ADD_POSIX_WARNING(p, text)  STMT_START {                            \
          if (posix_warnings) {                                               \
-            if (! warn_text) warn_text = newAV();                           \
-            av_push(warn_text, Perl_newSVpvf(aTHX_                          \
+            if (! RExC_warn_text ) RExC_warn_text = (AV *) sv_2mortal((SV *) newAV()); \
+            av_push(RExC_warn_text, Perl_newSVpvf(aTHX_                          \
                                               WARNING_PREFIX                 \
                                               text                           \
                                               REPORT_LOCATION,               \
                                               WARNING_PREFIX                 \
                                               text                           \
                                               REPORT_LOCATION,               \
@@ -13821,7 +14039,6 @@ S_handle_possible_posix(pTHX_ RExC_state_t *pRExC_state,
      bool has_opening_colon    = FALSE;
      int class_number          = OOB_NAMEDCLASS; /* Out-of-bounds until find
                                                     valid class */
      bool has_opening_colon    = FALSE;
      int class_number          = OOB_NAMEDCLASS; /* Out-of-bounds until find
                                                     valid class */
-    AV* warn_text             = NULL;   /* any warning messages */
      const char * possible_end = NULL;   /* used for a 2nd parse pass */
      const char* name_start;             /* ptr to class name first char */
  
      const char * possible_end = NULL;   /* used for a 2nd parse pass */
      const char* name_start;             /* ptr to class name first char */
  
@@ -13837,6 +14054,9 @@ S_handle_possible_posix(pTHX_ RExC_state_t *pRExC_state,
  
      PERL_ARGS_ASSERT_HANDLE_POSSIBLE_POSIX;
  
  
      PERL_ARGS_ASSERT_HANDLE_POSSIBLE_POSIX;
  
+    if (posix_warnings && RExC_warn_text)
+        av_clear(RExC_warn_text);
+
      if (p >= e) {
          return NOT_MEANT_TO_BE_A_POSIX_CLASS;
      }
      if (p >= e) {
          return NOT_MEANT_TO_BE_A_POSIX_CLASS;
      }
@@ -14454,14 +14674,8 @@ S_handle_possible_posix(pTHX_ RExC_state_t *pRExC_state,
                  ADD_POSIX_WARNING(p, "there is no terminating ']'");
              }
  
                  ADD_POSIX_WARNING(p, "there is no terminating ']'");
              }
  
-            if (warn_text) {
-                if (posix_warnings) {
-                    /* mortalize to avoid a leak with FATAL warnings */
-                    *posix_warnings = (AV *) sv_2mortal((SV *) warn_text);
-                }
-                else {
-                    SvREFCNT_dec_NN(warn_text);
-                }
+            if (posix_warnings && RExC_warn_text && av_top_index(RExC_warn_text) > -1) {
+                *posix_warnings = RExC_warn_text;
              }
          }
          else if (class_number != OOB_NAMEDCLASS) {
              }
          }
          else if (class_number != OOB_NAMEDCLASS) {
@@ -14533,7 +14747,9 @@ S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV** return_invlist,
                                         'stack' of where the undealt-with left
                                         parens would be if they were actually
                                         put there */
                                         'stack' of where the undealt-with left
                                         parens would be if they were actually
                                         put there */
-    IV fence = 0;                   /* Position of where most recent undealt-
+    /* The 'VOL' (expanding to 'volatile') is a workaround for an optimiser bug
+     * in Solaris Studio 12.3. See RT #127455 */
+    VOL IV fence = 0;               /* Position of where most recent undealt-
                                         with left paren in stack is; -1 if none.
                                       */
      STRLEN len;                     /* Temporary */
                                         with left paren in stack is; -1 if none.
                                       */
      STRLEN len;                     /* Temporary */
@@ -15138,21 +15354,7 @@ redo_curchar:
               * may have altered the stack in the time since we earlier set
               * 'top_index'.  */
  
               * may have altered the stack in the time since we earlier set
               * 'top_index'.  */
  
-            {
-                /* Work round an optimiser bug in Solaris Studio 12.3:
-                 * for some reason, the presence of the __assert() in
-                 * av_tindex_nomg() causes the value of fence to get
-                 * corrupted, even though the assert is never called. So
-                 * save the value then restore afterwards.
-                 * Note that in fact merely accessing the value of fence
-                 * prior to the statement containing the assert is enough
-                 * to make the bug go away.
-                 */
-                IV f = fence;
-                top_index = av_tindex_nomg(stack);
-                fence = f;
-            }
-
+            top_index = av_tindex_nomg(stack);
              if (top_index - fence >= 0) {
                  /* If the top entry on the stack is an operator, it had better
                   * be a '!', otherwise the entry below the top operand should
              if (top_index - fence >= 0) {
                  /* If the top entry on the stack is an operator, it had better
                   * be a '!', otherwise the entry below the top operand should
@@ -15972,6 +16174,10 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                      SAVEFREEPV(name);
                      if (FOLD) {
                          lookup_name = savepv(Perl_form(aTHX_ "__%s_i", name));
                      SAVEFREEPV(name);
                      if (FOLD) {
                          lookup_name = savepv(Perl_form(aTHX_ "__%s_i", name));
+
+                        /* The function call just below that uses this can fail
+                         * to return, leaking memory if we don't do this */
+                        SAVEFREEPV(lookup_name);
                      }
  
                      /* Look up the property name, and get its swash and
                      }
  
                      /* Look up the property name, and get its swash and
@@ -15987,9 +16193,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                                               NULL, /* No inversion list */
                                               &swash_init_flags
                                              );
                                               NULL, /* No inversion list */
                                               &swash_init_flags
                                              );
-                    if (lookup_name) {
-                        Safefree(lookup_name);
-                    }
                      if (! swash || ! (invlist = _get_swash_invlist(swash))) {
                          HV* curpkg = (IN_PERL_COMPILETIME)
                                        ? PL_curstash
                      if (! swash || ! (invlist = _get_swash_invlist(swash))) {
                          HV* curpkg = (IN_PERL_COMPILETIME)
                                        ? PL_curstash
@@ -16141,9 +16344,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                     }
                 }
                  non_portable_endpoint++;
                     }
                 }
                  non_portable_endpoint++;
-               if (IN_ENCODING && value < 0x100) {
-                   goto recode_encoding;
-               }
                 break;
             case 'x':
                 RExC_parse--;   /* function expects to be pointed at the 'x' */
                 break;
             case 'x':
                 RExC_parse--;   /* function expects to be pointed at the 'x' */
@@ -16161,8 +16361,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                     }
                 }
                  non_portable_endpoint++;
                     }
                 }
                  non_portable_endpoint++;
-               if (IN_ENCODING && value < 0x100)
-                   goto recode_encoding;
                 break;
             case 'c':
                 value = grok_bslash_c(*RExC_parse++, PASS2);
                 break;
             case 'c':
                 value = grok_bslash_c(*RExC_parse++, PASS2);
@@ -16195,23 +16393,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                          }
                      }
                      non_portable_endpoint++;
                          }
                      }
                      non_portable_endpoint++;
-                   if (IN_ENCODING && value < 0x100)
-                       goto recode_encoding;
-                   break;
-               }
-             recode_encoding:
-               if (! RExC_override_recoding) {
-                   SV* enc = _get_encoding();
-                   value = reg_recode((U8)value, &enc);
-                   if (!enc) {
-                        if (strict) {
-                            vFAIL("Invalid escape in the specified encoding");
-                        }
-                        else if (PASS2) {
-                            ckWARNreg(RExC_parse,
-                                 "Invalid escape in the specified encoding");
-                        }
-                    }
                     break;
                 }
             default:
                     break;
                 }
             default:
@@ -16352,9 +16533,9 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
              else if (! SIZE_ONLY) {
  
                  /* Here, not in pass1 (in that pass we skip calculating the
              else if (! SIZE_ONLY) {
  
                  /* Here, not in pass1 (in that pass we skip calculating the
-                 * contents of this class), and is /l, or is a POSIX class for
-                 * which /l doesn't matter (or is a Unicode property, which is
-                 * skipped here). */
+                 * contents of this class), and is not /l, or is a POSIX class
+                 * for which /l doesn't matter (or is a Unicode property, which
+                 * is skipped here). */
                  if (namedclass >= ANYOF_POSIXL_MAX) {  /* If a special class */
                      if (namedclass != ANYOF_UNIPROP) { /* UNIPROP = \p and \P */
  
                  if (namedclass >= ANYOF_POSIXL_MAX) {  /* If a special class */
                      if (namedclass != ANYOF_UNIPROP) { /* UNIPROP = \p and \P */
  
@@ -16379,9 +16560,9 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                                  &cp_list);
                      }
                  }
                                  &cp_list);
                      }
                  }
-                else if (UNI_SEMANTICS
+                else if (  UNI_SEMANTICS
                          || classnum == _CC_ASCII
                          || classnum == _CC_ASCII
-                        || (DEPENDS_SEMANTICS && (classnum == _CC_DIGIT
+                        || (DEPENDS_SEMANTICS && (   classnum == _CC_DIGIT
                                                    || classnum == _CC_XDIGIT)))
                  {
                      /* We usually have to worry about /d and /a affecting what
                                                    || classnum == _CC_XDIGIT)))
                  {
                      /* We usually have to worry about /d and /a affecting what
@@ -17178,76 +17359,152 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
         SvREFCNT_dec_NN(cp_foldable_list);
      }
  
         SvREFCNT_dec_NN(cp_foldable_list);
      }
  
-    /* And combine the result (if any) with any inversion list from posix
+    /* And combine the result (if any) with any inversion lists from posix
       * classes.  The lists are kept separate up to now because we don't want to
       * fold the classes (folding of those is automatically handled by the swash
       * fetching code) */
       * classes.  The lists are kept separate up to now because we don't want to
       * fold the classes (folding of those is automatically handled by the swash
       * fetching code) */
-    if (simple_posixes) {
-        _invlist_union(cp_list, simple_posixes, &cp_list);
-        SvREFCNT_dec_NN(simple_posixes);
+    if (simple_posixes) {   /* These are the classes known to be unaffected by
+                               /a, /aa, and /d */
+        if (cp_list) {
+            _invlist_union(cp_list, simple_posixes, &cp_list);
+            SvREFCNT_dec_NN(simple_posixes);
+        }
+        else {
+            cp_list = simple_posixes;
+        }
      }
      if (posixes || nposixes) {
      }
      if (posixes || nposixes) {
-        if (posixes && AT_LEAST_ASCII_RESTRICTED) {
+
+        /* We have to adjust /a and /aa */
+        if (AT_LEAST_ASCII_RESTRICTED) {
+
              /* Under /a and /aa, nothing above ASCII matches these */
              /* Under /a and /aa, nothing above ASCII matches these */
-            _invlist_intersection(posixes,
-                                  PL_XPosix_ptrs[_CC_ASCII],
-                                  &posixes);
-        }
-        if (nposixes) {
-            if (DEPENDS_SEMANTICS) {
-                /* Under /d, everything in the upper half of the Latin1 range
-                 * matches these complements */
-                ANYOF_FLAGS(ret) |= ANYOF_SHARED_d_MATCHES_ALL_NON_UTF8_NON_ASCII_non_d_WARN_SUPER;
+            if (posixes) {
+                _invlist_intersection(posixes,
+                                    PL_XPosix_ptrs[_CC_ASCII],
+                                    &posixes);
              }
              }
-            else if (AT_LEAST_ASCII_RESTRICTED) {
-                /* Under /a and /aa, everything above ASCII matches these
-                 * complements */
+
+            /* Under /a and /aa, everything above ASCII matches these
+             * complements */
+            if (nposixes) {
                  _invlist_union_complement_2nd(nposixes,
                                                PL_XPosix_ptrs[_CC_ASCII],
                                                &nposixes);
              }
                  _invlist_union_complement_2nd(nposixes,
                                                PL_XPosix_ptrs[_CC_ASCII],
                                                &nposixes);
              }
-            if (posixes) {
-                _invlist_union(posixes, nposixes, &posixes);
-                SvREFCNT_dec_NN(nposixes);
-            }
-            else {
-                posixes = nposixes;
-            }
          }
          }
+
          if (! DEPENDS_SEMANTICS) {
          if (! DEPENDS_SEMANTICS) {
-            if (cp_list) {
-                _invlist_union(cp_list, posixes, &cp_list);
-                SvREFCNT_dec_NN(posixes);
+
+            /* For everything but /d, we can just add the current 'posixes' and
+             * 'nposixes' to the main list */
+            if (posixes) {
+                if (cp_list) {
+                    _invlist_union(cp_list, posixes, &cp_list);
+                    SvREFCNT_dec_NN(posixes);
+                }
+                else {
+                    cp_list = posixes;
+                }
              }
              }
-            else {
-                cp_list = posixes;
+            if (nposixes) {
+                if (cp_list) {
+                    _invlist_union(cp_list, nposixes, &cp_list);
+                    SvREFCNT_dec_NN(nposixes);
+                }
+                else {
+                    cp_list = nposixes;
+                }
              }
          }
          else {
              }
          }
          else {
-            /* Under /d, we put into a separate list the Latin1 things that
-             * match only when the target string is utf8 */
-            SV* nonascii_but_latin1_properties = NULL;
-            _invlist_intersection(posixes, PL_UpperLatin1,
-                                  &nonascii_but_latin1_properties);
-            _invlist_subtract(posixes, nonascii_but_latin1_properties,
-                              &posixes);
-            if (cp_list) {
-                _invlist_union(cp_list, posixes, &cp_list);
-                SvREFCNT_dec_NN(posixes);
+            /* Under /d, things like \w match upper Latin1 characters only if
+             * the target string is in UTF-8.  But things like \W match all the
+             * upper Latin1 characters if the target string is not in UTF-8.
+             *
+             * Handle the case where there is something like \W separately */
+            if (nposixes) {
+                SV* only_non_utf8_list = invlist_clone(PL_UpperLatin1);
+
+                /* A complemented posix class matches all upper Latin1
+                 * characters if not in UTF-8.  And it matches just certain
+                 * ones when in UTF-8.  That means those certain ones are
+                 * matched regardless, so can just be added to the
+                 * unconditional list */
+                if (cp_list) {
+                    _invlist_union(cp_list, nposixes, &cp_list);
+                    SvREFCNT_dec_NN(nposixes);
+                    nposixes = NULL;
+                }
+                else {
+                    cp_list = nposixes;
+                }
+
+                /* Likewise for 'posixes' */
+                _invlist_union(posixes, cp_list, &cp_list);
+
+                /* Likewise for anything else in the range that matched only
+                 * under UTF-8 */
+                if (has_upper_latin1_only_utf8_matches) {
+                    _invlist_union(cp_list,
+                                   has_upper_latin1_only_utf8_matches,
+                                   &cp_list);
+                    SvREFCNT_dec_NN(has_upper_latin1_only_utf8_matches);
+                    has_upper_latin1_only_utf8_matches = NULL;
+                }
+
+                /* If we don't match all the upper Latin1 characters regardless
+                 * of UTF-8ness, we have to set a flag to match the rest when
+                 * not in UTF-8 */
+                _invlist_subtract(only_non_utf8_list, cp_list,
+                                  &only_non_utf8_list);
+                if (_invlist_len(only_non_utf8_list) != 0) {
+                    ANYOF_FLAGS(ret) |= ANYOF_SHARED_d_MATCHES_ALL_NON_UTF8_NON_ASCII_non_d_WARN_SUPER;
+                }
              }
              else {
              }
              else {
-                cp_list = posixes;
-            }
-
-            if (has_upper_latin1_only_utf8_matches) {
+                /* Here there were no complemented posix classes.  That means
+                 * the upper Latin1 characters in 'posixes' match only when the
+                 * target string is in UTF-8.  So we have to add them to the
+                 * list of those types of code points, while adding the
+                 * remainder to the unconditional list.
+                 *
+                 * First calculate what they are */
+                SV* nonascii_but_latin1_properties = NULL;
+                _invlist_intersection(posixes, PL_UpperLatin1,
+                                      &nonascii_but_latin1_properties);
+
+                /* And add them to the final list of such characters. */
                  _invlist_union(has_upper_latin1_only_utf8_matches,
                                 nonascii_but_latin1_properties,
                                 &has_upper_latin1_only_utf8_matches);
                  _invlist_union(has_upper_latin1_only_utf8_matches,
                                 nonascii_but_latin1_properties,
                                 &has_upper_latin1_only_utf8_matches);
-                SvREFCNT_dec_NN(nonascii_but_latin1_properties);
-            }
-            else {
-                has_upper_latin1_only_utf8_matches
-                                            = nonascii_but_latin1_properties;
+
+                /* Remove them from what now becomes the unconditional list */
+                _invlist_subtract(posixes, nonascii_but_latin1_properties,
+                                  &posixes);
+
+                /* And add those unconditional ones to the final list */
+                if (cp_list) {
+                    _invlist_union(cp_list, posixes, &cp_list);
+                    SvREFCNT_dec_NN(posixes);
+                    posixes = NULL;
+                }
+                else {
+                    cp_list = posixes;
+                }
+
+                SvREFCNT_dec(nonascii_but_latin1_properties);
+
+                /* Get rid of any characters that we now know are matched
+                 * unconditionally from the conditional list, which may make
+                 * that list empty */
+                _invlist_subtract(has_upper_latin1_only_utf8_matches,
+                                  cp_list,
+                                  &has_upper_latin1_only_utf8_matches);
+                if (_invlist_len(has_upper_latin1_only_utf8_matches) == 0) {
+                    SvREFCNT_dec_NN(has_upper_latin1_only_utf8_matches);
+                    has_upper_latin1_only_utf8_matches = NULL;
+                }
              }
          }
      }
              }
          }
      }
@@ -17337,79 +17594,14 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
              invlist_iterfinish(cp_list);
          }
      }
              invlist_iterfinish(cp_list);
          }
      }
-
-#define MATCHES_ALL_NON_UTF8_NON_ASCII(ret)                                 \
-    (   DEPENDS_SEMANTICS                                                   \
-     && (ANYOF_FLAGS(ret)                                                   \
-        & ANYOF_SHARED_d_MATCHES_ALL_NON_UTF8_NON_ASCII_non_d_WARN_SUPER))
-
-    /* See if we can simplify things under /d */
-    if (   has_upper_latin1_only_utf8_matches
-        || MATCHES_ALL_NON_UTF8_NON_ASCII(ret))
+    else if (   DEPENDS_SEMANTICS
+             && (    has_upper_latin1_only_utf8_matches
+                 || (ANYOF_FLAGS(ret) & ANYOF_SHARED_d_MATCHES_ALL_NON_UTF8_NON_ASCII_non_d_WARN_SUPER)))
      {
      {
-        /* But not if we are inverting, as that screws it up */
-        if (! invert) {
-            if (has_upper_latin1_only_utf8_matches) {
-                if (MATCHES_ALL_NON_UTF8_NON_ASCII(ret)) {
-
-                    /* Here, we have both the flag and inversion list.  Any
-                     * character in 'has_upper_latin1_only_utf8_matches'
-                     * matches when UTF-8 is in effect, but it also matches
-                     * when UTF-8 is not in effect because of
-                     * MATCHES_ALL_NON_UTF8_NON_ASCII.  Therefore it matches
-                     * unconditionally, so can be added to the regular list,
-                     * and 'has_upper_latin1_only_utf8_matches' cleared */
-                    _invlist_union(cp_list,
-                                   has_upper_latin1_only_utf8_matches,
-                                   &cp_list);
-                    SvREFCNT_dec_NN(has_upper_latin1_only_utf8_matches);
-                    has_upper_latin1_only_utf8_matches = NULL;
-                }
-                else if (cp_list) {
-
-                    /* Here, 'cp_list' gives chars that always match, and
-                     * 'has_upper_latin1_only_utf8_matches' gives chars that
-                     * were specified to match only if the target string is in
-                     * UTF-8.  It may be that these overlap, so we can subtract
-                     * the unconditionally matching from the conditional ones,
-                     * to make the conditional list as small as possible,
-                     * perhaps even clearing it, in which case more
-                     * optimizations are possible later */
-                    _invlist_subtract(has_upper_latin1_only_utf8_matches,
-                                      cp_list,
-                                      &has_upper_latin1_only_utf8_matches);
-                    if (_invlist_len(has_upper_latin1_only_utf8_matches) == 0) {
-                        SvREFCNT_dec_NN(has_upper_latin1_only_utf8_matches);
-                        has_upper_latin1_only_utf8_matches = NULL;
-                    }
-                }
-            }
-
-            /* Similarly, if the unconditional matches include every upper
-             * latin1 character, we can clear that flag to permit later
-             * optimizations */
-            if (cp_list && MATCHES_ALL_NON_UTF8_NON_ASCII(ret)) {
-                SV* only_non_utf8_list = invlist_clone(PL_UpperLatin1);
-                _invlist_subtract(only_non_utf8_list, cp_list,
-                                  &only_non_utf8_list);
-                if (_invlist_len(only_non_utf8_list) == 0) {
-                    ANYOF_FLAGS(ret) &= ~ANYOF_SHARED_d_MATCHES_ALL_NON_UTF8_NON_ASCII_non_d_WARN_SUPER;
-                }
-                SvREFCNT_dec_NN(only_non_utf8_list);
-                only_non_utf8_list = NULL;;
-            }
-        }
-
-        /* If we haven't gotten rid of all conditional matching, we change the
-         * regnode type to indicate that */
-        if (   has_upper_latin1_only_utf8_matches
-            || MATCHES_ALL_NON_UTF8_NON_ASCII(ret))
-        {
-            OP(ret) = ANYOFD;
-            optimizable = FALSE;
-        }
+        OP(ret) = ANYOFD;
+        optimizable = FALSE;
      }
      }
-#undef MATCHES_ALL_NON_UTF8_NON_ASCII
+
  
      /* Optimize inverted simple patterns (e.g. [^a-z]) when everything is known
       * at compile time.  Besides not inverting folded locale now, we can't
  
      /* Optimize inverted simple patterns (e.g. [^a-z]) when everything is known
       * at compile time.  Besides not inverting folded locale now, we can't
@@ -18107,7 +18299,7 @@ S_nextchar(pTHX_ RExC_state_t *pRExC_state)
          RExC_parse += (UTF) ? UTF8SKIP(RExC_parse) : 1;
  
          skip_to_be_ignored_text(pRExC_state, &RExC_parse,
          RExC_parse += (UTF) ? UTF8SKIP(RExC_parse) : 1;
  
          skip_to_be_ignored_text(pRExC_state, &RExC_parse,
-                                FALSE /* Don't assume /x */ );
+                                FALSE /* Don't force /x */ );
      }
  }
  
      }
  }
  
@@ -18237,7 +18429,9 @@ S_reginsert(pTHX_ RExC_state_t *pRExC_state, U8 op, regnode *opnd, U32 depth)
         RExC_size += size;
         return;
      }
         RExC_size += size;
         return;
      }
-
+    assert(!RExC_study_started); /* I believe we should never use reginsert once we have started
+                                    studying. If this is wrong then we need to adjust RExC_recurse
+                                    below like we do with RExC_open_parens/RExC_close_parens. */
      src = RExC_emit;
      RExC_emit += size;
      dst = RExC_emit;
      src = RExC_emit;
      RExC_emit += size;
      dst = RExC_emit;
@@ -18248,7 +18442,10 @@ S_reginsert(pTHX_ RExC_state_t *pRExC_state, U8 op, regnode *opnd, U32 depth)
           * iow it is 1 more than the number of parens seen in
           * the pattern so far. */
          for ( paren=0 ; paren < RExC_npar ; paren++ ) {
           * iow it is 1 more than the number of parens seen in
           * the pattern so far. */
          for ( paren=0 ; paren < RExC_npar ; paren++ ) {
-            if ( RExC_open_parens[paren] >= opnd ) {
+            /* note, RExC_open_parens[0] is the start of the
+             * regex, it can't move. RExC_close_parens[0] is the end
+             * of the regex, it *can* move. */
+            if ( paren && RExC_open_parens[paren] >= opnd ) {
                  /*DEBUG_PARSE_FMT("open"," - %d",size);*/
                  RExC_open_parens[paren] += size;
              } else {
                  /*DEBUG_PARSE_FMT("open"," - %d",size);*/
                  RExC_open_parens[paren] += size;
              } else {
@@ -18743,7 +18940,8 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
                                                   : TRIE_BITMAP(trie)),
                                                  NULL,
                                                  NULL,
                                                   : TRIE_BITMAP(trie)),
                                                  NULL,
                                                  NULL,
-                                                NULL
+                                                NULL,
+                                                FALSE
                                                 );
              sv_catpvs(sv, "]");
          }
                                                 );
              sv_catpvs(sv, "]");
          }
@@ -18842,6 +19040,8 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
          /* And things that aren't in the bitmap, but are small enough to be */
          SV* bitmap_range_not_in_bitmap = NULL;
  
          /* And things that aren't in the bitmap, but are small enough to be */
          SV* bitmap_range_not_in_bitmap = NULL;
  
+        const bool inverted = flags & ANYOF_INVERT;
+
         if (OP(o) == ANYOFL) {
              if (ANYOFL_UTF8_LOCALE_REQD(flags)) {
                  sv_catpvs(sv, "{utf8-locale-reqd}");
         if (OP(o) == ANYOFL) {
              if (ANYOFL_UTF8_LOCALE_REQD(flags)) {
                  sv_catpvs(sv, "{utf8-locale-reqd}");
@@ -18886,21 +19086,37 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
                                                ANYOF_BITMAP(o),
                                                bitmap_range_not_in_bitmap,
                                                only_utf8_locale_invlist,
                                                ANYOF_BITMAP(o),
                                                bitmap_range_not_in_bitmap,
                                                only_utf8_locale_invlist,
-                                              o);
+                                              o,
+
+                                              /* Can't try inverting for a
+                                               * better display if there are
+                                               * things that haven't been
+                                               * resolved */
+                                              unresolved != NULL);
          SvREFCNT_dec(bitmap_range_not_in_bitmap);
  
          /* If there are user-defined properties which haven't been defined yet,
          SvREFCNT_dec(bitmap_range_not_in_bitmap);
  
          /* If there are user-defined properties which haven't been defined yet,
-         * output them, in a separate [] from the bitmap range stuff */
+         * output them.  If the result is not to be inverted, it is clearest to
+         * output them in a separate [] from the bitmap range stuff.  If the
+         * result is to be complemented, we have to show everything in one [],
+         * as the inversion applies to the whole thing.  Use {braces} to
+         * separate them from anything in the bitmap and anything above the
+         * bitmap. */
          if (unresolved) {
          if (unresolved) {
-            if (do_sep) {
-                Perl_sv_catpvf(aTHX_ sv,"%s][%s",PL_colors[1],PL_colors[0]);
+            if (inverted) {
+                if (! do_sep) { /* If didn't output anything in the bitmap */
+                    sv_catpvs(sv, "^");
+                }
+                sv_catpvs(sv, "{");
              }
              }
-            if (flags & ANYOF_INVERT) {
-                sv_catpvs(sv, "^");
+            else if (do_sep) {
+                Perl_sv_catpvf(aTHX_ sv,"%s][%s",PL_colors[1],PL_colors[0]);
              }
              sv_catsv(sv, unresolved);
              }
              sv_catsv(sv, unresolved);
-            do_sep = TRUE;
-            SvREFCNT_dec_NN(unresolved);
+            if (inverted) {
+                sv_catpvs(sv, "}");
+            }
+            do_sep = ! inverted;
          }
  
          /* And, finally, add the above-the-bitmap stuff */
          }
  
          /* And, finally, add the above-the-bitmap stuff */
@@ -18917,9 +19133,11 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
                  Perl_sv_catpvf(aTHX_ sv,"%s][%s",PL_colors[1],PL_colors[0]);
              }
  
                  Perl_sv_catpvf(aTHX_ sv,"%s][%s",PL_colors[1],PL_colors[0]);
              }
  
-            /* And, for easy of understanding, it is always output not-shown as
-             * complemented */
-            if (flags & ANYOF_INVERT) {
+            /* And, for ease of understanding, it is shown in the
+             * uncomplemented form if possible.  The one exception being if
+             * there are unresolved items, where the inversion has to be
+             * delayed until runtime */
+            if (inverted && ! unresolved) {
                  _invlist_invert(nonbitmap_invlist);
                  _invlist_subtract(nonbitmap_invlist, PL_InBitmap, &nonbitmap_invlist);
              }
                  _invlist_invert(nonbitmap_invlist);
                  _invlist_subtract(nonbitmap_invlist, PL_InBitmap, &nonbitmap_invlist);
              }
@@ -18956,6 +19174,8 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
  
          /* And finally the matching, closing ']' */
         Perl_sv_catpvf(aTHX_ sv, "%s]", PL_colors[1]);
  
          /* And finally the matching, closing ']' */
         Perl_sv_catpvf(aTHX_ sv, "%s]", PL_colors[1]);
+
+        SvREFCNT_dec(unresolved);
      }
      else if (k == POSIXD || k == NPOSIXD) {
          U8 index = FLAGS(o) * 2;
      }
      else if (k == POSIXD || k == NPOSIXD) {
          U8 index = FLAGS(o) * 2;
@@ -19769,30 +19989,36 @@ S_put_range(pTHX_ SV *sv, UV start, const UV end, const bool allow_literals)
           * mnemonic names.  Split off any of those at the beginning and end of
           * the range to print mnemonically.  It isn't possible for many of
           * these to be in a row, so this won't overwhelm with output */
           * mnemonic names.  Split off any of those at the beginning and end of
           * the range to print mnemonically.  It isn't possible for many of
           * these to be in a row, so this won't overwhelm with output */
-        while (isMNEMONIC_CNTRL(start) && start <= end) {
-            put_code_point(sv, start);
-            start++;
-        }
-        if (start < end && isMNEMONIC_CNTRL(end)) {
-
-            /* Here, the final character in the range has a mnemonic name.
-             * Work backwards from the end to find the final non-mnemonic */
-            UV temp_end = end - 1;
-            while (isMNEMONIC_CNTRL(temp_end)) {
-                temp_end--;
+        if (   start <= end
+            && (isMNEMONIC_CNTRL(start) || isMNEMONIC_CNTRL(end)))
+        {
+            while (isMNEMONIC_CNTRL(start) && start <= end) {
+                put_code_point(sv, start);
+                start++;
              }
  
              }
  
-            /* And separately output the interior range that doesn't start or
-             * end with mnemonics */
-            put_range(sv, start, temp_end, FALSE);
+            /* If this didn't take care of the whole range ... */
+            if (start <= end) {
  
  
-            /* Then output the mnemonic trailing controls */
-            start = temp_end + 1;
-            while (start <= end) {
-                put_code_point(sv, start);
-                start++;
+                /* Look backwards from the end to find the final
+                 * non-mnemonic */
+                UV temp_end = end;
+                while (isMNEMONIC_CNTRL(temp_end)) {
+                    temp_end--;
+                }
+
+                /* And separately output the interior range that doesn't start
+                 * or end with mnemonics */
+                put_range(sv, start, temp_end, FALSE);
+
+                /* Then output the mnemonic trailing controls */
+                start = temp_end + 1;
+                while (start <= end) {
+                    put_code_point(sv, start);
+                    start++;
+                }
+                break;
              }
              }
-            break;
          }
  
          /* As a final resort, output the range or subrange as hex. */
          }
  
          /* As a final resort, output the range or subrange as hex. */
@@ -19880,7 +20106,9 @@ S_put_charclass_bitmap_innards_common(pTHX_
  )
  {
      /* Create and return an SV containing a displayable version of the bitmap
  )
  {
      /* Create and return an SV containing a displayable version of the bitmap
-     * and associated information determined by the input parameters. */
+     * and associated information determined by the input parameters.  If the
+     * output would have been only the inversion indicator '^', NULL is instead
+     * returned. */
  
      SV * output;
  
  
      SV * output;
  
@@ -19939,9 +20167,8 @@ S_put_charclass_bitmap_innards_common(pTHX_
          }
      }
  
          }
      }
  
-    /* If the only thing we output is the '^', clear it */
      if (invert && SvCUR(output) == 1) {
      if (invert && SvCUR(output) == 1) {
-        SvCUR_set(output, 0);
+        return NULL;
      }
  
      return output;
      }
  
      return output;
@@ -19952,7 +20179,8 @@ S_put_charclass_bitmap_innards(pTHX_ SV *sv,
                                       char *bitmap,
                                       SV *nonbitmap_invlist,
                                       SV *only_utf8_locale_invlist,
                                       char *bitmap,
                                       SV *nonbitmap_invlist,
                                       SV *only_utf8_locale_invlist,
-                                     const regnode * const node)
+                                     const regnode * const node,
+                                     const bool force_as_is_display)
  {
      /* Appends to 'sv' a displayable version of the innards of the bracketed
       * character class defined by the other arguments:
  {
      /* Appends to 'sv' a displayable version of the innards of the bracketed
       * character class defined by the other arguments:
@@ -19968,13 +20196,16 @@ S_put_charclass_bitmap_innards(pTHX_ SV *sv,
       *  'node' is the regex pattern node.  It is needed only when the above two
       *      parameters are not null, and is passed so that this routine can
       *      tease apart the various reasons for them.
       *  'node' is the regex pattern node.  It is needed only when the above two
       *      parameters are not null, and is passed so that this routine can
       *      tease apart the various reasons for them.
+     *  'force_as_is_display' is TRUE if this routine should definitely NOT try
+     *      to invert things to see if that leads to a cleaner display.  If
+     *      FALSE, this routine is free to use its judgment about doing this.
       *
       * It returns TRUE if there was actually something output.  (It may be that
       * the bitmap, etc is empty.)
       *
       * When called for outputting the bitmap of a non-ANYOF node, just pass the
       *
       * It returns TRUE if there was actually something output.  (It may be that
       * the bitmap, etc is empty.)
       *
       * When called for outputting the bitmap of a non-ANYOF node, just pass the
-     * bitmap, with the succeeding parameters set to NULL.
-     *
+     * bitmap, with the succeeding parameters set to NULL, and the final one to
+     * FALSE.
       */
  
      /* In general, it tries to display the 'cleanest' representation of the
       */
  
      /* In general, it tries to display the 'cleanest' representation of the
@@ -19982,7 +20213,7 @@ S_put_charclass_bitmap_innards(pTHX_ SV *sv,
       * whether the class itself is to be inverted.  However,  there are some
       * cases where it can't try inverting, as what actually matches isn't known
       * until runtime, and hence the inversion isn't either. */
       * whether the class itself is to be inverted.  However,  there are some
       * cases where it can't try inverting, as what actually matches isn't known
       * until runtime, and hence the inversion isn't either. */
-    bool inverting_allowed = TRUE;
+    bool inverting_allowed = ! force_as_is_display;
  
      int i;
      STRLEN orig_sv_cur = SvCUR(sv);
  
      int i;
      STRLEN orig_sv_cur = SvCUR(sv);
@@ -19998,7 +20229,7 @@ S_put_charclass_bitmap_innards(pTHX_ SV *sv,
                                         is UTF-8 */
  
      SV* as_is_display;      /* The output string when we take the inputs
                                         is UTF-8 */
  
      SV* as_is_display;      /* The output string when we take the inputs
-                              literally */
+                               literally */
      SV* inverted_display;   /* The output string when we invert the inputs */
  
      U8 flags = (node) ? ANYOF_FLAGS(node) : 0;
      SV* inverted_display;   /* The output string when we invert the inputs */
  
      U8 flags = (node) ? ANYOF_FLAGS(node) : 0;
@@ -20036,14 +20267,7 @@ S_put_charclass_bitmap_innards(pTHX_ SV *sv,
              /* And this flag for matching all non-ASCII 0xFF and below */
              if (flags & ANYOF_SHARED_d_MATCHES_ALL_NON_UTF8_NON_ASCII_non_d_WARN_SUPER)
              {
              /* And this flag for matching all non-ASCII 0xFF and below */
              if (flags & ANYOF_SHARED_d_MATCHES_ALL_NON_UTF8_NON_ASCII_non_d_WARN_SUPER)
              {
-                if (invert) {
-                    not_utf8 = _new_invlist(0);
-                }
-                else {
-                    not_utf8 = invlist_clone(PL_UpperLatin1);
-                }
-                inverting_allowed = FALSE;  /* XXX needs more work to be able
-                                               to allow this */
+                not_utf8 = invlist_clone(PL_UpperLatin1);
              }
          }
          else if (OP(node) == ANYOFL) {
              }
          }
          else if (OP(node) == ANYOFL) {
@@ -20118,7 +20342,10 @@ S_put_charclass_bitmap_innards(pTHX_ SV *sv,
  
      /* If have to take the output as-is, just do that */
      if (! inverting_allowed) {
  
      /* If have to take the output as-is, just do that */
      if (! inverting_allowed) {
-        sv_catsv(sv, as_is_display);
+        if (as_is_display) {
+            sv_catsv(sv, as_is_display);
+            SvREFCNT_dec_NN(as_is_display);
+        }
      }
      else { /* But otherwise, create the output again on the inverted input, and
                use whichever version is shorter */
      }
      else { /* But otherwise, create the output again on the inverted input, and
                use whichever version is shorter */
@@ -20145,6 +20372,7 @@ S_put_charclass_bitmap_innards(pTHX_ SV *sv,
           * conditional code points, so that when inverted, they will be gone
           * from it */
          _invlist_union(only_utf8, invlist, &invlist);
           * conditional code points, so that when inverted, they will be gone
           * from it */
          _invlist_union(only_utf8, invlist, &invlist);
+        _invlist_union(not_utf8, invlist, &invlist);
          _invlist_union(only_utf8_locale, invlist, &invlist);
          _invlist_invert(invlist);
          _invlist_intersection(invlist, PL_InBitmap, &invlist);
          _invlist_union(only_utf8_locale, invlist, &invlist);
          _invlist_invert(invlist);
          _invlist_intersection(invlist, PL_InBitmap, &invlist);
@@ -20153,10 +20381,13 @@ S_put_charclass_bitmap_innards(pTHX_ SV *sv,
              _invlist_invert(only_utf8);
              _invlist_intersection(only_utf8, PL_UpperLatin1, &only_utf8);
          }
              _invlist_invert(only_utf8);
              _invlist_intersection(only_utf8, PL_UpperLatin1, &only_utf8);
          }
+        else if (not_utf8) {
  
  
-        if (not_utf8) {
-            _invlist_invert(not_utf8);
-            _invlist_intersection(not_utf8, PL_UpperLatin1, &not_utf8);
+            /* If a code point matches iff the target string is not in UTF-8,
+             * then complementing the result has it not match iff not in UTF-8,
+             * which is the same thing as matching iff it is UTF-8. */
+            only_utf8 = not_utf8;
+            not_utf8 = NULL;
          }
  
          if (only_utf8_locale) {
          }
  
          if (only_utf8_locale) {
@@ -20175,17 +20406,19 @@ S_put_charclass_bitmap_innards(pTHX_ SV *sv,
  
          /* Use the shortest representation, taking into account our bias
           * against showing it inverted */
  
          /* Use the shortest representation, taking into account our bias
           * against showing it inverted */
-        if (SvCUR(inverted_display) + inverted_bias
-            < SvCUR(as_is_display) + as_is_bias)
+        if (   inverted_display
+            && (   ! as_is_display
+                || (  SvCUR(inverted_display) + inverted_bias
+                    < SvCUR(as_is_display)    + as_is_bias)))
          {
             sv_catsv(sv, inverted_display);
          }
          {
             sv_catsv(sv, inverted_display);
          }
-        else {
+        else if (as_is_display) {
             sv_catsv(sv, as_is_display);
          }
  
             sv_catsv(sv, as_is_display);
          }
  
-        SvREFCNT_dec_NN(as_is_display);
-        SvREFCNT_dec_NN(inverted_display);
+        SvREFCNT_dec(as_is_display);
+        SvREFCNT_dec(inverted_display);
      }
  
      SvREFCNT_dec_NN(invlist);
      }
  
      SvREFCNT_dec_NN(invlist);