Use SSize_t for tmps stack offsets

[perl5.git] / regcomp.c
diff --git a/regcomp.c b/regcomp.c

index 6bd9084..69bb66d 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
@@ -367,6 +367,24 @@ typedef struct scan_data_t {
      struct regnode_charclass_class *start_class;
  } scan_data_t;
  
+/* The below is perhaps overboard, but this allows us to save a test at the
+ * expense of a mask.  This is because on both EBCDIC and ASCII machines, 'A'
+ * and 'a' differ by a single bit; the same with the upper and lower case of
+ * all other ASCII-range alphabetics.  On ASCII platforms, they are 32 apart;
+ * on EBCDIC, they are 64.  This uses an exclusive 'or' to find that bit and
+ * then inverts it to form a mask, with just a single 0, in the bit position
+ * where the upper- and lowercase differ.  XXX There are about 40 other
+ * instances in the Perl core where this micro-optimization could be used.
+ * Should decide if maintenance cost is worse, before changing those.
+ *
+ * Returns a boolean as to whether or not 'v' is either a lowercase or
+ * uppercase instance of 'c', where 'c' is in [A-Za-z].  If 'c' is a
+ * compile-time constant, the generated code is better than some optimizing
+ * compilers figure out, amounting to a mask and test.  The results are
+ * meaningless if 'c' is not one of [A-Za-z] */
+#define isARG2_lower_or_UPPER_ARG1(c, v) \
+                              (((v) & ~('A' ^ 'a')) ==  ((c) & ~('A' ^ 'a')))
+
  /*
   * Forward declarations for pregcomp()'s friends.
   */
@@ -848,7 +866,7 @@ S_cl_init(const RExC_state_t *pRExC_state, struct regnode_charclass_class *cl)
  }
  
  /* These two functions currently do the exact same thing */
-#define cl_init_zero           S_cl_init
+#define cl_init_zero           cl_init
  
  /* 'AND' a given class with another one.  Can create false positives.  'cl'
   * should not be inverted.  'and_with->flags & ANYOF_CLASS' should be 0 if
@@ -2938,16 +2956,6 @@ S_join_exact(pTHX_ RExC_state_t *pRExC_state, regnode *scan, UV *min_subtract, b
               * have to find at least two characters for a multi-fold */
             const U8* upper = (OP(scan) == EXACTF) ? s_end : s_end -1;
  
-            /* The below is perhaps overboard, but this allows us to save a
-             * test each time through the loop at the expense of a mask.  This
-             * is because on both EBCDIC and ASCII machines, 'S' and 's' differ
-             * by a single bit.  On ASCII they are 32 apart; on EBCDIC, they
-             * are 64.  This uses an exclusive 'or' to find that bit and then
-             * inverts it to form a mask, with just a single 0, in the bit
-             * position where 'S' and 's' differ. */
-            const U8 S_or_s_mask = (U8) ~ ('S' ^ 's');
-            const U8 s_masked = 's' & S_or_s_mask;
-
             while (s < upper) {
                  int len = is_MULTI_CHAR_FOLD_latin1_safe(s, s_end);
                  if (! len) {    /* Not a multi-char fold. */
@@ -2960,8 +2968,8 @@ S_join_exact(pTHX_ RExC_state_t *pRExC_state, regnode *scan, UV *min_subtract, b
                  }
  
                  if (len == 2
-                    && ((*s & S_or_s_mask) == s_masked)
-                    && ((*(s+1) & S_or_s_mask) == s_masked))
+                    && isARG2_lower_or_UPPER_ARG1('s', *s)
+                    && isARG2_lower_or_UPPER_ARG1('s', *(s+1)))
                  {
  
                      /* EXACTF nodes need to know that the minimum length
@@ -6242,7 +6250,7 @@ reStudy:
              && data.last_start_min == 0 && data.last_end > 0
              && !RExC_seen_zerolen
              && !(RExC_seen & REG_SEEN_VERBARG)
-            && (!(RExC_seen & REG_SEEN_GPOS) || (r->extflags & RXf_ANCH_GPOS)))
+            && !((RExC_seen & REG_SEEN_GPOS) || (r->extflags & RXf_ANCH_GPOS)))
             r->extflags |= RXf_CHECK_ALL;
         scan_commit(pRExC_state, &data,&minlen,0);
  
@@ -6339,9 +6347,7 @@ reStudy:
             r->check_offset_min = r->float_min_offset;
             r->check_offset_max = r->float_max_offset;
         }
-       /* XXXX Currently intuiting is not compatible with ANCH_GPOS.
-          This should be changed ASAP!  */
-       if ((r->check_substr || r->check_utf8) && !(r->extflags & RXf_ANCH_GPOS)) {
+       if ((r->check_substr || r->check_utf8) ) {
             r->extflags |= RXf_USE_INTUIT;
             if (SvTAIL(r->check_substr ? r->check_substr : r->check_utf8))
                 r->extflags |= RXf_INTUIT_TAIL;
@@ -6728,13 +6734,23 @@ Perl_reg_numbered_buff_fetch(pTHX_ REGEXP * const r, const I32 paren,
  
      PERL_ARGS_ASSERT_REG_NUMBERED_BUFF_FETCH;
          
-    if ( (    n == RX_BUFF_IDX_CARET_PREMATCH
+    if (      n == RX_BUFF_IDX_CARET_PREMATCH
             || n == RX_BUFF_IDX_CARET_FULLMATCH
             || n == RX_BUFF_IDX_CARET_POSTMATCH
-         )
-         && !(rx->extflags & RXf_PMf_KEEPCOPY)
-    )
-        goto ret_undef;
+       )
+    {
+        bool keepcopy = cBOOL(rx->extflags & RXf_PMf_KEEPCOPY);
+        if (!keepcopy) {
+            /* on something like
+             *    $qr = qr/.../;
+             *    /$qr/p;
+             * the KEEPCOPY is set on the PMOP rather than the regex */
+            if (PL_curpm && r == PM_GETRE(PL_curpm))
+                 keepcopy = cBOOL(PL_curpm->op_pmflags & PMf_KEEPCOPY);
+        }
+        if (!keepcopy)
+            goto ret_undef;
+    }
  
      if (!rx->subbeg)
          goto ret_undef;
@@ -6840,13 +6856,27 @@ Perl_reg_numbered_buff_length(pTHX_ REGEXP * const r, const SV * const sv,
  
      PERL_ARGS_ASSERT_REG_NUMBERED_BUFF_LENGTH;
  
+    if (   paren == RX_BUFF_IDX_CARET_PREMATCH
+        || paren == RX_BUFF_IDX_CARET_FULLMATCH
+        || paren == RX_BUFF_IDX_CARET_POSTMATCH
+    )
+    {
+        bool keepcopy = cBOOL(rx->extflags & RXf_PMf_KEEPCOPY);
+        if (!keepcopy) {
+            /* on something like
+             *    $qr = qr/.../;
+             *    /$qr/p;
+             * the KEEPCOPY is set on the PMOP rather than the regex */
+            if (PL_curpm && r == PM_GETRE(PL_curpm))
+                 keepcopy = cBOOL(PL_curpm->op_pmflags & PMf_KEEPCOPY);
+        }
+        if (!keepcopy)
+            goto warn_undef;
+    }
+
      /* Some of this code was originally in C<Perl_magic_len> in F<mg.c> */
      switch (paren) {
        case RX_BUFF_IDX_CARET_PREMATCH: /* ${^PREMATCH} */
-         if (!(rx->extflags & RXf_PMf_KEEPCOPY))
-            goto warn_undef;
-        /*FALLTHROUGH*/
-
        case RX_BUFF_IDX_PREMATCH:       /* $` */
          if (rx->offs[0].start != -1) {
                         i = rx->offs[0].start;
@@ -6859,8 +6889,6 @@ Perl_reg_numbered_buff_length(pTHX_ REGEXP * const r, const SV * const sv,
          return 0;
  
        case RX_BUFF_IDX_CARET_POSTMATCH: /* ${^POSTMATCH} */
-         if (!(rx->extflags & RXf_PMf_KEEPCOPY))
-            goto warn_undef;
        case RX_BUFF_IDX_POSTMATCH:       /* $' */
             if (rx->offs[0].end != -1) {
                         i = rx->sublen - rx->offs[0].end;
@@ -6872,13 +6900,7 @@ Perl_reg_numbered_buff_length(pTHX_ REGEXP * const r, const SV * const sv,
             }
          return 0;
  
-      case RX_BUFF_IDX_CARET_FULLMATCH: /* ${^MATCH} */
-         if (!(rx->extflags & RXf_PMf_KEEPCOPY))
-            goto warn_undef;
-        /*FALLTHROUGH*/
-
-      /* $& / ${^MATCH}, $1, $2, ... */
-      default:
+      default: /* $& / ${^MATCH}, $1, $2, ... */
             if (paren <= (I32)rx->nparens &&
              (s1 = rx->offs[paren].start) != -1 &&
              (t1 = rx->offs[paren].end) != -1)
@@ -7026,11 +7048,12 @@ S_reg_scan_name(pTHX_ RExC_state_t *pRExC_state, U32 flags)
  /* This section of code defines the inversion list object and its methods.  The
   * interfaces are highly subject to change, so as much as possible is static to
   * this file.  An inversion list is here implemented as a malloc'd C UV array
- * with some added info that is placed as UVs at the beginning in a header
- * portion.  An inversion list for Unicode is an array of code points, sorted
- * by ordinal number.  The zeroth element is the first code point in the list.
- * The 1th element is the first element beyond that not in the list.  In other
- * words, the first range is
+ * stored in an SVt_INVLIST scalar.
+ *
+ * An inversion list for Unicode is an array of code points, sorted by ordinal
+ * number.  The zeroth element is the first code point in the list.  The 1th
+ * element is the first element beyond that not in the list.  In other words,
+ * the first range is
   *  invlist[0]..(invlist[1]-1)
   * The other ranges follow.  Thus every element whose index is divisible by two
   * marks the beginning of a range that is in the list, and every element not
@@ -7047,10 +7070,10 @@ S_reg_scan_name(pTHX_ RExC_state_t *pRExC_state, U32 flags)
   * list.)
   * Taking the complement (inverting) an inversion list is quite simple, if the
   * first element is 0, remove it; otherwise add a 0 element at the beginning.
- * This implementation reserves an element (considered to be the final element
- * of the header) at the beginning of each inversion list to always contain 0;
- * there is an additional flag in the header which indicates if the list begins
- * at the 0, or is offset to begin at the next element.
+ * This implementation reserves an element at the beginning of each inversion
+ * list to always contain 0; there is an additional flag in the header which
+ * indicates if the list begins at the 0, or is offset to begin at the next
+ * element.
   *
   * More about inversion lists can be found in "Unicode Demystified"
   * Chapter 13 by Richard Gillam, published by Addison-Wesley.
@@ -7065,10 +7088,6 @@ S_reg_scan_name(pTHX_ RExC_state_t *pRExC_state, U32 flags)
   * should eventually be made public */
  
  /* The header definitions are in F<inline_invlist.c> */
-#define TO_INTERNAL_SIZE(x) (((x) + HEADER_LENGTH) * sizeof(UV))
-#define FROM_INTERNAL_SIZE(x) (((x)/ sizeof(UV)) - HEADER_LENGTH)
-
-#define INVLIST_INITIAL_LEN 10
  
  PERL_STATIC_INLINE UV*
  S__invlist_array_init(pTHX_ SV* const invlist, const bool will_have_0)
@@ -7078,20 +7097,22 @@ S__invlist_array_init(pTHX_ SV* const invlist, const bool will_have_0)
       * array begins depends on whether the list has the code point U+0000 in it
       * or not.  The other parameter tells it whether the code that follows this
       * call is about to put a 0 in the inversion list or not.  The first
-     * element is either the final part of the header reserved for 0, if TRUE,
-     * or the first element of the non-heading part, if FALSE */
+     * element is either the element reserved for 0, if TRUE, or the element
+     * after it, if FALSE */
  
-    UV* zero = get_invlist_zero_addr(invlist);
+    bool* offset = get_invlist_offset_addr(invlist);
+    UV* zero_addr = (UV *) SvPVX(invlist);
  
      PERL_ARGS_ASSERT__INVLIST_ARRAY_INIT;
  
      /* Must be empty */
-    assert(! *_get_invlist_len_addr(invlist));
+    assert(! _invlist_len(invlist));
+
+    *zero_addr = 0;
  
      /* 1^1 = 0; 1^0 = 1 */
-    *zero = 1 ^ will_have_0;
-    *(zero + 1) = 0;
-    return 1 + zero + *zero;
+    *offset = 1 ^ will_have_0;
+    return zero_addr + *offset;
  }
  
  PERL_STATIC_INLINE UV*
@@ -7105,43 +7126,44 @@ S_invlist_array(pTHX_ SV* const invlist)
  
      /* Must not be empty.  If these fail, you probably didn't check for <len>
       * being non-zero before trying to get the array */
-    assert(*_get_invlist_len_addr(invlist));
-    assert(*get_invlist_zero_addr(invlist) == 0
-          || *get_invlist_zero_addr(invlist) == 1);
-
-    /* The array begins either at the header element reserved for zero or the
-     * element after that.  The reserved element is 1 past the zero_addr
-     * element; the latter contains 0 or 1 to indicate how much additionally to
-     * add */
-    assert(0 == *(1 + get_invlist_zero_addr(invlist)));
-    return (UV *) (1 + get_invlist_zero_addr(invlist)
-                  + *get_invlist_zero_addr(invlist));
+    assert(_invlist_len(invlist));
+
+    /* The very first element always contains zero.  The array begins either
+     * there, or if the inversion list is offset, at the element after it.
+     * The offset header field determines which; it contains 0 or 1 to indicate
+     * how much additionally to add */
+    assert(0 == *(SvPVX(invlist)));
+    return ((UV *) SvPVX(invlist) + *get_invlist_offset_addr(invlist));
  }
  
  PERL_STATIC_INLINE void
-S_invlist_set_len(pTHX_ SV* const invlist, const UV len)
+S_invlist_set_len(pTHX_ SV* const invlist, const UV len, const bool offset)
  {
-    /* Sets the current number of elements stored in the inversion list */
+    /* Sets the current number of elements stored in the inversion list.
+     * Updates SvCUR correspondingly */
  
      PERL_ARGS_ASSERT_INVLIST_SET_LEN;
  
-    *_get_invlist_len_addr(invlist) = len;
+    assert(SvTYPE(invlist) == SVt_INVLIST);
  
-    assert(len <= SvLEN(invlist));
-
-    SvCUR_set(invlist, TO_INTERNAL_SIZE(len));
-    /* Note that when inverting, SvCUR shouldn't change */
+    SvCUR_set(invlist,
+              (len == 0)
+               ? 0
+               : TO_INTERNAL_SIZE(len + offset));
+    assert(SvLEN(invlist) == 0 || SvCUR(invlist) <= SvLEN(invlist));
  }
  
  PERL_STATIC_INLINE IV*
  S_get_invlist_previous_index_addr(pTHX_ SV* invlist)
  {
-    /* Return the address of the UV that is reserved to hold the cached index
+    /* Return the address of the IV that is reserved to hold the cached index
       * */
  
      PERL_ARGS_ASSERT_GET_INVLIST_PREVIOUS_INDEX_ADDR;
  
-    return (IV *) (SvPVX(invlist) + (INVLIST_PREVIOUS_INDEX_OFFSET * sizeof (UV)));
+    assert(SvTYPE(invlist) == SVt_INVLIST);
+
+    return &(((XINVLIST*) SvANY(invlist))->prev_index);
  }
  
  PERL_STATIC_INLINE IV
@@ -7174,20 +7196,13 @@ S_invlist_max(pTHX_ SV* const invlist)
  
      PERL_ARGS_ASSERT_INVLIST_MAX;
  
-    return SvLEN(invlist) == 0  /* This happens under _new_invlist_C_array */
-           ? _invlist_len(invlist)
-           : FROM_INTERNAL_SIZE(SvLEN(invlist));
-}
-
-PERL_STATIC_INLINE UV*
-S_get_invlist_zero_addr(pTHX_ SV* invlist)
-{
-    /* Return the address of the UV that says whether the inversion list is
-     * offset (it contains 1) or not (contains 0) */
-
-    PERL_ARGS_ASSERT_GET_INVLIST_ZERO_ADDR;
+    assert(SvTYPE(invlist) == SVt_INVLIST);
  
-    return (UV *) (SvPVX(invlist) + (INVLIST_ZERO_OFFSET * sizeof (UV)));
+    /* Assumes worst case, in which the 0 element is not counted in the
+     * inversion list, so subtracts 1 for that */
+    return SvLEN(invlist) == 0  /* This happens under _new_invlist_C_array */
+           ? FROM_INTERNAL_SIZE(SvCUR(invlist)) - 1
+           : FROM_INTERNAL_SIZE(SvLEN(invlist)) - 1;
  }
  
  #ifndef PERL_IN_XSUB_RE
@@ -7200,61 +7215,77 @@ Perl__new_invlist(pTHX_ IV initial_size)
       * system default is used instead */
  
      SV* new_list;
-    UV* zero_addr;
  
      if (initial_size < 0) {
-       initial_size = INVLIST_INITIAL_LEN;
+       initial_size = 10;
      }
  
      /* Allocate the initial space */
-    new_list = newSV(TO_INTERNAL_SIZE(initial_size));
-    invlist_set_len(new_list, 0);
+    new_list = newSV_type(SVt_INVLIST);
  
-    /* Force iterinit() to be used to get iteration to work */
-    *get_invlist_iter_addr(new_list) = UV_MAX;
+    /* First 1 is in case the zero element isn't in the list; second 1 is for
+     * trailing NUL */
+    SvGROW(new_list, TO_INTERNAL_SIZE(initial_size + 1) + 1);
+    invlist_set_len(new_list, 0, 0);
  
-    /* This should force a segfault if a method doesn't initialize this
-     * properly */
-    zero_addr = get_invlist_zero_addr(new_list);
-    *zero_addr = UV_MAX;
-    *(zero_addr + 1) = 0;
+    /* Force iterinit() to be used to get iteration to work */
+    *get_invlist_iter_addr(new_list) = (STRLEN) UV_MAX;
  
      *get_invlist_previous_index_addr(new_list) = 0;
-    *get_invlist_version_id_addr(new_list) = INVLIST_VERSION_ID;
-#if HEADER_LENGTH != 6
-#   error Need to regenerate INVLIST_VERSION_ID by running perl -E 'say int(rand 2**31-1)', and then changing the #if to the new length
-#endif
  
      return new_list;
  }
  #endif
  
  STATIC SV*
-S__new_invlist_C_array(pTHX_ UV* list)
+S__new_invlist_C_array(pTHX_ const UV* const list)
  {
      /* Return a pointer to a newly constructed inversion list, initialized to
       * point to <list>, which has to be in the exact correct inversion list
       * form, including internal fields.  Thus this is a dangerous routine that
-     * should not be used in the wrong hands */
+     * should not be used in the wrong hands.  The passed in 'list' contains
+     * several header fields at the beginning that are not part of the
+     * inversion list body proper */
+
+    const STRLEN length = (STRLEN) list[0];
+    const UV version_id =          list[1];
+    const bool offset   =    cBOOL(list[2]);
+#define HEADER_LENGTH 3
+    /* If any of the above changes in any way, you must change HEADER_LENGTH
+     * (if appropriate) and regenerate INVLIST_VERSION_ID by running
+     *      perl -E 'say int(rand 2**31-1)'
+     */
+#define INVLIST_VERSION_ID 148565664 /* This is a combination of a version and
+                                        data structure type, so that one being
+                                        passed in can be validated to be an
+                                        inversion list of the correct vintage.
+                                       */
  
-    SV* invlist = newSV_type(SVt_PV);
+    SV* invlist = newSV_type(SVt_INVLIST);
  
      PERL_ARGS_ASSERT__NEW_INVLIST_C_ARRAY;
  
-    SvPV_set(invlist, (char *) list);
+    if (version_id != INVLIST_VERSION_ID) {
+        Perl_croak(aTHX_ "panic: Incorrect version for previously generated inversion list");
+    }
+
+    /* The generated array passed in includes header elements that aren't part
+     * of the list proper, so start it just after them */
+    SvPV_set(invlist, (char *) (list + HEADER_LENGTH));
+
      SvLEN_set(invlist, 0);  /* Means we own the contents, and the system
                                shouldn't touch it */
-    SvCUR_set(invlist, TO_INTERNAL_SIZE(_invlist_len(invlist)));
  
-    if (*get_invlist_version_id_addr(invlist) != INVLIST_VERSION_ID) {
-        Perl_croak(aTHX_ "panic: Incorrect version for previously generated inversion list");
-    }
+    *(get_invlist_offset_addr(invlist)) = offset;
+
+    /* The 'length' passed to us is the physical number of elements in the
+     * inversion list.  But if there is an offset the logical number is one
+     * less than that */
+    invlist_set_len(invlist, length  - offset, offset);
  
-    /* Initialize the iteration pointer.
-     * XXX This could be done at compile time in charclass_invlists.h, but I
-     * (khw) am not confident that the suffixes for specifying the C constant
-     * UV_MAX are portable, e.g.  'ull' on a 32 bit machine that is configured
-     * to use 64 bits; might need a Configure probe */
+    invlist_set_previous_index(invlist, 0);
+
+    /* Initialize the iteration pointer. */
      invlist_iterfinish(invlist);
  
      return invlist;
@@ -7267,7 +7298,11 @@ S_invlist_extend(pTHX_ SV* const invlist, const UV new_max)
  
      PERL_ARGS_ASSERT_INVLIST_EXTEND;
  
-    SvGROW((SV *)invlist, TO_INTERNAL_SIZE(new_max));
+    assert(SvTYPE(invlist) == SVt_INVLIST);
+
+    /* Add one to account for the zero element at the beginning which may not
+     * be counted by the calling parameters */
+    SvGROW((SV *)invlist, TO_INTERNAL_SIZE(new_max + 1));
  }
  
  PERL_STATIC_INLINE void
@@ -7275,9 +7310,10 @@ S_invlist_trim(pTHX_ SV* const invlist)
  {
      PERL_ARGS_ASSERT_INVLIST_TRIM;
  
+    assert(SvTYPE(invlist) == SVt_INVLIST);
+
      /* Change the length of the inversion list to how many entries it currently
       * has */
-
      SvPV_shrink_to_cur((SV *) invlist);
  }
  
@@ -7293,11 +7329,13 @@ S__append_range_to_invlist(pTHX_ SV* const invlist, const UV start, const UV end
      UV* array;
      UV max = invlist_max(invlist);
      UV len = _invlist_len(invlist);
+    bool offset;
  
      PERL_ARGS_ASSERT__APPEND_RANGE_TO_INVLIST;
  
      if (len == 0) { /* Empty lists must be initialized */
-        array = _invlist_array_init(invlist, start == 0);
+        offset = start != 0;
+        array = _invlist_array_init(invlist, ! offset);
      }
      else {
         /* Here, the existing list is non-empty. The current max entry in the
@@ -7320,6 +7358,7 @@ S__append_range_to_invlist(pTHX_ SV* const invlist, const UV start, const UV end
          * value not in the set, it is extending the set, so the new first
          * value not in the set is one greater than the newly extended range.
          * */
+        offset = *get_invlist_offset_addr(invlist);
         if (array[final_element] == start) {
             if (end != UV_MAX) {
                 array[final_element] = end + 1;
@@ -7327,7 +7366,7 @@ S__append_range_to_invlist(pTHX_ SV* const invlist, const UV start, const UV end
             else {
                 /* But if the end is the maximum representable on the machine,
                  * just let the range that this would extend to have no end */
-               invlist_set_len(invlist, len - 1);
+               invlist_set_len(invlist, len - 1, offset);
             }
             return;
         }
@@ -7337,16 +7376,18 @@ S__append_range_to_invlist(pTHX_ SV* const invlist, const UV start, const UV end
  
      len += 2;  /* Includes an element each for the start and end of range */
  
-    /* If overflows the existing space, extend, which may cause the array to be
-     * moved */
+    /* If this will overflow the existing space, extend, which may cause the
+     * array to be moved */
      if (max < len) {
         invlist_extend(invlist, len);
-       invlist_set_len(invlist, len);  /* Have to set len here to avoid assert
-                                          failure in invlist_array() */
+
+        /* Have to set len here to avoid assert failure in invlist_array() */
+        invlist_set_len(invlist, len, offset);
+
         array = invlist_array(invlist);
      }
      else {
-       invlist_set_len(invlist, len);
+       invlist_set_len(invlist, len, offset);
      }
  
      /* The next item on the list starts the range, the one after that is
@@ -7358,7 +7399,7 @@ S__append_range_to_invlist(pTHX_ SV* const invlist, const UV start, const UV end
      else {
         /* But if the end is the maximum representable on the machine, just let
          * the range have no end */
-       invlist_set_len(invlist, len - 1);
+       invlist_set_len(invlist, len - 1, offset);
      }
  }
  
@@ -7743,7 +7784,7 @@ Perl__invlist_union_maybe_complement_2nd(pTHX_ SV* const a, SV* const b, const b
      /* Set result to final length, which can change the pointer to array_u, so
       * re-find it */
      if (len_u != _invlist_len(u)) {
-       invlist_set_len(u, len_u);
+       invlist_set_len(u, len_u, *get_invlist_offset_addr(u));
         invlist_trim(u);
         array_u = invlist_array(u);
      }
@@ -7817,7 +7858,7 @@ Perl__invlist_intersection_maybe_complement_2nd(pTHX_ SV* const a, SV* const b,
      assert(a != b);
  
      /* Special case if either one is empty */
-    len_a = _invlist_len(a);
+    len_a = (a == NULL) ? 0 : _invlist_len(a);
      if ((len_a == 0) || ((len_b = _invlist_len(b)) == 0)) {
  
          if (len_a != 0 && complement_b) {
@@ -7827,11 +7868,11 @@ Perl__invlist_intersection_maybe_complement_2nd(pTHX_ SV* const a, SV* const b,
               * must be every possible code point.  Thus the intersection is
               * simply 'a'. */
              if (*i != a) {
-                *i = invlist_clone(a);
-
                  if (*i == b) {
                      SvREFCNT_dec_NN(b);
                  }
+
+                *i = invlist_clone(a);
              }
              /* else *i is already 'a' */
              return;
@@ -7959,7 +8000,7 @@ Perl__invlist_intersection_maybe_complement_2nd(pTHX_ SV* const a, SV* const b,
      /* Set result to final length, which can change the pointer to array_r, so
       * re-find it */
      if (len_r != _invlist_len(r)) {
-       invlist_set_len(r, len_r);
+       invlist_set_len(r, len_r, *get_invlist_offset_addr(r));
         invlist_trim(r);
         array_r = invlist_array(r);
      }
@@ -8043,27 +8084,17 @@ Perl__invlist_invert(pTHX_ SV* const invlist)
       * have a zero; removes it otherwise.  As described above, the data
       * structure is set up so that this is very efficient */
  
-    UV* len_pos = _get_invlist_len_addr(invlist);
-
      PERL_ARGS_ASSERT__INVLIST_INVERT;
  
      assert(! invlist_is_iterating(invlist));
  
      /* The inverse of matching nothing is matching everything */
-    if (*len_pos == 0) {
+    if (_invlist_len(invlist) == 0) {
         _append_range_to_invlist(invlist, 0, UV_MAX);
         return;
      }
  
-    /* The exclusive or complents 0 to 1; and 1 to 0.  If the result is 1, the
-     * zero element was a 0, so it is being removed, so the length decrements
-     * by 1; and vice-versa.  SvCUR is unaffected */
-    if (*get_invlist_zero_addr(invlist) ^= 1) {
-       (*len_pos)--;
-    }
-    else {
-       (*len_pos)++;
-    }
+    *get_invlist_offset_addr(invlist) = ! *get_invlist_offset_addr(invlist);
  }
  
  void
@@ -8093,11 +8124,11 @@ Perl__invlist_invert_prop(pTHX_ SV* const invlist)
                 invlist_extend(invlist, len);
                 array = invlist_array(invlist);
             }
-           invlist_set_len(invlist, len);
+           invlist_set_len(invlist, len, *get_invlist_offset_addr(invlist));
             array[len - 1] = PERL_UNICODE_MAX + 1;
         }
         else {  /* Remove the 0x110000 */
-           invlist_set_len(invlist, len - 1);
+           invlist_set_len(invlist, len - 1, *get_invlist_offset_addr(invlist));
         }
      }
  
@@ -8115,17 +8146,19 @@ S_invlist_clone(pTHX_ SV* const invlist)
      /* Need to allocate extra space to accommodate Perl's addition of a
       * trailing NUL to SvPV's, since it thinks they are always strings */
      SV* new_invlist = _new_invlist(_invlist_len(invlist) + 1);
-    STRLEN length = SvCUR(invlist);
+    STRLEN physical_length = SvCUR(invlist);
+    bool offset = *(get_invlist_offset_addr(invlist));
  
      PERL_ARGS_ASSERT_INVLIST_CLONE;
  
-    SvCUR_set(new_invlist, length); /* This isn't done automatically */
-    Copy(SvPVX(invlist), SvPVX(new_invlist), length, char);
+    *(get_invlist_offset_addr(new_invlist)) = offset;
+    invlist_set_len(new_invlist, _invlist_len(invlist), offset);
+    Copy(SvPVX(invlist), SvPVX(new_invlist), physical_length, char);
  
      return new_invlist;
  }
  
-PERL_STATIC_INLINE UV*
+PERL_STATIC_INLINE STRLEN*
  S_get_invlist_iter_addr(pTHX_ SV* invlist)
  {
      /* Return the address of the UV that contains the current iteration
@@ -8133,17 +8166,9 @@ S_get_invlist_iter_addr(pTHX_ SV* invlist)
  
      PERL_ARGS_ASSERT_GET_INVLIST_ITER_ADDR;
  
-    return (UV *) (SvPVX(invlist) + (INVLIST_ITER_OFFSET * sizeof (UV)));
-}
-
-PERL_STATIC_INLINE UV*
-S_get_invlist_version_id_addr(pTHX_ SV* invlist)
-{
-    /* Return the address of the UV that contains the version id. */
-
-    PERL_ARGS_ASSERT_GET_INVLIST_VERSION_ID_ADDR;
+    assert(SvTYPE(invlist) == SVt_INVLIST);
  
-    return (UV *) (SvPVX(invlist) + (INVLIST_VERSION_ID_OFFSET * sizeof (UV)));
+    return &(((XINVLIST*) SvANY(invlist))->iterator);
  }
  
  PERL_STATIC_INLINE void
@@ -8167,7 +8192,7 @@ S_invlist_iterfinish(pTHX_ SV* invlist)
  
      PERL_ARGS_ASSERT_INVLIST_ITERFINISH;
  
-    *get_invlist_iter_addr(invlist) = UV_MAX;
+    *get_invlist_iter_addr(invlist) = (STRLEN) UV_MAX;
  }
  
  STATIC bool
@@ -8180,14 +8205,14 @@ S_invlist_iternext(pTHX_ SV* invlist, UV* start, UV* end)
       * <*start> and <*end> are unchanged, and the next call to this function
       * will start over at the beginning of the list */
  
-    UV* pos = get_invlist_iter_addr(invlist);
+    STRLEN* pos = get_invlist_iter_addr(invlist);
      UV len = _invlist_len(invlist);
      UV *array;
  
      PERL_ARGS_ASSERT_INVLIST_ITERNEXT;
  
      if (*pos >= len) {
-       *pos = UV_MAX;  /* Force iterinit() to be required next time */
+       *pos = (STRLEN) UV_MAX; /* Force iterinit() to be required next time */
         return FALSE;
      }
  
@@ -8210,7 +8235,7 @@ S_invlist_is_iterating(pTHX_ SV* const invlist)
  {
      PERL_ARGS_ASSERT_INVLIST_IS_ITERATING;
  
-    return *(get_invlist_iter_addr(invlist)) < UV_MAX;
+    return *(get_invlist_iter_addr(invlist)) < (STRLEN) UV_MAX;
  }
  
  PERL_STATIC_INLINE UV
@@ -8276,42 +8301,58 @@ Perl__invlist_contents(pTHX_ SV* const invlist)
  }
  #endif
  
-#ifdef PERL_ARGS_ASSERT__INVLIST_DUMP
+#ifndef PERL_IN_XSUB_RE
  void
-Perl__invlist_dump(pTHX_ SV* const invlist, const char * const header)
+Perl__invlist_dump(pTHX_ PerlIO *file, I32 level, const char * const indent, SV* const invlist)
  {
-    /* Dumps out the ranges in an inversion list.  The string 'header'
-     * if present is output on a line before the first range */
+    /* Designed to be called only by do_sv_dump().  Dumps out the ranges of the
+     * inversion list 'invlist' to 'file' at 'level'.  Each line is prefixed by
+     * the string 'indent'.  The output looks like this:
+         [0] 0x000A .. 0x000D
+         [2] 0x0085
+         [4] 0x2028 .. 0x2029
+         [6] 0x3104 .. INFINITY
+     * This means that the first range of code points matched by the list are
+     * 0xA through 0xD; the second range contains only the single code point
+     * 0x85, etc.  An inversion list is an array of UVs.  Two array elements
+     * are used to define each range (except if the final range extends to
+     * infinity, only a single element is needed).  The array index of the
+     * first element for the corresponding range is given in brackets. */
  
      UV start, end;
+    STRLEN count = 0;
  
      PERL_ARGS_ASSERT__INVLIST_DUMP;
  
-    if (header && strlen(header)) {
-       PerlIO_printf(Perl_debug_log, "%s\n", header);
-    }
      if (invlist_is_iterating(invlist)) {
-        PerlIO_printf(Perl_debug_log, "Can't dump because is in middle of iterating\n");
+        Perl_dump_indent(aTHX_ level, file,
+             "%sCan't dump inversion list because is in middle of iterating\n",
+             indent);
          return;
      }
  
      invlist_iterinit(invlist);
      while (invlist_iternext(invlist, &start, &end)) {
         if (end == UV_MAX) {
-           PerlIO_printf(Perl_debug_log, "0x%04"UVXf" .. INFINITY\n", start);
+           Perl_dump_indent(aTHX_ level, file,
+                                       "%s[%"UVuf"] 0x%04"UVXf" .. INFINITY\n",
+                                   indent, (UV)count, start);
         }
         else if (end != start) {
-           PerlIO_printf(Perl_debug_log, "0x%04"UVXf" .. 0x%04"UVXf"\n",
-                                                start,         end);
+           Perl_dump_indent(aTHX_ level, file,
+                                    "%s[%"UVuf"] 0x%04"UVXf" .. 0x%04"UVXf"\n",
+                               indent, (UV)count, start,         end);
         }
         else {
-           PerlIO_printf(Perl_debug_log, "0x%04"UVXf"\n", start);
+           Perl_dump_indent(aTHX_ level, file, "%s[%"UVuf"] 0x%04"UVXf"\n",
+                                            indent, (UV)count, start);
         }
+        count += 2;
      }
  }
  #endif
  
-#if 0
+#ifdef PERL_ARGS_ASSERT__INVLISTEQ
  bool
  S__invlistEQ(pTHX_ SV* const a, SV* const b, const bool complement_b)
  {
@@ -8353,7 +8394,6 @@ S__invlistEQ(pTHX_ SV* const a, SV* const b, const bool complement_b)
               * at the 0 that is always stored immediately before the array. */
              array_b--;
              len_b++;
-            array_b[0] = 0;
          }
      }
  
@@ -8375,14 +8415,9 @@ S__invlistEQ(pTHX_ SV* const a, SV* const b, const bool complement_b)
  #endif
  
  #undef HEADER_LENGTH
-#undef INVLIST_INITIAL_LENGTH
  #undef TO_INTERNAL_SIZE
  #undef FROM_INTERNAL_SIZE
-#undef INVLIST_LEN_OFFSET
-#undef INVLIST_ZERO_OFFSET
-#undef INVLIST_ITER_OFFSET
  #undef INVLIST_VERSION_ID
-#undef INVLIST_PREVIOUS_INDEX_OFFSET
  
  /* End of inversion list object */
  
@@ -10717,7 +10752,7 @@ tryagain:
  
         defchar: {
             STRLEN len = 0;
-           UV ender;
+           UV ender = 0;
             char *p;
             char *s;
  #define MAX_NODE_STRING_SIZE 127
@@ -10725,17 +10760,22 @@ tryagain:
             char *s0;
             U8 upper_parse = MAX_NODE_STRING_SIZE;
             STRLEN foldlen;
-            U8 node_type;
+            U8 node_type = compute_EXACTish(pRExC_state);
              bool next_is_quantifier;
              char * oldp = NULL;
  
+            /* We can convert EXACTF nodes to EXACTFU if they contain only
+             * characters that match identically regardless of the target
+             * string's UTF8ness.  The reason to do this is that EXACTF is not
+             * trie-able, EXACTFU is.  (We don't need to figure this out until
+             * pass 2) */
+            bool maybe_exactfu = node_type == EXACTF && PASS2;
+
              /* If a folding node contains only code points that don't
               * participate in folds, it can be changed into an EXACT node,
               * which allows the optimizer more things to look for */
              bool maybe_exact;
  
-           ender = 0;
-            node_type = compute_EXACTish(pRExC_state);
             ret = reg_node(pRExC_state, node_type);
  
              /* In pass1, folded, we use a temporary buffer instead of the
@@ -10748,8 +10788,8 @@ tryagain:
  
              /* We do the EXACTFish to EXACT node only if folding, and not if in
               * locale, as whether a character folds or not isn't known until
-             * runtime */
-            maybe_exact = FOLD && ! LOC;
+             * runtime.  (And we don't need to figure this out until pass 2) */
+            maybe_exact = FOLD && ! LOC && PASS2;
  
             /* XXX The node can hold up to 255 bytes, yet this only goes to
               * 127.  I (khw) do not know why.  Keeping it somewhat less than
@@ -11063,8 +11103,23 @@ tryagain:
                          || (node_type == EXACTFU
                              && ender == LATIN_SMALL_LETTER_SHARP_S)))
                  {
+                    if (IS_IN_SOME_FOLD_L1(ender)) {
+                        maybe_exact = FALSE;
+
+                        /* See if the character's fold differs between /d and
+                         * /u.  This includes the multi-char fold SHARP S to
+                         * 'ss' */
+                        if (maybe_exactfu
+                            && (PL_fold[ender] != PL_fold_latin1[ender]
+                                || ender == LATIN_SMALL_LETTER_SHARP_S
+                                || (len > 0
+                                   && isARG2_lower_or_UPPER_ARG1('s', ender)
+                                   && isARG2_lower_or_UPPER_ARG1('s', *(s-1)))))
+                        {
+                            maybe_exactfu = FALSE;
+                        }
+                    }
                      *(s++) = (char) ender;
-                    maybe_exact &= ! IS_IN_SOME_FOLD_L1(ender);
                  }
                  else {  /* UTF */
  
@@ -11253,6 +11308,15 @@ tryagain:
                   * do any better */
                  if (len == 0) {
                      len = full_len;
+
+                    /* If the node ends in an 's' we make sure it stays EXACTF,
+                     * as if it turns into an EXACTFU, it could later get
+                     * joined with another 's' that would then wrongly match
+                     * the sharp s */
+                    if (maybe_exactfu && isARG2_lower_or_UPPER_ARG1('s', ender))
+                    {
+                        maybe_exactfu = FALSE;
+                    }
                  } else {
  
                      /* Here, the node does contain some characters that aren't
@@ -11311,12 +11375,19 @@ tryagain:
              if (len == 0) {
                  OP(ret) = NOTHING;
              }
-            else{
-
-                /* If 'maybe_exact' is still set here, means there are no
-                 * code points in the node that participate in folds */
-                if (FOLD && maybe_exact) {
-                    OP(ret) = EXACT;
+            else {
+                if (FOLD) {
+                    /* If 'maybe_exact' is still set here, means there are no
+                     * code points in the node that participate in folds;
+                     * similarly for 'maybe_exactfu' and code points that match
+                     * differently depending on UTF8ness of the target string
+                     * */
+                    if (maybe_exact) {
+                        OP(ret) = EXACT;
+                    }
+                    else if (maybe_exactfu) {
+                        OP(ret) = EXACTFU;
+                    }
                  }
                  alloc_maybe_populate_EXACT(pRExC_state, ret, flagp, len, ender);
              }
@@ -13888,8 +13959,6 @@ parseit:
             for (i = start; i <= (int) high; i++) {
                 if (! ANYOF_BITMAP_TEST(ret, i)) {
                     ANYOF_BITMAP_SET(ret, i);
-                   prevvalue = value;
-                   value = i;
                 }
             }
         }
@@ -14692,26 +14761,10 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o)
              )
          );
          if ( IS_ANYOF_TRIE(op) || trie->bitmap ) {
-            int i;
-            int rangestart = -1;
-            U8* bitmap = IS_ANYOF_TRIE(op) ? (U8*)ANYOF_BITMAP(o) : (U8*)TRIE_BITMAP(trie);
              sv_catpvs(sv, "[");
-            for (i = 0; i <= 256; i++) {
-                if (i < 256 && BITMAP_TEST(bitmap,i)) {
-                    if (rangestart == -1)
-                        rangestart = i;
-                } else if (rangestart != -1) {
-                    if (i <= rangestart + 3)
-                        for (; rangestart < i; rangestart++)
-                            put_byte(sv, rangestart);
-                    else {
-                        put_byte(sv, rangestart);
-                        sv_catpvs(sv, "-");
-                        put_byte(sv, i - 1);
-                    }
-                    rangestart = -1;
-                }
-            }
+            (void) put_latin1_charclass_innards(sv, IS_ANYOF_TRIE(op)
+                                                   ? ANYOF_BITMAP(o)
+                                                   : TRIE_BITMAP(trie));
              sv_catpvs(sv, "]");
          } 
          
@@ -14755,7 +14808,6 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o)
      } else if (k == LOGICAL)
         Perl_sv_catpvf(aTHX_ sv, "[%d]", o->flags);     /* 2: embedded, otherwise 1 */
      else if (k == ANYOF) {
-       int i, rangestart = -1;
         const U8 flags = ANYOF_FLAGS(o);
         int do_sep = 0;
  
@@ -14769,32 +14821,19 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o)
             sv_catpvs(sv, "^");
  
         /* output what the standard cp 0-255 bitmap matches */
-       for (i = 0; i <= 256; i++) {
-           if (i < 256 && ANYOF_BITMAP_TEST(o,i)) {
-               if (rangestart == -1)
-                   rangestart = i;
-           } else if (rangestart != -1) {
-               if (i <= rangestart + 3)
-                   for (; rangestart < i; rangestart++)
-                       put_byte(sv, rangestart);
-               else {
-                   put_byte(sv, rangestart);
-                   sv_catpvs(sv, "-");
-                   put_byte(sv, i - 1);
-               }
-               do_sep = 1;
-               rangestart = -1;
-           }
-       }
+        do_sep = put_latin1_charclass_innards(sv, ANYOF_BITMAP(o));
          
          EMIT_ANYOF_TEST_SEPARATOR(do_sep,sv,flags);
          /* output any special charclass tests (used entirely under use locale) */
-       if (ANYOF_CLASS_TEST_ANY_SET(o))
-           for (i = 0; i < (int)(sizeof(anyofs)/sizeof(char*)); i++)
+       if (ANYOF_CLASS_TEST_ANY_SET(o)) {
+            int i;
+           for (i = 0; i < (int)(sizeof(anyofs)/sizeof(char*)); i++) {
                 if (ANYOF_CLASS_TEST(o,i)) {
                     sv_catpv(sv, anyofs[i]);
                     do_sep = 1;
                 }
+            }
+        }
          
          EMIT_ANYOF_TEST_SEPARATOR(do_sep,sv,flags);
          
@@ -14805,91 +14844,61 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o)
          /* output information about the unicode matching */
         if (flags & ANYOF_UNICODE_ALL)
             sv_catpvs(sv, "{unicode_all}");
-       else if (ANYOF_NONBITMAP(o))
-           sv_catpvs(sv, "{unicode}");
-       if (flags & ANYOF_NONBITMAP_NON_UTF8)
-           sv_catpvs(sv, "{outside bitmap}");
-
-       if (ANYOF_NONBITMAP(o)) {
-           SV *lv; /* Set if there is something outside the bit map */
-           SV * const sw = regclass_swash(prog, o, FALSE, &lv, NULL);
+       else if (ANYOF_NONBITMAP(o)) {
+            SV *lv; /* Set if there is something outside the bit map. */
              bool byte_output = FALSE;   /* If something in the bitmap has been
                                             output */
  
-           if (lv && lv != &PL_sv_undef) {
-               if (sw) {
-                   U8 s[UTF8_MAXBYTES_CASE+1];
-
-                   for (i = 0; i <= 256; i++) { /* Look at chars in bitmap */
-                       uvchr_to_utf8(s, i);
-
-                       if (i < 256
-                            && ! ANYOF_BITMAP_TEST(o, i)    /* Don't duplicate
-                                                               things already
-                                                               output as part
-                                                               of the bitmap */
-                            && swash_fetch(sw, s, TRUE))
-                        {
-                           if (rangestart == -1)
-                               rangestart = i;
-                       } else if (rangestart != -1) {
-                            byte_output = TRUE;
-                           if (i <= rangestart + 3)
-                               for (; rangestart < i; rangestart++) {
-                                   put_byte(sv, rangestart);
-                               }
-                           else {
-                               put_byte(sv, rangestart);
-                               sv_catpvs(sv, "-");
-                               put_byte(sv, i-1);
-                           }
-                           rangestart = -1;
-                       }
-                   }
-               }
+            if (flags & ANYOF_NONBITMAP_NON_UTF8) {
+                sv_catpvs(sv, "{outside bitmap}");
+            }
+            else {
+                sv_catpvs(sv, "{utf8}");
+            }
  
-               {
-                   char *s = savesvpv(lv);
-                   char * const origs = s;
+            /* Get the stuff that wasn't in the bitmap */
+           (void) regclass_swash(prog, o, FALSE, &lv, NULL);
+           if (lv && lv != &PL_sv_undef) {
+                char *s = savesvpv(lv);
+                char * const origs = s;
  
-                   while (*s && *s != '\n')
-                       s++;
+                while (*s && *s != '\n')
+                    s++;
  
-                   if (*s == '\n') {
-                       const char * const t = ++s;
+                if (*s == '\n') {
+                    const char * const t = ++s;
  
-                        if (byte_output) {
-                            sv_catpvs(sv, " ");
-                        }
+                    if (byte_output) {
+                        sv_catpvs(sv, " ");
+                    }
  
-                       while (*s) {
-                           if (*s == '\n') {
+                    while (*s) {
+                        if (*s == '\n') {
  
-                                /* Truncate very long output */
-                               if (s - origs > 256) {
-                                   Perl_sv_catpvf(aTHX_ sv,
-                                                  "%.*s...",
-                                                  (int) (s - origs - 1),
-                                                  t);
-                                   goto out_dump;
-                               }
-                               *s = ' ';
-                           }
-                           else if (*s == '\t') {
-                               *s = '-';
-                           }
-                           s++;
-                       }
-                       if (s[-1] == ' ')
-                           s[-1] = 0;
+                            /* Truncate very long output */
+                            if (s - origs > 256) {
+                                Perl_sv_catpvf(aTHX_ sv,
+                                               "%.*s...",
+                                               (int) (s - origs - 1),
+                                               t);
+                                goto out_dump;
+                            }
+                            *s = ' ';
+                        }
+                        else if (*s == '\t') {
+                            *s = '-';
+                        }
+                        s++;
+                    }
+                    if (s[-1] == ' ')
+                        s[-1] = 0;
  
-                       sv_catpv(sv, t);
-                   }
+                    sv_catpv(sv, t);
+                }
  
-               out_dump:
+            out_dump:
  
-                   Safefree(origs);
-               }
+                Safefree(origs);
                 SvREFCNT_dec_NN(lv);
             }
         }
@@ -15286,7 +15295,6 @@ Perl_re_dup_guts(pTHX_ const REGEXP *sstr, REGEXP *dstr, CLONE_PARAMS *param)
                so we need to copy it locally.  */
      RX_WRAPPED(dstr) = SAVEPVN(RX_WRAPPED(sstr), SvCUR(sstr)+1);
      ret->mother_re   = NULL;
-    ret->gofs = 0;
  }
  #endif /* PERL_IN_XSUB_RE */
  
@@ -15519,12 +15527,17 @@ S_put_byte(pTHX_ SV *sv, int c)
  
         So the old condition can be simplified to !isPRINT(c)  */
      if (!isPRINT(c)) {
-       if (c < 256) {
-           Perl_sv_catpvf(aTHX_ sv, "\\x%02x", c);
-       }
-       else {
-           Perl_sv_catpvf(aTHX_ sv, "\\x{%x}", c);
-       }
+        switch (c) {
+            case '\r': Perl_sv_catpvf(aTHX_ sv, "\\r"); break;
+            case '\n': Perl_sv_catpvf(aTHX_ sv, "\\n"); break;
+            case '\t': Perl_sv_catpvf(aTHX_ sv, "\\t"); break;
+            case '\f': Perl_sv_catpvf(aTHX_ sv, "\\f"); break;
+            case '\a': Perl_sv_catpvf(aTHX_ sv, "\\a"); break;
+
+            default:
+                Perl_sv_catpvf(aTHX_ sv, "\\x{%x}", c);
+                break;
+        }
      }
      else {
         const char string = c;
@@ -15534,6 +15547,63 @@ S_put_byte(pTHX_ SV *sv, int c)
      }
  }
  
+STATIC bool
+S_put_latin1_charclass_innards(pTHX_ SV *sv, char *bitmap)
+{
+    /* Appends to 'sv' a displayable version of the innards of the bracketed
+     * character class whose bitmap is 'bitmap' (bits 0 .. 255 are examined).
+     * Returns 'TRUE' if it actually output anything, else 'FALSE' */
+
+    int i;                  /* Bit being tested; 256 is a past-the-end value */
+    int rangestart = -1;    /* First bit of the current run; -1 if not in one */
+    bool has_output_anything = FALSE;
+
+    PERL_ARGS_ASSERT_PUT_LATIN1_CHARCLASS_INNARDS;
+    /* The extra pass with i == 256 flushes a run that extends through 255 */
+    for (i = 0; i <= 256; i++) {
+        if (i < 256 && BITMAP_TEST((U8 *) bitmap,i)) {
+            if (rangestart == -1)
+                rangestart = i;
+        } else if (rangestart != -1) {
+            int j = i - 1;  /* Final bit of the run that just ended */
+            if (i <= rangestart + 3) {  /* Individual chars in short ranges */
+                for (; rangestart < i; rangestart++)
+                    put_byte(sv, rangestart);
+            }
+            else if (   j > 255     /* (Can't be true, as i is at most 256) */
+                     || ! isALPHANUMERIC(rangestart)
+                     || ! isALPHANUMERIC(j)
+                     || isDIGIT(rangestart) != isDIGIT(j)
+                     || isUPPER(rangestart) != isUPPER(j)
+                     || isLOWER(rangestart) != isLOWER(j)
+
+                        /* This final test should get optimized out except
+                         * on EBCDIC platforms, where it causes ranges that
+                         * cross discontinuities like i/j to be shown as hex
+                         * instead of the misleading, e.g. H-K (since that
+                         * range includes more than H, I, J, K). */
+                     || (j - rangestart)
+                         != NATIVE_TO_ASCII(j) - NATIVE_TO_ASCII(rangestart))
+            {   /* Ends aren't same-kind alphanumerics: show both in hex */
+                Perl_sv_catpvf(aTHX_ sv, "\\x{%02x}-\\x{%02x}",
+                               rangestart,
+                               (j < 256) ? j : 255);
+            }
+            else { /* Here, the ends of the range are both digits, or both
+                      uppercase, or both lowercase; and there's no
+                      discontinuity in the range (which could happen on EBCDIC
+                      platforms) */
+                put_byte(sv, rangestart);
+                sv_catpvs(sv, "-");
+                put_byte(sv, j);
+            }
+            rangestart = -1;    /* No longer within a run */
+            has_output_anything = TRUE;
+        }
+    }
+
+    return has_output_anything;
+}
  
  #define CLEAR_OPTSTART \
      if (optstart) STMT_START { \