regcomp.c: Change loop variable name, associated changes

[perl5.git] / regcomp.c
diff --git a/regcomp.c b/regcomp.c

index d62028d..1b738b6 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
@@ -689,7 +689,7 @@ S_scan_commit(pTHX_ const RExC_state_t *pRExC_state, scan_data_t *data, I32 *min
                     |= ((data->flags & SF_BEFORE_EOL) << SF_FIX_SHIFT_EOL);
             else
                 data->flags &= ~SF_FIX_BEFORE_EOL;
-           data->minlen_fixed=minlenp; 
+           data->minlen_fixed=minlenp;
             data->lookbehind_fixed=0;
         }
         else { /* *data->longest == data->longest_float */
@@ -1387,8 +1387,8 @@ is the recommended Unicode-aware way of saying
                scan += len;                                                   \
                len = 0;                                                       \
             } else {                                                          \
-               uvc = utf8n_to_uvuni( (const U8*)uc, UTF8_MAXLEN, &len, uniflags);\
-               uvc = to_uni_fold( uvc, foldbuf, &foldlen );                  \
+               len = UTF8SKIP(uc);\
+               uvc = to_utf8_fold( uc, foldbuf, &foldlen);                   \
                 foldlen -= UNISKIP( uvc );                                    \
                 scan = foldbuf + UNISKIP( uvc );                              \
             }                                                                 \
@@ -1705,7 +1705,7 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs
          DEBUG_TRIE_COMPILE_MORE_r( PerlIO_printf( Perl_debug_log, 
              "%*sCompiling trie using list compiler\n",
              (int)depth * 2 + 2, ""));
-       
+
         trie->states = (reg_trie_state *)
             PerlMemShared_calloc( TRIE_CHARCOUNT(trie) + 2,
                                   sizeof(reg_trie_state) );
@@ -2647,13 +2647,13 @@ S_join_exact(pTHX_ RExC_state_t *pRExC_state, regnode *scan, I32 *min, U32 flags
      }
      
  #ifdef DEBUGGING
-    /* Allow dumping */
+    /* Allow dumping but overwriting the collection of skipped
+     * ops and/or strings with fake optimized ops */
      n = scan + NODE_SZ_STR(scan);
      while (n <= stop) {
-        if (PL_regkind[OP(n)] != NOTHING || OP(n) == NOTHING) {
-            OP(n) = OPTIMIZED;
-            NEXT_OFF(n) = 0;
-        }
+       OP(n) = OPTIMIZED;
+       FLAGS(n) = 0;
+       NEXT_OFF(n) = 0;
          n++;
      }
  #endif
@@ -2774,7 +2774,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
             int off = (reg_off_by_arg[OP(scan)] ? ARG(scan) : NEXT_OFF(scan));
             int noff;
             regnode *n = scan;
-       
+
             /* Skip NOTHING and LONGJMP. */
             while ((n = regnext(n))
                    && ((PL_regkind[OP(n)] == NOTHING && (noff = NEXT_OFF(n)))
@@ -2796,7 +2796,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
             next = regnext(scan);
             code = OP(scan);
             /* demq: the op(next)==code check is to see if we have "branch-branch" AFAICT */
-       
+
             if (OP(next) == code || code == IFTHEN) {
                 /* NOTE - There is similar code to this block below for handling
                    TRIE nodes on a re-study.  If you change stuff here check there
@@ -2804,7 +2804,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                 I32 max1 = 0, min1 = I32_MAX, num = 0;
                 struct regnode_charclass_class accum;
                 regnode * const startbranch=scan;
-               
+
                 if (flags & SCF_DO_SUBSTR)
                     SCAN_COMMIT(pRExC_state, data, minlenp); /* Cannot merge strings after this. */
                 if (flags & SCF_DO_STCLASS)
@@ -2941,7 +2941,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                   a nested if into a case structure of sorts.
  
                 */
-               
+
                     int made=0;
                     if (!re_trie_maxbuff) {
                         re_trie_maxbuff = get_sv(RE_TRIE_MAXBUF_NAME, 1);
@@ -3091,7 +3091,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                          
                          if ( last && TRIE_TYPE_IS_SAFE ) {
                              made= make_trie( pRExC_state, startbranch, first, scan, tail, count, optype, depth+1 );
-#ifdef TRIE_STUDY_OPT  
+#ifdef TRIE_STUDY_OPT
                              if ( ((made == MADE_EXACT_TRIE && 
                                   startbranch == first) 
                                   || ( first_non_open == first )) && 
@@ -3982,7 +3982,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                     break;
                 CASE_SYNST_FNC(VERTWS);
                 CASE_SYNST_FNC(HORIZWS);
-               
+
                 }
                 if (flags & SCF_DO_STCLASS_OR)
                     cl_and(data->start_class, and_withp);
@@ -4363,7 +4363,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                 flags &= ~SCF_DO_SUBSTR; 
         }
  #endif /* old or new */
-#endif /* TRIE_STUDY_OPT */    
+#endif /* TRIE_STUDY_OPT */
  
         /* Else: zero-length, ignore. */
         scan = regnext(scan);
@@ -4523,7 +4523,7 @@ Perl_re_compile(pTHX_ SV * const pattern, U32 orig_pm_flags)
      struct regexp *r;
      register regexp_internal *ri;
      STRLEN plen;
-    char  *exp;
+    char* VOL exp;
      char* xend;
      regnode *scan;
      I32 flags;
@@ -4553,7 +4553,14 @@ Perl_re_compile(pTHX_ SV * const pattern, U32 orig_pm_flags)
  
      DEBUG_r(if (!PL_colorset) reginitcolors());
  
-    RExC_utf8 = RExC_orig_utf8 = SvUTF8(pattern);
+    exp = SvPV(pattern, plen);
+
+    if (plen == 0) { /* ignore the utf8ness if the pattern is 0 length */
+       RExC_utf8 = RExC_orig_utf8 = 0;
+    }
+    else {
+       RExC_utf8 = RExC_orig_utf8 = SvUTF8(pattern);
+    }
      RExC_uni_semantics = 0;
      RExC_contains_locale = 0;
  
@@ -4565,12 +4572,7 @@ Perl_re_compile(pTHX_ SV * const pattern, U32 orig_pm_flags)
      }
  
      if (jump_ret == 0) {    /* First time through */
-       exp = SvPV(pattern, plen);
         xend = exp + plen;
-       /* ignore the utf8ness if the pattern is 0 length */
-       if (plen == 0) {
-           RExC_utf8 = RExC_orig_utf8 = 0;
-       }
  
          DEBUG_COMPILE_r({
              SV *dsv= sv_newmortal();
@@ -4602,7 +4604,9 @@ Perl_re_compile(pTHX_ SV * const pattern, U32 orig_pm_flags)
          -- dmq */
          DEBUG_PARSE_r(PerlIO_printf(Perl_debug_log,
             "UTF8 mismatch! Converting to utf8 for resizing and compile\n"));
-        exp = (char*)Perl_bytes_to_utf8(aTHX_ (U8*)SvPV(pattern, plen), &len);
+        exp = (char*)Perl_bytes_to_utf8(aTHX_
+                                       (U8*)SvPV_nomg(pattern, plen),
+                                       &len);
          xend = exp + len;
          RExC_orig_utf8 = RExC_utf8 = 1;
          SAVEFREEPV(exp);
@@ -4659,7 +4663,11 @@ Perl_re_compile(pTHX_ SV * const pattern, U32 orig_pm_flags)
         * Clever compilers notice this and complain. --jhi */
      REGC((U8)REG_MAGIC, (char*)RExC_emit);
  #endif
-    DEBUG_PARSE_r(PerlIO_printf(Perl_debug_log, "Starting first pass (sizing)\n"));
+    DEBUG_PARSE_r(
+       PerlIO_printf(Perl_debug_log, "Starting first pass (sizing)\n");
+        RExC_lastnum=0;
+        RExC_lastparse=NULL;
+    );
      if (reg(pRExC_state, 0, &flags,1) == NULL) {
         RExC_precomp = NULL;
         return(NULL);
@@ -4911,7 +4919,7 @@ reStudy:
                     sawplus = 1;
                 else
                     first += regarglen[OP(first)];
-               
+
                 first = NEXTOPER(first);
                 first_next= regnext(first);
         }
@@ -4926,7 +4934,7 @@ reStudy:
             else
                 ri->regstclass = first;
         }
-#ifdef TRIE_STCLASS    
+#ifdef TRIE_STCLASS
         else if (PL_regkind[OP(first)] == TRIE &&
                 ((reg_trie_data *)ri->data->data[ ARG(first) ])->minlen>0) 
         {
@@ -4947,7 +4955,7 @@ reStudy:
              make_trie_failtable(pRExC_state, (regnode *)first, trie_op, 0);
             ri->regstclass = trie_op;
         }
-#endif 
+#endif
         else if (REGNODE_SIMPLE(OP(first)))
             ri->regstclass = first;
         else if (PL_regkind[OP(first)] == BOUND ||
@@ -5013,7 +5021,7 @@ reStudy:
         * it happens that c_offset_min has been invalidated, since the
         * earlier string may buy us something the later one won't.]
         */
-       
+
         data.longest_fixed = newSVpvs("");
         data.longest_float = newSVpvs("");
         data.last_found = newSVpvs("");
@@ -5031,7 +5039,7 @@ reStudy:
              &data, -1, NULL, NULL,
              SCF_DO_SUBSTR | SCF_WHILEM_VISITED_POS | stclass_flag,0);
  
-       
+
          CHECK_RESTUDY_GOTO;
  
  
@@ -5199,7 +5207,7 @@ reStudy:
         I32 fake;
         struct regnode_charclass_class ch_class;
         I32 last_close = 0;
-       
+
         DEBUG_PARSE_r(PerlIO_printf(Perl_debug_log, "\nMulti Top Level\n"));
  
         scan = ri->program + 1;
@@ -5405,7 +5413,8 @@ Perl_reg_named_buff_fetch(pTHX_ REGEXP * const r, SV * const namesv,
                      if (!retarray)
                          return ret;
                  } else {
-                    ret = newSVsv(&PL_sv_undef);
+                    if (retarray)
+                        ret = newSVsv(&PL_sv_undef);
                  }
                  if (retarray)
                      av_push(retarray, ret);
@@ -5824,19 +5833,87 @@ S_reg_scan_name(pTHX_ RExC_state_t *pRExC_state, U32 flags)
  
  /* This section of code defines the inversion list object and its methods.  The
   * interfaces are highly subject to change, so as much as possible is static to
- * this file.  An inversion list is here implemented as a malloc'd C array with
- * some added info.  More will be coming when functionality is added later.
+ * this file.  An inversion list is here implemented as a malloc'd C UV array
+ * with some added info that is placed as UVs at the beginning in a header
+ * portion.  An inversion list for Unicode is an array of code points, sorted
+ * by ordinal number.  The zeroth element is the first code point in the list.
+ * The 1th element is the first element beyond that not in the list.  In other
+ * words, the first range is
+ *  invlist[0]..(invlist[1]-1)
+ * The other ranges follow.  Thus every element whose index is divisible by two
+ * marks the beginning of a range that is in the list, and every element whose
+ * index is odd marks the beginning of a range not in the list.  A single
+ * element inversion list that contains the single code point N generally
+ * consists of two elements
+ *  invlist[0] == N
+ *  invlist[1] == N+1
+ * (The exception is when N is the highest representable value on the
+ * machine, in which case the list containing just it would be a single
+ * element, itself.  By extension, if the last range in the list extends to
+ * infinity, then the first element of that range will be in the inversion list
+ * at a position that is divisible by two, and is the final element in the
+ * list.)
+ * Taking the complement (inverting) an inversion list is quite simple, if the
+ * first element is 0, remove it; otherwise add a 0 element at the beginning.
+ * This implementation reserves an element at the beginning of each inversion list
+ * to contain 0 when the list contains 0, and contains 1 otherwise.  The actual
+ * beginning of the list is either that element if 0, or the next one if 1.
   *
- * It is currently implemented as an SV pointing to an array of UVs that the SV
- * thinks are bytes.  This allows us to have an array of UV whose memory
- * management is automatically handled by the existing facilities for SV's.
+ * More about inversion lists can be found in "Unicode Demystified"
+ * Chapter 13 by Richard Gillam, published by Addison-Wesley.
+ * More will be coming when functionality is added later.
+ *
+ * The inversion list data structure is currently implemented as an SV pointing
+ * to an array of UVs that the SV thinks are bytes.  This allows us to have an
+ * array of UV whose memory management is automatically handled by the existing
+ * facilities for SV's.
   *
   * Some of the methods should always be private to the implementation, and some
   * should eventually be made public */
  
+#define INVLIST_LEN_OFFSET 0   /* Number of elements in the inversion list */
+#define INVLIST_ITER_OFFSET 1  /* Current iteration position */
+
+#define INVLIST_ZERO_OFFSET 2  /* 0 or 1; must be last element in header */
+/* The UV at position ZERO contains either 0 or 1.  If 0, the inversion list
+ * contains the code point U+0000, and begins here.  If 1, the inversion list
+ * doesn't contain U+0000, and it begins at the next UV in the array.
+ * Inverting an inversion list consists of adding or removing the 0 at the
+ * beginning of it.  By reserving a space for that 0, inversion can be made
+ * very fast */
+
+#define HEADER_LENGTH (INVLIST_ZERO_OFFSET + 1)
+
+/* Internally things are UVs: convert an element count to/from a byte size */
+#define TO_INTERNAL_SIZE(x) (((x) + HEADER_LENGTH) * sizeof(UV))
+#define FROM_INTERNAL_SIZE(x) (((x) / sizeof(UV)) - HEADER_LENGTH)
+
  #define INVLIST_INITIAL_LEN 10
  
  PERL_STATIC_INLINE UV*
+S__invlist_array_init(pTHX_ SV* const invlist, const bool will_have_0)
+{
+    /* Returns a pointer to the first element in the inversion list's array.
+     * This is called upon initialization of an inversion list.  Where the
+     * array begins depends on whether the list has the code point U+0000
+     * in it or not.  The other parameter tells it whether the code that
+     * follows this call is about to put a 0 in the inversion list or not.
+     * The first element is either the element with 0, if 0, or the next one,
+     * if 1 */
+
+    UV* zero = get_invlist_zero_addr(invlist);
+
+    PERL_ARGS_ASSERT__INVLIST_ARRAY_INIT;
+
+    /* Must be empty */
+    assert(! *get_invlist_len_addr(invlist));
+
+    /* 1^1 = 0; 1^0 = 1 */
+    *zero = 1 ^ will_have_0;
+    return zero + *zero;
+}
+
+PERL_STATIC_INLINE UV*
  S_invlist_array(pTHX_ SV* const invlist)
  {
      /* Returns the pointer to the inversion list's array.  Every time the
@@ -5845,28 +5922,39 @@ S_invlist_array(pTHX_ SV* const invlist)
  
      PERL_ARGS_ASSERT_INVLIST_ARRAY;
  
-    return (UV *) SvPVX(invlist);
+    /* Must not be empty.  If these fail, you probably didn't check for <len>
+     * being non-zero before trying to get the array */
+    assert(*get_invlist_len_addr(invlist));
+    assert(*get_invlist_zero_addr(invlist) == 0
+          || *get_invlist_zero_addr(invlist) == 1);
+
+    /* The array begins either at the element reserved for zero if the
+     * list contains 0 (that element will be set to 0), or otherwise the next
+     * element (in which case the reserved element will be set to 1). */
+    return (UV *) (get_invlist_zero_addr(invlist)
+                  + *get_invlist_zero_addr(invlist));
  }
  
-PERL_STATIC_INLINE UV
-S_invlist_len(pTHX_ SV* const invlist)
+PERL_STATIC_INLINE UV*
+S_get_invlist_len_addr(pTHX_ SV* invlist)
  {
-    /* Returns the current number of elements in the inversion list's array */
+    /* Return the address of the UV that contains the current number
+     * of used elements in the inversion list */
  
-    PERL_ARGS_ASSERT_INVLIST_LEN;
+    PERL_ARGS_ASSERT_GET_INVLIST_LEN_ADDR;
  
-    return SvCUR(invlist) / sizeof(UV);
+    return (UV *) (SvPVX(invlist) + (INVLIST_LEN_OFFSET * sizeof (UV)));
  }
  
  PERL_STATIC_INLINE UV
-S_invlist_max(pTHX_ SV* const invlist)
+S_invlist_len(pTHX_ SV* const invlist)
  {
-    /* Returns the maximum number of elements storable in the inversion list's
-     * array, without having to realloc() */
+    /* Returns the current number of elements stored in the inversion list's
+     * array */
  
-    PERL_ARGS_ASSERT_INVLIST_MAX;
+    PERL_ARGS_ASSERT_INVLIST_LEN;
  
-    return SvLEN(invlist) / sizeof(UV);
+    return *get_invlist_len_addr(invlist);
  }
  
  PERL_STATIC_INLINE void
@@ -5876,23 +5964,48 @@ S_invlist_set_len(pTHX_ SV* const invlist, const UV len)
  
      PERL_ARGS_ASSERT_INVLIST_SET_LEN;
  
-    SvCUR_set(invlist, len * sizeof(UV));
+    *get_invlist_len_addr(invlist) = len;
+
+    assert(len <= SvLEN(invlist));
+
+    SvCUR_set(invlist, TO_INTERNAL_SIZE(len));
+    /* If the list contains U+0000, that element is part of the header,
+     * and should not be counted as part of the array.  It will contain
+     * 0 in that case, and 1 otherwise.  So we could flop 0=>1, 1=>0 and
+     * subtract:
+     * SvCUR_set(invlist,
+     *           TO_INTERNAL_SIZE(len
+     *                            - (*get_invlist_zero_addr(inv_list) ^ 1)));
+     * But, this is only valid if len is not 0.  The consequences of not doing
+     * this is that the memory allocation code may think that 1 more UV is
+     * being used than actually is, and so might do an unnecessary grow.  That
+     * seems worth not bothering to make this the precise amount.
+     *
+     * Note that when inverting, SvCUR shouldn't change */
  }
  
-PERL_STATIC_INLINE void
-S_invlist_set_max(pTHX_ SV* const invlist, const UV max)
+PERL_STATIC_INLINE UV
+S_invlist_max(pTHX_ SV* const invlist)
  {
+    /* Returns the maximum number of elements storable in the inversion list's
+     * array, without having to realloc() */
  
-    /* Sets the maximum number of elements storable in the inversion list
-     * without having to realloc() */
+    PERL_ARGS_ASSERT_INVLIST_MAX;
  
-    PERL_ARGS_ASSERT_INVLIST_SET_MAX;
+    return FROM_INTERNAL_SIZE(SvLEN(invlist));
+}
  
-    if (max < invlist_len(invlist)) {
-       Perl_croak(aTHX_ "panic: Can't make max size '%"UVuf"' less than current length %"UVuf" in inversion list", invlist_max(invlist), invlist_len(invlist));
-    }
+PERL_STATIC_INLINE UV*
+S_get_invlist_zero_addr(pTHX_ SV* invlist)
+{
+    /* Return the address of the UV that is reserved to hold 0 if the inversion
+     * list contains 0.  This has to be the last element of the heading, as the
+     * list proper starts with either it if 0, or the next element if not.
+     * (But we force it to contain either 0 or 1) */
+
+    PERL_ARGS_ASSERT_GET_INVLIST_ZERO_ADDR;
  
-    SvLEN_set(invlist, max * sizeof(UV));
+    return (UV *) (SvPVX(invlist) + (INVLIST_ZERO_OFFSET * sizeof (UV)));
  }
  
  #ifndef PERL_IN_XSUB_RE
@@ -5904,24 +6017,26 @@ Perl__new_invlist(pTHX_ IV initial_size)
       * space to store 'initial_size' elements.  If that number is negative, a
       * system default is used instead */
  
+    SV* new_list;
+
      if (initial_size < 0) {
         initial_size = INVLIST_INITIAL_LEN;
      }
  
      /* Allocate the initial space */
-    return newSV(initial_size * sizeof(UV));
-}
-#endif
+    new_list = newSV(TO_INTERNAL_SIZE(initial_size));
+    invlist_set_len(new_list, 0);
  
-PERL_STATIC_INLINE void
-S_invlist_destroy(pTHX_ SV* const invlist)
-{
-   /* Inversion list destructor */
+    /* Force iterinit() to be used to get iteration to work */
+    *get_invlist_iter_addr(new_list) = UV_MAX;
  
-    PERL_ARGS_ASSERT_INVLIST_DESTROY;
+    /* This should force a segfault if a method doesn't initialize this
+     * properly */
+    *get_invlist_zero_addr(new_list) = UV_MAX;
  
-    SvREFCNT_dec(invlist);
+    return new_list;
  }
+#endif
  
  STATIC void
  S_invlist_extend(pTHX_ SV* const invlist, const UV new_max)
@@ -5930,7 +6045,7 @@ S_invlist_extend(pTHX_ SV* const invlist, const UV new_max)
  
      PERL_ARGS_ASSERT_INVLIST_EXTEND;
  
-    SvGROW((SV *)invlist, new_max * sizeof(UV));
+    SvGROW((SV *)invlist, TO_INTERNAL_SIZE(new_max));
  }
  
  PERL_STATIC_INLINE void
@@ -5946,9 +6061,8 @@ S_invlist_trim(pTHX_ SV* const invlist)
  
  /* An element is in an inversion list iff its index is even numbered: 0, 2, 4,
   * etc */
-
-#define ELEMENT_IN_INVLIST_SET(i) (! ((i) & 1))
-#define PREV_ELEMENT_IN_INVLIST_SET(i) ! ELEMENT_IN_INVLIST_SET(i)
+#define ELEMENT_RANGE_MATCHES_INVLIST(i) (! ((i) & 1))
+#define PREV_RANGE_MATCHES_INVLIST(i) (! ELEMENT_RANGE_MATCHES_INVLIST(i))
  
  #ifndef PERL_IN_XSUB_RE
  void
@@ -5958,14 +6072,16 @@ Perl__append_range_to_invlist(pTHX_ SV* const invlist, const UV start, const UV
      * the end of the inversion list.  The range must be above any existing
      * ones. */
  
-    UV* array = invlist_array(invlist);
+    UV* array;
      UV max = invlist_max(invlist);
      UV len = invlist_len(invlist);
  
      PERL_ARGS_ASSERT__APPEND_RANGE_TO_INVLIST;
  
-    if (len > 0) {
-
+    if (len == 0) { /* Empty lists must be initialized */
+        array = _invlist_array_init(invlist, start == 0);
+    }
+    else {
         /* Here, the existing list is non-empty. The current max entry in the
          * list is generally the first value not in the set, except when the
          * set extends to the end of permissible values, in which case it is
@@ -5973,8 +6089,9 @@ Perl__append_range_to_invlist(pTHX_ SV* const invlist, const UV start, const UV
          * append out-of-order */
  
         UV final_element = len - 1;
+       array = invlist_array(invlist);
         if (array[final_element] > start
-           || ELEMENT_IN_INVLIST_SET(final_element))
+           || ELEMENT_RANGE_MATCHES_INVLIST(final_element))
         {
             Perl_croak(aTHX_ "panic: attempting to append to an inversion list, but wasn't at the end of the list");
         }
@@ -5989,7 +6106,7 @@ Perl__append_range_to_invlist(pTHX_ SV* const invlist, const UV start, const UV
             }
             else {
                 /* But if the end is the maximum representable on the machine,
-                * just let the range that this would extend have no end */
+                * just let the range that this would extend to have no end */
                 invlist_set_len(invlist, len - 1);
             }
             return;
@@ -6004,10 +6121,13 @@ Perl__append_range_to_invlist(pTHX_ SV* const invlist, const UV start, const UV
       * moved */
      if (max < len) {
         invlist_extend(invlist, len);
+       invlist_set_len(invlist, len);  /* Have to set len here to avoid assert
+                                          failure in invlist_array() */
         array = invlist_array(invlist);
      }
-
-    invlist_set_len(invlist, len);
+    else {
+       invlist_set_len(invlist, len);
+    }
  
      /* The next item on the list starts the range, the one after that is
       * one past the new range.  */
@@ -6021,14 +6141,138 @@ Perl__append_range_to_invlist(pTHX_ SV* const invlist, const UV start, const UV
         invlist_set_len(invlist, len - 1);
      }
  }
-#endif
  
-STATIC void
-S_invlist_union(pTHX_ SV* const a, SV* const b, SV** output)
+STATIC IV
+S_invlist_search(pTHX_ SV* const invlist, const UV cp)
+{
+    /* Searches the inversion list for the range that contains the input code
+     * point <cp>.  Returns -1 if the list is empty or <cp> is below its first
+     * element.  Otherwise returns the index i into the list's array for which
+     * array[i] <= cp < array[i+1] (there is no upper bound for the final i).
+     * An odd i is a range NOT matched by the list, so callers must check. */
+
+    IV low = 0;
+    IV high = invlist_len(invlist);
+    const UV * array;
+
+    PERL_ARGS_ASSERT_INVLIST_SEARCH;
+
+    /* An empty list contains nothing.  Check this before fetching the array,
+     * as invlist_array() asserts that the list is non-empty */
+    if (high == 0) {
+       return -1;
+    }
+
+    array = invlist_array(invlist);
+    if (cp < array[0]) {    /* Before the first element */
+       return -1;
+    }
+
+    /* Binary search.  What we are looking for is <i> such that
+     * array[i] <= cp < array[i+1].  The loop below converges on the i+1.
+     * (An early exit when cp < array[mid+1] is possible, but is not done.) */
+    while (low < high) {
+       IV mid = (low + high) / 2;
+       if (array[mid] <= cp) {
+           low = mid + 1;
+       }
+       else { /* cp < array[mid] */
+           high = mid;
+       }
+    }
+
+    return high - 1;
+}
+
+void
+Perl__invlist_populate_swatch(pTHX_ SV* const invlist, const UV start, const UV end, U8* swatch)
+{
+    /* populates a swatch of a swash the same way swatch_get() does in utf8.c,
+     * but is used when the swash has an inversion list, so that a binary
+     * search can be used instead of a linear one.  For each code point in
+     * <start> .. <end>-1 that <invlist> matches, bit number (cp - <start>) of
+     * <swatch> is set (bit 0 being the low bit of swatch[0]).  It assumes
+     * that <swatch> is all 0's on input */
+
+    UV current = start;
+    const IV len = invlist_len(invlist);
+    IV i;
+    const UV * array;
+
+    PERL_ARGS_ASSERT__INVLIST_POPULATE_SWATCH;
+
+    if (len == 0) { /* Empty inversion list */
+        return;
+    }
+
+    array = invlist_array(invlist);
+
+    /* Find which range <start> is in (-1 if it is below the first one) */
+    i = invlist_search(invlist, start);
+
+    /* We populate from <start> up to, but not including, <end> */
+    while (current < end) {
+        UV upper;
+
+       /* The inversion list gives the results for every possible code point
+        * after the first one in the list.  Only those ranges whose index is
+        * even are ones that the inversion list matches.  For the odd ones,
+        * and if the initial code point is not in the list, we have to skip
+        * forward to the next element */
+        if (i == -1 || ! ELEMENT_RANGE_MATCHES_INVLIST(i)) {
+            i++;
+            if (i >= len) { /* Finished if beyond the end of the array */
+                return;
+            }
+            current = array[i];
+           if (current >= end) {   /* Finished if beyond the end of what we
+                                      are populating */
+                return;
+            }
+        }
+        assert(current >= start);
+
+       /* The current range ends one below the next one, except don't go past
+        * <end> */
+        i++;
+        upper = (i < len && array[i] < end) ? array[i] : end;
+
+       /* Here we are in a range that matches.  For each code point in it, set
+        * its bit: byte (offset >> 3) of the swatch, bit (offset & 7) */
+        for (; current < upper; current++) {
+            const STRLEN offset = (STRLEN)(current - start);
+            swatch[offset >> 3] |= 1 << (offset & 7);
+        }
+
+       /* Quit if at the end of the list */
+        if (i >= len) {
+
+           /* But first, have to deal with the highest possible code point on
+            * the platform.  The previous code assumes that <end> is one
+            * beyond where we want to populate, but that is impossible at the
+            * platform's infinity, so have to handle it specially */
+            if (UNLIKELY(end == UV_MAX && ELEMENT_RANGE_MATCHES_INVLIST(len-1)))
+           {
+                const STRLEN offset = (STRLEN)(end - start);
+                swatch[offset >> 3] |= 1 << (offset & 7);
+            }
+            return;
+        }
+
+       /* Advance to the next range, which will be for code points not in the
+        * inversion list */
+        current = array[i];
+    }
+
+    return;
+}
+
+void
+Perl__invlist_union(pTHX_ SV* const a, SV* const b, SV** output)
  {
-    /* Take the union of two inversion lists and point 'result' to it.  If
-     * 'result' on input points to one of the two lists, the reference count to
-     * that list will be decremented.
+    /* Take the union of two inversion lists and point <output> to it.  *output
+     * should be defined upon input, and if it points to one of the two lists,
+     * the reference count to that list will be decremented.
       * The basis for this comes from "Unicode Demystified" Chapter 13 by
       * Richard Gillam, published by Addison-Wesley, and explained at some
       * length there.  The preface says to incorporate its examples into your
@@ -6042,10 +6286,10 @@ S_invlist_union(pTHX_ SV* const a, SV* const b, SV** output)
       * return the larger of the input lists, but then outside code might need
       * to keep track of whether to free the input list or not */
  
-    UV* array_a = invlist_array(a);   /* a's array */
-    UV* array_b = invlist_array(b);
-    UV len_a = invlist_len(a); /* length of a's array */
-    UV len_b = invlist_len(b);
+    UV* array_a;    /* a's array */
+    UV* array_b;
+    UV len_a;      /* length of a's array */
+    UV len_b;
  
      SV* u;                     /* the resulting union */
      UV* array_u;
@@ -6063,12 +6307,42 @@ S_invlist_union(pTHX_ SV* const a, SV* const b, SV** output)
       */
      UV count = 0;
  
-    PERL_ARGS_ASSERT_INVLIST_UNION;
+    PERL_ARGS_ASSERT__INVLIST_UNION;
+    assert(a != b);
+
+    /* If either one is empty, the union is the other one */
+    len_a = invlist_len(a);
+    if (len_a == 0) {
+       if (*output == a) {
+           SvREFCNT_dec(a);
+       }
+       if (*output != b) {
+           *output = invlist_clone(b);
+       } /* else *output already = b; */
+       return;
+    }
+    else if ((len_b = invlist_len(b)) == 0) {
+       if (*output == b) {
+           SvREFCNT_dec(b);
+       }
+       if (*output != a) {
+           *output = invlist_clone(a);
+       }
+       /* else *output already = a; */
+       return;
+    }
+
+    /* Here both lists exist and are non-empty */
+    array_a = invlist_array(a);
+    array_b = invlist_array(b);
  
      /* Size the union for the worst case: that the sets are completely
       * disjoint */
      u = _new_invlist(len_a + len_b);
-    array_u = invlist_array(u);
+
+    /* Will contain U+0000 if either component does */
+    array_u = _invlist_array_init(u, (len_a > 0 && array_a[0] == 0)
+                                     || (len_b > 0 && array_b[0] == 0));
  
      /* Go through each list item by item, stopping when exhausted one of
       * them */
@@ -6088,13 +6362,14 @@ S_invlist_union(pTHX_ SV* const a, SV* const b, SV** output)
          * be seamlessly merged.  (In a tie and both are in the set or both not
          * in the set, it doesn't matter which we take first.) */
         if (array_a[i_a] < array_b[i_b]
-           || (array_a[i_a] == array_b[i_b] && ELEMENT_IN_INVLIST_SET(i_a)))
+           || (array_a[i_a] == array_b[i_b]
+               && ELEMENT_RANGE_MATCHES_INVLIST(i_a)))
         {
-           cp_in_set = ELEMENT_IN_INVLIST_SET(i_a);
+           cp_in_set = ELEMENT_RANGE_MATCHES_INVLIST(i_a);
             cp= array_a[i_a++];
         }
         else {
-           cp_in_set = ELEMENT_IN_INVLIST_SET(i_b);
+           cp_in_set = ELEMENT_RANGE_MATCHES_INVLIST(i_b);
             cp= array_b[i_b++];
         }
  
@@ -6134,8 +6409,8 @@ S_invlist_union(pTHX_ SV* const a, SV* const b, SV** output)
       * 4) the exhausted wasn't in its set, non-exhausted is, count is 1;
       *    decrementing to 0 insures that we look at the remainder of the
       *    non-exhausted set */
-    if ((i_a != len_a && PREV_ELEMENT_IN_INVLIST_SET(i_a))
-       || (i_b != len_b && PREV_ELEMENT_IN_INVLIST_SET(i_b)))
+    if ((i_a != len_a && PREV_RANGE_MATCHES_INVLIST(i_a))
+       || (i_b != len_b && PREV_RANGE_MATCHES_INVLIST(i_b)))
      {
         count--;
      }
@@ -6175,7 +6450,7 @@ S_invlist_union(pTHX_ SV* const a, SV* const b, SV** output)
      }
  
      /*  We may be removing a reference to one of the inputs */
-    if (&a == output || &b == output) {
+    if (a == *output || b == *output) {
         SvREFCNT_dec(*output);
      }
  
@@ -6183,12 +6458,12 @@ S_invlist_union(pTHX_ SV* const a, SV* const b, SV** output)
      return;
  }
  
-STATIC void
-S_invlist_intersection(pTHX_ SV* const a, SV* const b, SV** i)
+void
+Perl__invlist_intersection(pTHX_ SV* const a, SV* const b, SV** i)
  {
-    /* Take the intersection of two inversion lists and point 'i' to it.  If
-     * 'i' on input points to one of the two lists, the reference count to that
-     * list will be decremented.
+    /* Take the intersection of two inversion lists and point <i> to it.  *i
+     * should be defined upon input, and if it points to one of the two lists,
+     * the reference count to that list will be decremented.
       * The basis for this comes from "Unicode Demystified" Chapter 13 by
       * Richard Gillam, published by Addison-Wesley, and explained at some
       * length there.  The preface says to incorporate its examples into your
@@ -6198,10 +6473,10 @@ S_invlist_intersection(pTHX_ SV* const a, SV* const b, SV** i)
       * union above
       */
  
-    UV* array_a = invlist_array(a);   /* a's array */
-    UV* array_b = invlist_array(b);
-    UV len_a = invlist_len(a); /* length of a's array */
-    UV len_b = invlist_len(b);
+    UV* array_a;               /* a's array */
+    UV* array_b;
+    UV len_a;  /* length of a's array */
+    UV len_b;
  
      SV* r;                  /* the resulting intersection */
      UV* array_r;
@@ -6219,12 +6494,37 @@ S_invlist_intersection(pTHX_ SV* const a, SV* const b, SV** i)
       */
      UV count = 0;
  
-    PERL_ARGS_ASSERT_INVLIST_INTERSECTION;
+    PERL_ARGS_ASSERT__INVLIST_INTERSECTION;
+    assert(a != b);
+
+    /* If either one is empty, the intersection is null */
+    len_a = invlist_len(a);
+    if ((len_a == 0) || ((len_b = invlist_len(b)) == 0)) {
+
+       /* If the result is the same as one of the inputs, the input is being
+        * overwritten */
+       if (*i == a) {
+           SvREFCNT_dec(a);
+       }
+       else if (*i == b) {
+           SvREFCNT_dec(b);
+       }
+
+       *i = _new_invlist(0);
+       return;
+    }
+
+    /* Here both lists exist and are non-empty */
+    array_a = invlist_array(a);
+    array_b = invlist_array(b);
  
      /* Size the intersection for the worst case: that the intersection ends up
       * fragmenting everything to be completely disjoint */
      r= _new_invlist(len_a + len_b);
-    array_r = invlist_array(r);
+
+    /* Will contain U+0000 iff both components do */
+    array_r = _invlist_array_init(r, len_a > 0 && array_a[0] == 0
+                                    && len_b > 0 && array_b[0] == 0);
  
      /* Go through each list item by item, stopping when exhausted one of
       * them */
@@ -6245,13 +6545,14 @@ S_invlist_intersection(pTHX_ SV* const a, SV* const b, SV** i)
          * momentarily incremented to 2.  (In a tie and both are in the set or
          * both not in the set, it doesn't matter which we take first.) */
         if (array_a[i_a] < array_b[i_b]
-           || (array_a[i_a] == array_b[i_b] && ! ELEMENT_IN_INVLIST_SET(i_a)))
+           || (array_a[i_a] == array_b[i_b]
+               && ! ELEMENT_RANGE_MATCHES_INVLIST(i_a)))
         {
-           cp_in_set = ELEMENT_IN_INVLIST_SET(i_a);
+           cp_in_set = ELEMENT_RANGE_MATCHES_INVLIST(i_a);
             cp= array_a[i_a++];
         }
         else {
-           cp_in_set = ELEMENT_IN_INVLIST_SET(i_b);
+           cp_in_set = ELEMENT_RANGE_MATCHES_INVLIST(i_b);
             cp= array_b[i_b++];
         }
  
@@ -6288,8 +6589,8 @@ S_invlist_intersection(pTHX_ SV* const a, SV* const b, SV** i)
       *    everything that remains in the non-exhausted set.
       * 4) the exhausted wasn't in its set, non-exhausted is, count is 1, and
       *    remains 1.  And the intersection has nothing more. */
-    if ((i_a == len_a && PREV_ELEMENT_IN_INVLIST_SET(i_a))
-       || (i_b == len_b && PREV_ELEMENT_IN_INVLIST_SET(i_b)))
+    if ((i_a == len_a && PREV_RANGE_MATCHES_INVLIST(i_a))
+       || (i_b == len_b && PREV_RANGE_MATCHES_INVLIST(i_b)))
      {
         count++;
      }
@@ -6321,7 +6622,7 @@ S_invlist_intersection(pTHX_ SV* const a, SV* const b, SV** i)
      }
  
      /*  We may be removing a reference to one of the inputs */
-    if (&a == i || &b == i) {
+    if (a == *i || b == *i) {
         SvREFCNT_dec(*i);
      }
  
@@ -6329,6 +6630,8 @@ S_invlist_intersection(pTHX_ SV* const a, SV* const b, SV** i)
      return;
  }
  
+#endif
+
  STATIC SV*
  S_add_range_to_invlist(pTHX_ SV* invlist, const UV start, const UV end)
  {
@@ -6363,10 +6666,10 @@ S_add_range_to_invlist(pTHX_ SV* invlist, const UV start, const UV end)
      range_invlist = _new_invlist(2);
      _append_range_to_invlist(range_invlist, start, end);
  
-    invlist_union(invlist, range_invlist, &invlist);
+    _invlist_union(invlist, range_invlist, &invlist);
  
-    /* The passed in list can be freed, as well as our temporary */
-    invlist_destroy(range_invlist);
+    /* The temporary can be freed */
+    SvREFCNT_dec(range_invlist);
  
      return invlist;
  }
@@ -6376,7 +6679,250 @@ S_add_cp_to_invlist(pTHX_ SV* invlist, const UV cp) {
      return add_range_to_invlist(invlist, cp, cp);
  }
  
+#ifndef PERL_IN_XSUB_RE
+void
+Perl__invlist_invert(pTHX_ SV* const invlist)
+{
+    /* Complement the input inversion list, in place.  This adds a 0 if the
+     * list didn't have a zero; removes it otherwise.  As described above, the
+     * data structure is set up so that this is very efficient */
+
+    UV* len_pos = get_invlist_len_addr(invlist);
+
+    PERL_ARGS_ASSERT__INVLIST_INVERT;
+
+    /* The inverse of matching nothing is matching everything */
+    if (*len_pos == 0) {
+       _append_range_to_invlist(invlist, 0, UV_MAX);
+       return;
+    }
+
+    /* The exclusive or complements 0 to 1; and 1 to 0.  If the result is 1,
+     * the zero element was a 0, so it is being removed, so the length
+     * decrements by 1; and vice-versa.  SvCUR is unaffected */
+    if (*get_invlist_zero_addr(invlist) ^= 1) {
+       (*len_pos)--;
+    }
+    else {
+       (*len_pos)++;
+    }
+}
+
+void
+Perl__invlist_invert_prop(pTHX_ SV* const invlist)
+{
+    /* Complement, in place, the input inversion list, which must be a Unicode
+     * property; none of those match above the Unicode maximum code point, and
+     * Perl has chosen to not have the inversion match above that either.  So,
+     * after the ordinary complement, this adds a 0x110000 if the list doesn't
+     * end with it, and removes it if it does */
+
+    UV len;
+    UV* array;
+
+    PERL_ARGS_ASSERT__INVLIST_INVERT_PROP;
+
+    _invlist_invert(invlist);   /* The ordinary complement */
+
+    len = invlist_len(invlist);
+
+    if (len != 0) { /* If the complement is empty do nothing */
+       array = invlist_array(invlist);
+       if (array[len - 1] != PERL_UNICODE_MAX + 1) {
+           /* Add 0x110000.  First, grow if necessary */
+           len++;
+           if (invlist_max(invlist) < len) {
+               invlist_extend(invlist, len);
+               array = invlist_array(invlist); /* Re-fetch after the extend */
+           }
+           invlist_set_len(invlist, len);
+           array[len - 1] = PERL_UNICODE_MAX + 1;
+       }
+       else {  /* Remove the 0x110000 */
+           invlist_set_len(invlist, len - 1);
+       }
+    }
+
+    return;
+}
+#endif
+
+PERL_STATIC_INLINE SV*
+S_invlist_clone(pTHX_ SV* const invlist)
+{
+
+    /* Return a new inversion list whose buffer is a byte-for-byte copy of the
+     * input one's; the input is unchanged */
+
+    /* Need to allocate extra space to accommodate Perl's addition of a
+     * trailing NUL to SvPV's, since it thinks they are always strings */
+    SV* new_invlist = _new_invlist(invlist_len(invlist) + 1);
+    STRLEN length = SvCUR(invlist);     /* Number of bytes to copy */
+
+    PERL_ARGS_ASSERT_INVLIST_CLONE;
+
+    SvCUR_set(new_invlist, length); /* This isn't done automatically */
+    Copy(SvPVX(invlist), SvPVX(new_invlist), length, char);
+
+    return new_invlist;
+}
+
+#ifndef PERL_IN_XSUB_RE
+void
+Perl__invlist_subtract(pTHX_ SV* const a, SV* const b, SV** result)
+{
+    /* Point <result> to an inversion list which consists of all elements in
+     * <a> that aren't also in <b>, which must be distinct lists.  *result
+     * should be defined on input; if it is <b>, <b>'s refcount is decremented */
+
+    PERL_ARGS_ASSERT__INVLIST_SUBTRACT;
+    assert(a != b);
+
+    /* Subtracting nothing retains the original */
+    if (invlist_len(b) == 0) {
+
+       if (*result == b) {
+           SvREFCNT_dec(b);
+       }
+
+       /* If the result is not to be the same variable as the original, create
+        * a copy */
+       if (*result != a) {
+           *result = invlist_clone(a);
+       }
+    } else {
+       SV *b_copy = invlist_clone(b);  /* So <b> itself isn't inverted */
+       _invlist_invert(b_copy);        /* Everything not in 'b' */
+
+       if (*result == b) {
+           SvREFCNT_dec(b);
+       }
+
+       _invlist_intersection(a, b_copy, result);    /* Everything in 'a' not in
+                                                      'b' */
+       SvREFCNT_dec(b_copy);   /* Done with the temporary */
+    }
+
+    return;
+}
+#endif
+
+PERL_STATIC_INLINE UV*
+S_get_invlist_iter_addr(pTHX_ SV* invlist)
+{
+    /* Return the address of the UV holding the current iteration position; it
+     * lives INVLIST_ITER_OFFSET UVs from the start of <invlist>'s PV buffer */
+
+    PERL_ARGS_ASSERT_GET_INVLIST_ITER_ADDR;
+
+    return (UV *) (SvPVX(invlist) + (INVLIST_ITER_OFFSET * sizeof (UV)));
+}
+
+PERL_STATIC_INLINE void
+S_invlist_iterinit(pTHX_ SV* invlist)  /* Initialize iterator for invlist */
+{
+    PERL_ARGS_ASSERT_INVLIST_ITERINIT;
+
+    *get_invlist_iter_addr(invlist) = 0;   /* Start at the first element */
+}
+
+STATIC bool
+S_invlist_iternext(pTHX_ SV* invlist, UV* start, UV* end)
+{
+    /* An C<invlist_iterinit> call on <invlist> must be used to set this up.
+     * This call sets in <*start> and <*end>, the next range in <invlist>.
+     * Returns <TRUE> if successful and the next call will return the next
+     * range; <FALSE> if was already at the end of the list.  If the latter,
+     * <*start> and <*end> are unchanged, and invlist_iterinit() must be
+     * called again before <invlist> can be iterated over anew */
+
+    UV* pos = get_invlist_iter_addr(invlist);
+    UV len = invlist_len(invlist);
+    UV *array;
+
+    PERL_ARGS_ASSERT_INVLIST_ITERNEXT;
+
+    if (*pos >= len) {
+       *pos = UV_MAX;  /* Force iterinit() to be required next time */
+       return FALSE;
+    }
+
+    array = invlist_array(invlist);
+
+    *start = array[(*pos)++];
+
+    if (*pos >= len) {
+       *end = UV_MAX;  /* No further element: the range is open-ended */
+    }
+    else {
+       *end = array[(*pos)++] - 1;     /* One less than the next element */
+    }
+
+    return TRUE;
+}
+
+#ifndef PERL_IN_XSUB_RE
+SV *
+Perl__invlist_contents(pTHX_ SV* const invlist)
+{
+    /* Return a new string SV listing the ranges of an inversion list, one per
+     * line in hex, so that they can be printed out.  It uses the format
+     * traditionally done for debug tracing */
+
+    UV start, end;
+    SV* output = newSVpvs("\n");
+
+    PERL_ARGS_ASSERT__INVLIST_CONTENTS;
+
+    invlist_iterinit(invlist);
+    while (invlist_iternext(invlist, &start, &end)) {
+       if (end == UV_MAX) {    /* Range has no upper bound */
+           Perl_sv_catpvf(aTHX_ output, "%04"UVXf"\tINFINITY\n", start);
+       }
+       else if (end != start) {        /* Range of several code points */
+           Perl_sv_catpvf(aTHX_ output, "%04"UVXf"\t%04"UVXf"\n",
+                   start,       end);
+       }
+       else {  /* A single code point */
+           Perl_sv_catpvf(aTHX_ output, "%04"UVXf"\n", start);
+       }
+    }
+
+    return output;
+}
+#endif
+
+#if 0
+void
+S_invlist_dump(pTHX_ SV* const invlist, const char * const header)
+{
+    /* Dumps the ranges in an inversion list to Perl_debug_log, one per line.
+     * 'header', if non-NULL and non-empty, is output on a line before them */
+
+    UV start, end;
+
+    if (header && strlen(header)) {
+       PerlIO_printf(Perl_debug_log, "%s\n", header);
+    }
+    invlist_iterinit(invlist);
+    while (invlist_iternext(invlist, &start, &end)) {
+       if (end == UV_MAX) {    /* Range has no upper bound */
+           PerlIO_printf(Perl_debug_log, "0x%04"UVXf" .. INFINITY\n", start);
+       }
+       else {
+           PerlIO_printf(Perl_debug_log, "0x%04"UVXf" .. 0x%04"UVXf"\n", start, end);
+       }
+    }
+}
+#endif
+
+#undef HEADER_LENGTH
  #undef INVLIST_INITIAL_LENGTH
+#undef TO_INTERNAL_SIZE
+#undef FROM_INTERNAL_SIZE
+#undef INVLIST_LEN_OFFSET
+#undef INVLIST_ZERO_OFFSET
+#undef INVLIST_ITER_OFFSET
  
  /* End of inversion list object */
  
@@ -6759,7 +7305,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                         RExC_parse++;
                 if (*RExC_parse!=')') 
                     vFAIL("Expecting close bracket");
-                       
+
                gen_recurse_regop:
                  if ( paren == '-' ) {
                      /*
@@ -6836,7 +7382,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                     RExC_parse++;
                 }
                 if (*RExC_parse != ')') {
-                   RExC_parse = s;             
+                   RExC_parse = s;
                     vFAIL("Sequence (?{...}) not terminated or not {}-balanced");
                 }
                 if (!SIZE_ONLY) {
@@ -6894,7 +7440,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                         || RExC_parse[1] == '<'
                         || RExC_parse[1] == '{') { /* Lookahead or eval. */
                         I32 flag;
-                       
+
                         ret = reg_node(pRExC_state, LOGICAL);
                         if (!SIZE_ONLY)
                             ret->flags = 1;
@@ -7396,7 +7942,7 @@ S_regbranch(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, I32 first, U32 depth)
              Set_Node_Length(ret, 1);
          }
      }
-       
+
      if (!first && SIZE_ONLY)
         RExC_extralen += 1;                     /* BRANCHJ */
  
@@ -8266,7 +8812,7 @@ tryagain:
             break;          
         case 'p':
         case 'P':
-           {   
+           {
                 char* const oldregxend = RExC_end;
  #ifdef DEBUGGING
                 char* parse_start = RExC_parse - 2;
@@ -8593,7 +9139,7 @@ tryagain:
                     case 'x':
                         if (*++p == '{') {
                             char* const e = strchr(p, '}');
-       
+
                             if (!e) {
                                 RExC_parse = p + 1;
                                 vFAIL("Missing right brace on \\x{}");
@@ -9039,7 +9585,7 @@ tryagain:
                 *flagp |= HASWIDTH;
             if (len == 1 && UNI_IS_INVARIANT(ender))
                 *flagp |= SIMPLE;
-               
+
             if (SIZE_ONLY)
                 RExC_size += STR_SZ(len);
             else {
@@ -9108,7 +9654,7 @@ S_regpposixcc(pTHX_ RExC_state_t *pRExC_state, I32 value)
         POSIXCC(UCHARAT(RExC_parse))) {
         const char c = UCHARAT(RExC_parse);
         char* const s = RExC_parse++;
-       
+
         while (RExC_parse < RExC_end && UCHARAT(RExC_parse) != c)
             RExC_parse++;
         if (RExC_parse == RExC_end)
@@ -9243,7 +9789,7 @@ S_checkposixcc(pTHX_ RExC_state_t *pRExC_state)
      }
  }
  
-/* No locale test, and always Unicode semantics */
+/* No locale test, and always Unicode semantics, no ignore-case differences */
  #define _C_C_T_NOLOC_(NAME,TEST,WORD)                                          \
  ANYOF_##NAME:                                                                  \
         for (value = 0; value < 256; value++)                                  \
@@ -9263,8 +9809,11 @@ case ANYOF_N##NAME:                                                            \
  /* Like the above, but there are differences if we are in uni-8-bit or not, so
   * there are two tests passed in, to use depending on that. There aren't any
   * cases where the label is different from the name, so no need for that
- * parameter */
-#define _C_C_T_(NAME, TEST_8, TEST_7, WORD)                                    \
+ * parameter.
+ * Sets 'what' to WORD which is the property name for non-bitmap code points;
+ * But, uses FOLD_WORD instead if /i has been selected, to allow a different
+ * property name */
+#define _C_C_T_(NAME, TEST_8, TEST_7, WORD, FOLD_WORD)                         \
  ANYOF_##NAME:                                                                  \
      if (LOC) ANYOF_CLASS_SET(ret, ANYOF_##NAME);                               \
      else if (UNI_SEMANTICS) {                                                  \
@@ -9281,7 +9830,12 @@ ANYOF_##NAME:                                                                  \
          }                                                                      \
      }                                                                          \
      yesno = '+';                                                               \
-    what = WORD;                                                               \
+    if (FOLD) {                                                                \
+        what = FOLD_WORD;                                                      \
+    }                                                                          \
+    else {                                                                     \
+        what = WORD;                                                           \
+    }                                                                          \
      break;                                                                     \
  case ANYOF_N##NAME:                                                            \
      if (LOC) ANYOF_CLASS_SET(ret, ANYOF_N##NAME);                              \
@@ -9313,7 +9867,12 @@ case ANYOF_N##NAME:                                                            \
         }                                                                      \
      }                                                                          \
      yesno = '!';                                                               \
-    what = WORD;                                                               \
+    if (FOLD) {                                                                \
+        what = FOLD_WORD;                                                      \
+    }                                                                          \
+    else {                                                                     \
+        what = WORD;                                                           \
+    }                                                                          \
      break
  
  STATIC U8
@@ -9523,8 +10082,23 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, U32 depth)
      SV *listsv = NULL;
      STRLEN initial_listsv_len = 0; /* Kind of a kludge to see if it is more
                                       than just initialized.  */
+    SV* properties = NULL;    /* Code points that match \p{} \P{} */
+    UV element_count = 0;   /* Number of distinct elements in the class.
+                              Optimizations may be possible if this is tiny */
      UV n;
  
+    /* Unicode properties are stored in a swash; this holds the current one
+     * being parsed.  If this swash is the only above-latin1 component of the
+     * character class, an optimization is to pass it directly on to the
+     * execution engine.  Otherwise, it is set to NULL to indicate that there
+     * are other things in the class that have to be dealt with at execution
+     * time */
+    SV* swash = NULL;          /* Code points that match \p{} \P{} */
+
+    /* Set if a component of this character class is user-defined; just passed
+     * on to the engine */
+    UV has_user_defined_property = 0;
+
      /* code points this node matches that can't be stored in the bitmap */
      SV* nonbitmap = NULL;
  
@@ -9618,8 +10192,10 @@ parseit:
  
         namedclass = OOB_NAMEDCLASS; /* initialize as illegal */
  
-       if (!range)
+       if (!range) {
             rangebegin = RExC_parse;
+           element_count++;
+       }
         if (UTF) {
             value = utf8n_to_uvchr((U8*)RExC_parse,
                                    RExC_end - RExC_parse,
@@ -9695,6 +10271,9 @@ parseit:
                     n = 1;
                 }
                 if (!SIZE_ONLY) {
+                    SV** invlistsvp;
+                    SV* invlist;
+                    char* name;
                     if (UCHARAT(RExC_parse) == '^') {
                          RExC_parse++;
                          n--;
@@ -9704,18 +10283,106 @@ parseit:
                               n--;
                          }
                     }
+                    /* Try to get the definition of the property into
+                     * <invlist>.  If /i is in effect, the effective property
+                     * will have its name be <__NAME_i>.  The design is
+                     * discussed in commit
+                     * 2f833f5208e26b208886e51e09e2c072b5eabb46 */
+                    Newx(name, n + sizeof("_i__\n"), char);
+
+                    sprintf(name, "%s%.*s%s\n",
+                                    (FOLD) ? "__" : "",
+                                    (int)n,
+                                    RExC_parse,
+                                    (FOLD) ? "_i" : ""
+                    );
+
+                    /* Look up the property name, and get its swash and
+                     * inversion list, if the property is found  */
+                    if (! (ANYOF_FLAGS(ret) & ANYOF_INVERT)) {
+                    if (swash) {
+                        SvREFCNT_dec(swash);
+                    }
+                    swash = _core_swash_init("utf8", name, &PL_sv_undef,
+                                             1, /* binary */
+                                             0, /* not tr/// */
+                                             TRUE, /* this routine will handle
+                                                      undefined properties */
+                                             NULL, FALSE /* No inversion list */
+                                            );
+                    }
+
+                    if (   ANYOF_FLAGS(ret) & ANYOF_INVERT
+                        || ! swash
+                        || ! SvROK(swash)
+                        || SvTYPE(SvRV(swash)) != SVt_PVHV /* must be a hash */
+                        || ! (invlistsvp =
+                               hv_fetchs(MUTABLE_HV(SvRV(swash)),
+                                "INVLIST", FALSE))
+                        || ! (invlist = *invlistsvp))
+                   {
+                        if (swash) {
+                            SvREFCNT_dec(swash);
+                            swash = NULL;
+                        }
+
+                        /* Here didn't find it.  It could be a user-defined
+                         * property that will be available at run-time.  Add it
+                         * to the list to look up then */
+                        Perl_sv_catpvf(aTHX_ listsv, "%cutf8::%s\n",
+                                        (value == 'p' ? '+' : '!'),
+                                        name);
+                        has_user_defined_property = 1;
+
+                        /* We don't know yet, so have to assume that the
+                         * property could match something in the Latin1 range,
+                         * hence something that isn't utf8 */
+                        ANYOF_FLAGS(ret) |= ANYOF_NONBITMAP_NON_UTF8;
+                    }
+                    else {
 
-                   /* Add the property name to the list.  If /i matching, give
-                    * a different name which consists of the normal name
-                    * sandwiched between two underscores and '_i'.  The design
-                    * is discussed in the commit message for this. */
-                   Perl_sv_catpvf(aTHX_ listsv, "%cutf8::%s%.*s%s\n",
-                                       (value=='p' ? '+' : '!'),
-                                       (FOLD) ? "__" : "",
-                                       (int)n,
-                                       RExC_parse,
-                                       (FOLD) ? "_i" : ""
-                                   );
+                        /* Here, did get the swash and its inversion list.  If
+                         * the swash is from a user-defined property, then this
+                         * whole character class should be regarded as such */
+                        SV** user_defined_svp =
+                                            hv_fetchs(MUTABLE_HV(SvRV(swash)),
+                                                        "USER_DEFINED", FALSE);
+                        if (user_defined_svp) {
+                            has_user_defined_property
+                                                    |= SvUV(*user_defined_svp);
+                        }
+
+                        /* Invert if asking for the complement */
+                        if (value == 'P') {
+
+                           /* Add to any existing list */
+                           if (! properties) {
+                               properties = invlist_clone(invlist);
+                               _invlist_invert(properties);
+                           }
+                           else {
+                               invlist = invlist_clone(invlist);
+                               _invlist_invert(invlist);
+                               _invlist_union(properties, invlist, &properties);
+                               SvREFCNT_dec(invlist);
+                           }
+
+                            /* The swash can't be used as-is, because we've
+                            * inverted things; its removal is delayed to here,
+                            * after its invlist has been copied above */
+                            SvREFCNT_dec(swash);
+                            swash = NULL;
+                        }
+                        else {
+                           if (! properties) {
+                               properties = invlist_clone(invlist);
+                           }
+                           else {
+                               _invlist_union(properties, invlist, &properties);
+                           }
+                       }
+                   }
+                   Safefree(name);
                 }
                 RExC_parse = e + 1;
 
@@ -9859,8 +10526,6 @@ parseit:
                 range = 0; /* this was not a true range */
             }
  
-
-    
             if (!SIZE_ONLY) {
                 const char *what = NULL;
                 char yesno = 0;
@@ -9871,20 +10536,20 @@ parseit:
                  * --jhi */
                 switch ((I32)namedclass) {
                 
-               case _C_C_T_(ALNUMC, isALNUMC_L1, isALNUMC, "XPosixAlnum");
-               case _C_C_T_(ALPHA, isALPHA_L1, isALPHA, "XPosixAlpha");
-               case _C_C_T_(BLANK, isBLANK_L1, isBLANK, "XPosixBlank");
-               case _C_C_T_(CNTRL, isCNTRL_L1, isCNTRL, "XPosixCntrl");
-               case _C_C_T_(GRAPH, isGRAPH_L1, isGRAPH, "XPosixGraph");
-               case _C_C_T_(LOWER, isLOWER_L1, isLOWER, "XPosixLower");
-               case _C_C_T_(PRINT, isPRINT_L1, isPRINT, "XPosixPrint");
-               case _C_C_T_(PSXSPC, isPSXSPC_L1, isPSXSPC, "XPosixSpace");
-               case _C_C_T_(PUNCT, isPUNCT_L1, isPUNCT, "XPosixPunct");
-               case _C_C_T_(UPPER, isUPPER_L1, isUPPER, "XPosixUpper");
+               case _C_C_T_(ALNUMC, isALNUMC_L1, isALNUMC, "XPosixAlnum", "XPosixAlnum");
+               case _C_C_T_(ALPHA, isALPHA_L1, isALPHA, "XPosixAlpha", "XPosixAlpha");
+               case _C_C_T_(BLANK, isBLANK_L1, isBLANK, "XPosixBlank", "XPosixBlank");
+               case _C_C_T_(CNTRL, isCNTRL_L1, isCNTRL, "XPosixCntrl", "XPosixCntrl");
+               case _C_C_T_(GRAPH, isGRAPH_L1, isGRAPH, "XPosixGraph", "XPosixGraph");
+               case _C_C_T_(LOWER, isLOWER_L1, isLOWER, "XPosixLower", "__XPosixLower_i");
+               case _C_C_T_(PRINT, isPRINT_L1, isPRINT, "XPosixPrint", "XPosixPrint");
+               case _C_C_T_(PSXSPC, isPSXSPC_L1, isPSXSPC, "XPosixSpace", "XPosixSpace");
+               case _C_C_T_(PUNCT, isPUNCT_L1, isPUNCT, "XPosixPunct", "XPosixPunct");
+               case _C_C_T_(UPPER, isUPPER_L1, isUPPER, "XPosixUpper", "__XPosixUpper_i");
                  /* \s, \w match all unicode if utf8. */
-                case _C_C_T_(SPACE, isSPACE_L1, isSPACE, "SpacePerl");
-                case _C_C_T_(ALNUM, isWORDCHAR_L1, isALNUM, "Word");
-               case _C_C_T_(XDIGIT, isXDIGIT_L1, isXDIGIT, "XPosixXDigit");
+                case _C_C_T_(SPACE, isSPACE_L1, isSPACE, "SpacePerl", "SpacePerl");
+                case _C_C_T_(ALNUM, isWORDCHAR_L1, isALNUM, "Word", "Word");
+               case _C_C_T_(XDIGIT, isXDIGIT_L1, isXDIGIT, "XPosixXDigit", "XPosixXDigit");
                 case _C_C_T_NOLOC_(VERTWS, is_VERTWS_latin1(&value), "VertSpace");
                 case _C_C_T_NOLOC_(HORIZWS, is_HORIZWS_latin1(&value), "HorizSpace");
                 case ANYOF_ASCII:
@@ -9950,7 +10615,7 @@ parseit:
                 }
                 if (what && ! (AT_LEAST_ASCII_RESTRICTED)) {
                     /* Strings such as "+utf8::isWord\n" */
-                   Perl_sv_catpvf(aTHX_ listsv, "%cutf8::Is%s\n", yesno, what);
+                   Perl_sv_catpvf(aTHX_ listsv, "%cutf8::%s\n", yesno, what);
                 }
  
                 continue;
@@ -10051,16 +10716,16 @@ parseit:
      /* If folding and there are code points above 255, we calculate all
       * characters that could fold to or from the ones already on the list */
      if (FOLD && nonbitmap) {
-       UV i;
+       UV start, end;  /* End points of code point ranges */
  
-       SV* fold_intersection;
-       UV* fold_list;
+       SV* fold_intersection = NULL;
  
         /* This is a list of all the characters that participate in folds
             * (except marks, etc in multi-char folds */
         if (! PL_utf8_foldable) {
             SV* swash = swash_init("utf8", "Cased", &PL_sv_undef, 1, 0);
             PL_utf8_foldable = _swash_to_invlist(swash);
+            SvREFCNT_dec(swash);
         }
  
         /* This is a hash that for a particular fold gives all characters
@@ -10081,33 +10746,27 @@ parseit:
                 if (! PL_utf8_tofold) {
                     U8 dummy[UTF8_MAXBYTES+1];
                     STRLEN dummy_len;
-                   to_utf8_fold((U8*) "A", dummy, &dummy_len);
+
+                   /* This particular string is above \xff in both UTF-8 and
+                    * UTFEBCDIC */
+                   to_utf8_fold((U8*) "\xC8\x80", dummy, &dummy_len);
+                   assert(PL_utf8_tofold); /* Verify that worked */
                 }
                 PL_utf8_foldclosures = _swash_inversion_hash(PL_utf8_tofold);
             }
         }
  
-       /* Only the characters in this class that participate in folds need
-           * be checked.  Get the intersection of this class and all the
-           * possible characters that are foldable.  This can quickly narrow
-           * down a large class */
-       invlist_intersection(PL_utf8_foldable, nonbitmap, &fold_intersection);
+       /* Only the characters in this class that participate in folds need be
+        * checked.  Get the intersection of this class and all the possible
+        * characters that are foldable.  This can quickly narrow down a large
+        * class */
+       _invlist_intersection(PL_utf8_foldable, nonbitmap, &fold_intersection);
  
         /* Now look at the foldable characters in this class individually */
-       fold_list = invlist_array(fold_intersection);
-       for (i = 0; i < invlist_len(fold_intersection); i++) {
+       invlist_iterinit(fold_intersection);
+       while (invlist_iternext(fold_intersection, &start, &end)) {
             UV j;
  
-           /* The next entry is the beginning of the range that is in the
-            * class */
-           UV start = fold_list[i++];
-
-
-           /* The next entry is the beginning of the next range, which
-               * isn't in the class, so the end of the current range is one
-               * less than that */
-           UV end = fold_list[i] - 1;
-
             /* Look at every character in the range */
             for (j = start; j <= end; j++) {
  
@@ -10119,23 +10778,22 @@ parseit:
  
                 if (foldlen > (STRLEN)UNISKIP(f)) {
  
-                   /* Any multicharacter foldings (disallowed in
-                       * lookbehind patterns) require the following
-                       * transform: [ABCDEF] -> (?:[ABCabcDEFd]|pq|rst) where
-                       * E folds into "pq" and F folds into "rst", all other
-                       * characters fold to single characters.  We save away
-                       * these multicharacter foldings, to be later saved as
-                       * part of the additional "s" data. */
+                   /* Any multicharacter foldings (disallowed in lookbehind
+                    * patterns) require the following transform: [ABCDEF] ->
+                    * (?:[ABCabcDEFd]|pq|rst) where E folds into "pq" and F
+                    * folds into "rst", all other characters fold to single
+                    * characters.  We save away these multicharacter foldings,
+                    * to be later saved as part of the additional "s" data. */
                     if (! RExC_in_lookbehind) {
                         U8* loc = foldbuf;
                         U8* e = foldbuf + foldlen;
  
-                       /* If any of the folded characters of this are in
-                           * the Latin1 range, tell the regex engine that
-                           * this can match a non-utf8 target string.  The
-                           * only multi-byte fold whose source is in the
-                           * Latin1 range (U+00DF) applies only when the
-                           * target string is utf8, or under unicode rules */
+                       /* If any of the folded characters of this are in the
+                        * Latin1 range, tell the regex engine that this can
+                        * match a non-utf8 target string.  The only multi-byte
+                        * fold whose source is in the Latin1 range (U+00DF)
+                        * applies only when the target string is utf8, or
+                        * under unicode rules */
                         if (j > 255 || AT_LEAST_UNI_SEMANTICS) {
                             while (loc < e) {
  
@@ -10148,8 +10806,8 @@ parseit:
                                 if (UTF8_IS_INVARIANT(*loc)
                                     || UTF8_IS_DOWNGRADEABLE_START(*loc))
                                 {
-                                   /* Can't mix above and below 256 under
-                                       * LOC */
+                                    /* Can't mix above and below 256 under LOC
+                                     */
                                     if (LOC) {
                                         goto end_multi_fold;
                                     }
@@ -10179,13 +10837,13 @@ parseit:
                 }
                 else {
                     /* Single character fold.  Add everything in its fold
-                       * closure to the list that this node should match */
+                    * closure to the list that this node should match */
                     SV** listp;
  
-                   /* The fold closures data structure is a hash with the
-                       * keys being every character that is folded to, like
-                       * 'k', and the values each an array of everything that
-                       * folds to its key.  e.g. [ 'k', 'K', KELVIN_SIGN ] */
+                   /* The fold closures data structure is a hash with the keys
+                    * being every character that is folded to, like 'k', and
+                    * the values each an array of everything that folds to its
+                    * key.  e.g. [ 'k', 'K', KELVIN_SIGN ] */
                     if ((listp = hv_fetch(PL_utf8_foldclosures,
                                     (char *) foldbuf, foldlen, FALSE)))
                     {
@@ -10199,9 +10857,9 @@ parseit:
                             }
                             c = SvUV(*c_p);
  
-                           /* /aa doesn't allow folds between ASCII and
-                               * non-; /l doesn't allow them between above
-                               * and below 256 */
+                           /* /aa doesn't allow folds between ASCII and non-;
+                            * /l doesn't allow them between above and below
+                            * 256 */
                             if ((MORE_ASCII_RESTRICTED
                                  && (isASCII(c) != isASCII(j)))
                                     || (LOC && ((c < 256) != (j < 256))))
@@ -10215,9 +10873,9 @@ parseit:
                                         (U8) c,
                                         &l1_fold_invlist, &unicode_alternate);
                             }
-                               /* It may be that the code point is already
-                                   * in this range or already in the bitmap,
-                                   * in which case we need do nothing */
+                               /* It may be that the code point is already in
+                                * this range or already in the bitmap, in
+                                * which case we need do nothing */
                             else if ((c < start || c > end)
                                         && (c > 255
                                             || ! ANYOF_BITMAP_TEST(ret, c)))
@@ -10229,43 +10887,104 @@ parseit:
                 }
             }
         }
-       invlist_destroy(fold_intersection);
+       SvREFCNT_dec(fold_intersection);
      }
  
      /* Combine the two lists into one. */
      if (l1_fold_invlist) {
         if (nonbitmap) {
-           invlist_union(nonbitmap, l1_fold_invlist, &nonbitmap);
-           invlist_destroy(l1_fold_invlist);
+           _invlist_union(nonbitmap, l1_fold_invlist, &nonbitmap);
+           SvREFCNT_dec(l1_fold_invlist);
         }
         else {
             nonbitmap = l1_fold_invlist;
         }
      }
  
+    /* And combine the result (if any) with any inversion list from properties.
+     * The lists are kept separate up to now because we don't want to fold the
+     * properties */
+    if (properties) {
+       if (nonbitmap) {
+           _invlist_union(nonbitmap, properties, &nonbitmap);
+           SvREFCNT_dec(properties);
+       }
+       else {
+           nonbitmap = properties;
+       }
+    }
+
+
      /* Here, we have calculated what code points should be in the character
-     * class.   Now we can see about various optimizations.  Fold calculation
-     * needs to take place before inversion.  Otherwise /[^k]/i would invert to
-     * include K, which under /i would match k. */
+     * class.
+     *
+     * Now we can see about various optimizations.  Fold calculation (which we
+     * did above) needs to take place before inversion.  Otherwise /[^k]/i
+     * would invert to include K, which under /i would match k, which it
+     * shouldn't. */
  
      /* Optimize inverted simple patterns (e.g. [^a-z]).  Note that we haven't
-     * set the FOLD flag yet, so this this does optimize those.  It doesn't
+     * set the FOLD flag yet, so this does optimize those.  It doesn't
       * optimize locale.  Doing so perhaps could be done as long as there is
       * nothing like \w in it; some thought also would have to be given to the
       * interaction with above 0x100 chars */
-    if (! LOC
-       && (ANYOF_FLAGS(ret) & ANYOF_FLAGS_ALL) == ANYOF_INVERT
+    if ((ANYOF_FLAGS(ret) & ANYOF_INVERT)
+        && ! LOC
         && ! unicode_alternate
-       && ! nonbitmap
+       /* In case of /d, there are some things that should match only when
+        * not in the bitmap, i.e., they require UTF8 to match.  These are
+        * listed in nonbitmap, but if ANYOF_NONBITMAP_NON_UTF8 is set in this
+        * case, they don't require UTF8, so can invert here */
+       && (! nonbitmap
+           || ! DEPENDS_SEMANTICS
+           || (ANYOF_FLAGS(ret) & ANYOF_NONBITMAP_NON_UTF8))
         && SvCUR(listsv) == initial_listsv_len)
      {
-       for (value = 0; value < ANYOF_BITMAP_SIZE; ++value)
-           ANYOF_BITMAP(ret)[value] ^= 0xFF;
+       int i;
+       if (! nonbitmap) {
+           for (i = 0; i < 256; ++i) {
+               if (ANYOF_BITMAP_TEST(ret, i)) {
+                   ANYOF_BITMAP_CLEAR(ret, i);
+               }
+               else {
+                   ANYOF_BITMAP_SET(ret, i);
+                   prevvalue = value;
+                   value = i;
+               }
+           }
+           /* The inversion means that everything above 255 is matched */
+           ANYOF_FLAGS(ret) |= ANYOF_UNICODE_ALL;
+       }
+       else {
+           /* Here, also has things outside the bitmap.  Go through each bit
+            * individually and add it to the list to get rid of from those
+            * things not in the bitmap */
+           SV *remove_list = _new_invlist(2);
+
+           /* Now invert both the bitmap and the nonbitmap.  Anything in the
+            * bitmap has to also be removed from the non-bitmap */
+           _invlist_invert(nonbitmap);
+           for (i = 0; i < 256; ++i) {
+               if (ANYOF_BITMAP_TEST(ret, i)) {
+                   ANYOF_BITMAP_CLEAR(ret, i);
+                   remove_list = add_cp_to_invlist(remove_list, i);
+               }
+               else {
+                   ANYOF_BITMAP_SET(ret, i);
+                   prevvalue = value;
+                   value = i;
+               }
+           }
+
+           /* And do the removal */
+           _invlist_subtract(nonbitmap, remove_list, &nonbitmap);
+           SvREFCNT_dec(remove_list);
+       }
+
         stored = 256 - stored;
  
-       /* The inversion means that everything above 255 is matched; and at the
-        * same time we clear the invert flag */
-       ANYOF_FLAGS(ret) = ANYOF_UNICODE_ALL;
+       /* Clear the invert flag, since the inversion has just been done here */
+       ANYOF_FLAGS(ret) &= ~ANYOF_INVERT;
      }
  
      /* Folding in the bitmap is taken care of above, but not for locale (for
@@ -10329,17 +11048,24 @@ parseit:
             else {
                 op = EXACT;
             }
-       }   /* else 2 chars in the bit map: the folds of each other */
-       else if (AT_LEAST_UNI_SEMANTICS || !isASCII(value)) {
-
-           /* To join adjacent nodes, they must be the exact EXACTish type.
-            * Try to use the most likely type, by using EXACTFU if the regex
-            * calls for them, or is required because the character is
-            * non-ASCII */
-           op = EXACTFU;
         }
-       else {    /* Otherwise, more likely to be EXACTF type */
-           op = EXACTF;
+       else {   /* else 2 chars in the bit map: the folds of each other */
+
+           /* Use the folded value, which for the cases where we get here,
+            * is just the lower case of the current one (which may resolve to
+            * itself, or to the other one) */
+           value = toLOWER_LATIN1(value);
+           if (AT_LEAST_UNI_SEMANTICS || !isASCII(value)) {
+
+               /* To join adjacent nodes, they must be the exact EXACTish
+                * type.  Try to use the most likely type, by using EXACTFU if
+                * the regex calls for them, or is required because the
+                * character is non-ASCII */
+               op = EXACTFU;
+           }
+           else {    /* Otherwise, more likely to be EXACTF type */
+               op = EXACTF;
+           }
         }
  
         ret = reg_node(pRExC_state, op);
@@ -10359,64 +11085,50 @@ parseit:
          return ret;
      }
  
-    if (nonbitmap) {
-       UV* nonbitmap_array = invlist_array(nonbitmap);
-       UV nonbitmap_len = invlist_len(nonbitmap);
-       UV i;
-
-       /*  Here have the full list of items to match that aren't in the
-        *  bitmap.  Convert to the structure that the rest of the code is
-        *  expecting.   XXX That rest of the code should convert to this
-        *  structure */
-       for (i = 0; i < nonbitmap_len; i++) {
-
-           /* The next entry is the beginning of the range that is in the
-            * class */
-           UV start = nonbitmap_array[i++];
-           UV end;
-
-           /* The next entry is the beginning of the next range, which isn't
-            * in the class, so the end of the current range is one less than
-            * that.  But if there is no next range, it means that the range
-            * begun by 'start' extends to infinity, which for this platform
-            * ends at UV_MAX */
-           if (i == nonbitmap_len) {
-               end = UV_MAX;
-           }
-           else {
-               end = nonbitmap_array[i] - 1;
-           }
-
-           if (start == end) {
-               Perl_sv_catpvf(aTHX_ listsv, "%04"UVxf"\n", start);
-           }
-           else {
-               /* The \t sets the whole range */
-               Perl_sv_catpvf(aTHX_ listsv, "%04"UVxf"\t%04"UVxf"\n",
-                       /* XXX EBCDIC */
-                                  start, end);
-           }
-       }
-       invlist_destroy(nonbitmap);
+    /* If there is a swash and more than one element, we can't use the swash in
+     * the optimization below. */
+    if (swash && element_count > 1) {
+       SvREFCNT_dec(swash);
+       swash = NULL;
      }
-
-    if (SvCUR(listsv) == initial_listsv_len && ! unicode_alternate) {
+    if (! nonbitmap
+       && SvCUR(listsv) == initial_listsv_len
+       && ! unicode_alternate)
+    {
         ARG_SET(ret, ANYOF_NONBITMAP_EMPTY);
         SvREFCNT_dec(listsv);
         SvREFCNT_dec(unicode_alternate);
      }
      else {
-
+       /* av[0] stores the character class description in its textual form:
+        *       used later (regexec.c:Perl_regclass_swash()) to initialize the
+        *       appropriate swash, and is also useful for dumping the regnode.
+        * av[1] if NULL, is a placeholder to later contain the swash computed
+        *       from av[0].  But if no further computation need be done, the
+        *       swash is stored there now.
+        * av[2] stores the multicharacter foldings, used later in
+        *       regexec.c:S_reginclass().
+        * av[3] stores the nonbitmap inversion list for use in addition to or
+        *       instead of av[0]; not used if av[1] isn't NULL
+        * av[4] is set if any component of the class is from a user-defined
+        *       property; not used if av[1] isn't NULL */
         AV * const av = newAV();
         SV *rv;
-       /* The 0th element stores the character class description
-        * in its textual form: used later (regexec.c:Perl_regclass_swash())
-        * to initialize the appropriate swash (which gets stored in
-        * the 1st element), and also useful for dumping the regnode.
-        * The 2nd element stores the multicharacter foldings,
-        * used later (regexec.c:S_reginclass()). */
-       av_store(av, 0, listsv);
-       av_store(av, 1, NULL);
+
+       av_store(av, 0, (SvCUR(listsv) == initial_listsv_len)
+                       ? &PL_sv_undef
+                       : listsv);
+       if (swash) {
+           av_store(av, 1, swash);
+           SvREFCNT_dec(nonbitmap);
+       }
+       else {
+           av_store(av, 1, NULL);
+           if (nonbitmap) {
+               av_store(av, 3, nonbitmap);
+               av_store(av, 4, newSVuv(has_user_defined_property));
+           }
+       }
  
          /* Store any computed multi-char folds only if we are allowing
           * them */
@@ -10493,8 +11205,11 @@ S_nextchar(pTHX_ RExC_state_t *pRExC_state)
      PERL_ARGS_ASSERT_NEXTCHAR;
  
      for (;;) {
-       if (*RExC_parse == '(' && RExC_parse[1] == '?' &&
-               RExC_parse[2] == '#') {
+       if (RExC_end - RExC_parse >= 3
+           && *RExC_parse == '('
+           && RExC_parse[1] == '?'
+           && RExC_parse[2] == '#')
+       {
             while (*RExC_parse != ')') {
                 if (RExC_parse == RExC_end)
                     FAIL("Sequence (?#... not terminated");
@@ -10578,7 +11293,7 @@ S_reganode(pTHX_ RExC_state_t *pRExC_state, U8 op, U32 arg)
            We can't do this:
            
            assert(2==regarglen[op]+1); 
-       
+
            Anything larger than this has to allocate the extra amount.
            If we changed this to be:
            
@@ -11184,7 +11899,7 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o)
         Perl_sv_catpvf(aTHX_ sv, "[%s", PL_colors[0]);
         if (flags & ANYOF_INVERT)
             sv_catpvs(sv, "^");
-       
+
         /* output what the standard cp 0-255 bitmap matches */
         for (i = 0; i <= 256; i++) {
             if (i < 256 && ANYOF_BITMAP_TEST(o,i)) {
@@ -11228,67 +11943,86 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o)
             sv_catpvs(sv, "{outside bitmap}");
  
         if (ANYOF_NONBITMAP(o)) {
-           SV *lv;
+           SV *lv; /* Set if there is something outside the bit map */
             SV * const sw = regclass_swash(prog, o, FALSE, &lv, 0);
-       
-           if (lv) {
+            bool byte_output = FALSE;   /* If something in the bitmap has been
+                                           output */
+
+           if (lv && lv != &PL_sv_undef) {
                 if (sw) {
                     U8 s[UTF8_MAXBYTES_CASE+1];
  
-                   for (i = 0; i <= 256; i++) { /* just the first 256 */
+                   for (i = 0; i <= 256; i++) { /* Look at chars in bitmap */
                         uvchr_to_utf8(s, i);
-                       
-                       if (i < 256 && swash_fetch(sw, s, TRUE)) {
+
+                       if (i < 256
+                            && ! ANYOF_BITMAP_TEST(o, i)    /* Don't duplicate
+                                                               things already
+                                                               output as part
+                                                               of the bitmap */
+                            && swash_fetch(sw, s, TRUE))
+                        {
                             if (rangestart == -1)
                                 rangestart = i;
                         } else if (rangestart != -1) {
+                            byte_output = TRUE;
                             if (i <= rangestart + 3)
                                 for (; rangestart < i; rangestart++) {
-                                   const U8 * const e = uvchr_to_utf8(s,rangestart);
-                                   U8 *p;
-                                   for(p = s; p < e; p++)
-                                       put_byte(sv, *p);
+                                   put_byte(sv, rangestart);
                                 }
                             else {
-                               const U8 *e = uvchr_to_utf8(s,rangestart);
-                               U8 *p;
-                               for (p = s; p < e; p++)
-                                   put_byte(sv, *p);
+                               put_byte(sv, rangestart);
                                 sv_catpvs(sv, "-");
-                               e = uvchr_to_utf8(s, i-1);
-                               for (p = s; p < e; p++)
-                                   put_byte(sv, *p);
-                               }
-                               rangestart = -1;
+                               put_byte(sv, i-1);
                             }
+                           rangestart = -1;
                         }
-                       
-                   sv_catpvs(sv, "..."); /* et cetera */
+                   }
                 }
  
                 {
                     char *s = savesvpv(lv);
                     char * const origs = s;
-               
+
                     while (*s && *s != '\n')
                         s++;
-               
+
                     if (*s == '\n') {
                         const char * const t = ++s;
-                       
+
+                        if (byte_output) {
+                            sv_catpvs(sv, " ");
+                        }
+
                         while (*s) {
-                           if (*s == '\n')
+                           if (*s == '\n') {
+
+                                /* Truncate very long output */
+                               if (s - origs > 256) {
+                                   Perl_sv_catpvf(aTHX_ sv,
+                                                  "%.*s...",
+                                                  (int) (s - origs - 1),
+                                                  t);
+                                   goto out_dump;
+                               }
                                 *s = ' ';
+                           }
+                           else if (*s == '\t') {
+                               *s = '-';
+                           }
                             s++;
                         }
                         if (s[-1] == ' ')
                             s[-1] = 0;
-                       
+
                         sv_catpv(sv, t);
                     }
-               
+
+               out_dump:
+
                     Safefree(origs);
                 }
+               SvREFCNT_dec(lv);
             }
         }
  
@@ -11668,7 +12402,8 @@ Perl_re_dup_guts(pTHX_ const REGEXP *sstr, REGEXP *dstr, CLONE_PARAMS *param)
                1: a buffer in a different thread
                2: something we no longer hold a reference on
                so we need to copy it locally.  */
-           /* Note we need to sue SvCUR() on our mother_re, because it, in
+           /* Note we need to use SvCUR(), rather than
+              SvLEN(), on our mother_re, because it, in
                turn, may well be pointing to its own mother_re.  */
             SvPV_set(dstr, SAVEPVN(SvPVX_const(ret->mother_re),
                                    SvCUR(ret->mother_re)+1));
@@ -11816,7 +12551,7 @@ Perl_regnext(pTHX_ register regnode *p)
  }
  #endif
  
-STATIC void    
+STATIC void
  S_re_croak2(pTHX_ const char* pat1,const char* pat2,...)
  {
      va_list args;
@@ -11993,7 +12728,7 @@ S_dumpuntil(pTHX_ const regexp *r, const regnode *start, const regnode *node,
                 goto after_print;
         } else
             CLEAR_OPTSTART;
-       
+
         regprop(r, sv, node);
         PerlIO_printf(Perl_debug_log, "%4"IVdf":%*s%s", (IV)(node - start),
                       (int)(2*indent + 1), "", SvPVX_const(sv));
@@ -12041,7 +12776,7 @@ S_dumpuntil(pTHX_ const regexp *r, const regnode *start, const regnode *node,
              sv_setpvs(sv, "");
             for (word_idx= 0; word_idx < (I32)trie->wordcount; word_idx++) {
                 SV ** const elem_ptr = av_fetch(trie_words,word_idx,0);
-               
+
                  PerlIO_printf(Perl_debug_log, "%*s%s ",
                     (int)(2*(indent+3)), "",
                      elem_ptr ? pv_pretty(sv, SvPV_nolen_const(*elem_ptr), SvCUR(*elem_ptr), 60,