This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
op/join.t: don't use done_testing
[perl5.git] / regcomp.c
index 3d49b81..f9dc26c 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
@@ -4404,6 +4404,8 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
            data->flags |= (OP(scan) == MEOL
                            ? SF_BEFORE_MEOL
                            : SF_BEFORE_SEOL);
+           SCAN_COMMIT(pRExC_state, data, minlenp);
+
        }
        else if (  PL_regkind[OP(scan)] == BRANCHJ
                 /* Lookbehind, or need to calculate parens/evals/stclass: */
@@ -5025,7 +5027,7 @@ S_has_runtime_code(pTHX_ RExC_state_t * const pRExC_state, OP *expr,
  * the original pattern needs upgrading to utf8.
  */
 
-bool
+static bool
 S_compile_runtime_code(pTHX_ RExC_state_t * const pRExC_state,
     char *pat, STRLEN plen)
 {
@@ -9912,43 +9914,17 @@ tryagain:
            *flagp |= HASWIDTH;
            goto finish_meta_pat;
        case 'w':
-           switch (get_regex_charset(RExC_flags)) {
-               case REGEX_LOCALE_CHARSET:
-                   op = ALNUML;
-                   break;
-               case REGEX_UNICODE_CHARSET:
-                   op = ALNUMU;
-                   break;
-               case REGEX_ASCII_RESTRICTED_CHARSET:
-               case REGEX_ASCII_MORE_RESTRICTED_CHARSET:
-                   op = ALNUMA;
-                   break;
-               case REGEX_DEPENDS_CHARSET:
-                   op = ALNUM;
-                   break;
-               default:
-                   goto bad_charset;
+           op = ALNUM + get_regex_charset(RExC_flags);
+            if (op > ALNUMA) {  /* /aa is same as /a */
+                op = ALNUMA;
             }
            ret = reg_node(pRExC_state, op);
            *flagp |= HASWIDTH|SIMPLE;
            goto finish_meta_pat;
        case 'W':
-           switch (get_regex_charset(RExC_flags)) {
-               case REGEX_LOCALE_CHARSET:
-                   op = NALNUML;
-                   break;
-               case REGEX_UNICODE_CHARSET:
-                   op = NALNUMU;
-                   break;
-               case REGEX_ASCII_RESTRICTED_CHARSET:
-               case REGEX_ASCII_MORE_RESTRICTED_CHARSET:
-                   op = NALNUMA;
-                   break;
-               case REGEX_DEPENDS_CHARSET:
-                   op = NALNUM;
-                   break;
-               default:
-                   goto bad_charset;
+           op = NALNUM + get_regex_charset(RExC_flags);
+            if (op > NALNUMA) { /* /aa is same as /a */
+                op = NALNUMA;
             }
            ret = reg_node(pRExC_state, op);
            *flagp |= HASWIDTH|SIMPLE;
@@ -9956,22 +9932,9 @@ tryagain:
        case 'b':
            RExC_seen_zerolen++;
            RExC_seen |= REG_SEEN_LOOKBEHIND;
-           switch (get_regex_charset(RExC_flags)) {
-               case REGEX_LOCALE_CHARSET:
-                   op = BOUNDL;
-                   break;
-               case REGEX_UNICODE_CHARSET:
-                   op = BOUNDU;
-                   break;
-               case REGEX_ASCII_RESTRICTED_CHARSET:
-               case REGEX_ASCII_MORE_RESTRICTED_CHARSET:
-                   op = BOUNDA;
-                   break;
-               case REGEX_DEPENDS_CHARSET:
-                   op = BOUND;
-                   break;
-               default:
-                   goto bad_charset;
+           op = BOUND + get_regex_charset(RExC_flags);
+            if (op > BOUNDA) {  /* /aa is same as /a */
+                op = BOUNDA;
             }
            ret = reg_node(pRExC_state, op);
            FLAGS(ret) = get_regex_charset(RExC_flags);
@@ -9980,103 +9943,45 @@ tryagain:
        case 'B':
            RExC_seen_zerolen++;
            RExC_seen |= REG_SEEN_LOOKBEHIND;
-           switch (get_regex_charset(RExC_flags)) {
-               case REGEX_LOCALE_CHARSET:
-                   op = NBOUNDL;
-                   break;
-               case REGEX_UNICODE_CHARSET:
-                   op = NBOUNDU;
-                   break;
-               case REGEX_ASCII_RESTRICTED_CHARSET:
-               case REGEX_ASCII_MORE_RESTRICTED_CHARSET:
-                   op = NBOUNDA;
-                   break;
-               case REGEX_DEPENDS_CHARSET:
-                   op = NBOUND;
-                   break;
-               default:
-                   goto bad_charset;
+           op = NBOUND + get_regex_charset(RExC_flags);
+            if (op > NBOUNDA) { /* /aa is same as /a */
+                op = NBOUNDA;
             }
            ret = reg_node(pRExC_state, op);
            FLAGS(ret) = get_regex_charset(RExC_flags);
            *flagp |= SIMPLE;
            goto finish_meta_pat;
        case 's':
-           switch (get_regex_charset(RExC_flags)) {
-               case REGEX_LOCALE_CHARSET:
-                   op = SPACEL;
-                   break;
-               case REGEX_UNICODE_CHARSET:
-                   op = SPACEU;
-                   break;
-               case REGEX_ASCII_RESTRICTED_CHARSET:
-               case REGEX_ASCII_MORE_RESTRICTED_CHARSET:
-                   op = SPACEA;
-                   break;
-               case REGEX_DEPENDS_CHARSET:
-                   op = SPACE;
-                   break;
-               default:
-                   goto bad_charset;
+           op = SPACE + get_regex_charset(RExC_flags);
+            if (op > SPACEA) {  /* /aa is same as /a */
+                op = SPACEA;
             }
            ret = reg_node(pRExC_state, op);
            *flagp |= HASWIDTH|SIMPLE;
            goto finish_meta_pat;
        case 'S':
-           switch (get_regex_charset(RExC_flags)) {
-               case REGEX_LOCALE_CHARSET:
-                   op = NSPACEL;
-                   break;
-               case REGEX_UNICODE_CHARSET:
-                   op = NSPACEU;
-                   break;
-               case REGEX_ASCII_RESTRICTED_CHARSET:
-               case REGEX_ASCII_MORE_RESTRICTED_CHARSET:
-                   op = NSPACEA;
-                   break;
-               case REGEX_DEPENDS_CHARSET:
-                   op = NSPACE;
-                   break;
-               default:
-                   goto bad_charset;
-            }
-           ret = reg_node(pRExC_state, op);
-           *flagp |= HASWIDTH|SIMPLE;
-           goto finish_meta_pat;
-       case 'd':
-           switch (get_regex_charset(RExC_flags)) {
-               case REGEX_LOCALE_CHARSET:
-                   op = DIGITL;
-                   break;
-               case REGEX_ASCII_RESTRICTED_CHARSET:
-               case REGEX_ASCII_MORE_RESTRICTED_CHARSET:
-                   op = DIGITA;
-                   break;
-               case REGEX_DEPENDS_CHARSET: /* No difference between these */
-               case REGEX_UNICODE_CHARSET:
-                   op = DIGIT;
-                   break;
-               default:
-                   goto bad_charset;
+           op = NSPACE + get_regex_charset(RExC_flags);
+            if (op > NSPACEA) { /* /aa is same as /a */
+                op = NSPACEA;
             }
            ret = reg_node(pRExC_state, op);
            *flagp |= HASWIDTH|SIMPLE;
            goto finish_meta_pat;
        case 'D':
-           switch (get_regex_charset(RExC_flags)) {
-               case REGEX_LOCALE_CHARSET:
-                   op = NDIGITL;
-                   break;
-               case REGEX_ASCII_RESTRICTED_CHARSET:
-               case REGEX_ASCII_MORE_RESTRICTED_CHARSET:
-                   op = NDIGITA;
-                   break;
-               case REGEX_DEPENDS_CHARSET: /* No difference between these */
-               case REGEX_UNICODE_CHARSET:
-                   op = NDIGIT;
-                   break;
-               default:
-                   goto bad_charset;
+            op = NDIGIT;
+            goto join_D_and_d;
+       case 'd':
+            op = DIGIT;
+        join_D_and_d:
+            {
+                U8 offset = get_regex_charset(RExC_flags);
+                if (offset == REGEX_UNICODE_CHARSET) {
+                    offset = REGEX_DEPENDS_CHARSET;
+                }
+                else if (offset == REGEX_ASCII_MORE_RESTRICTED_CHARSET) {
+                    offset = REGEX_ASCII_RESTRICTED_CHARSET;
+                }
+                op += offset;
             }
            ret = reg_node(pRExC_state, op);
            *flagp |= HASWIDTH|SIMPLE;
@@ -10305,14 +10210,18 @@ tryagain:
            bool is_exactfu_sharp_s;
 
            ender = 0;
-            node_type = ((! FOLD) ? EXACT
-                       : (LOC)
-                         ? EXACTFL
-                         : (MORE_ASCII_RESTRICTED)
-                           ? EXACTFA
-                           : (AT_LEAST_UNI_SEMANTICS)
-                             ? EXACTFU
-                             : EXACTF);
+            if (! FOLD) {
+                node_type = EXACT;
+            }
+            else {
+                node_type = get_regex_charset(RExC_flags);
+                if (node_type >= REGEX_ASCII_RESTRICTED_CHARSET) {
+                    node_type--; /* /a is same as /u, and map /aa's offset to
+                                    what /a's would have been, so there is no
+                                    hole */
+                }
+                node_type += EXACTF;
+            }
            ret = reg_node(pRExC_state, node_type);
            s = STRING(ret);
 
@@ -10706,11 +10615,6 @@ tryagain:
     }
 
     return(ret);
-
-/* Jumped to when an unrecognized character set is encountered */
-bad_charset:
-    Perl_croak(aTHX_ "panic: Unknown regex character set encoding: %u", get_regex_charset(RExC_flags));
-    return(NULL);
 }
 
 STATIC char *
@@ -11029,171 +10933,6 @@ S_checkposixcc(pTHX_ RExC_state_t *pRExC_state)
        }                                                                  \
     }
 
-STATIC U8
-S_set_regclass_bit_fold(pTHX_ RExC_state_t *pRExC_state, regnode* node, const U8 value, SV** invlist_ptr, AV** alternate_ptr)
-{
-
-    /* Handle the setting of folds in the bitmap for non-locale ANYOF nodes.
-     * Locale folding is done at run-time, so this function should not be
-     * called for nodes that are for locales.
-     *
-     * This function sets the bit corresponding to the fold of the input
-     * 'value', if not already set.  The fold of 'f' is 'F', and the fold of
-     * 'F' is 'f'.
-     *
-     * It also knows about the characters that are in the bitmap that have
-     * folds that are matchable only outside it, and sets the appropriate lists
-     * and flags.
-     *
-     * It returns the number of bits that actually changed from 0 to 1 */
-
-    U8 stored = 0;
-    U8 fold;
-
-    PERL_ARGS_ASSERT_SET_REGCLASS_BIT_FOLD;
-
-    fold = (AT_LEAST_UNI_SEMANTICS) ? PL_fold_latin1[value]
-                                    : PL_fold[value];
-
-    /* It assumes the bit for 'value' has already been set */
-    if (fold != value && ! ANYOF_BITMAP_TEST(node, fold)) {
-        ANYOF_BITMAP_SET(node, fold);
-        stored++;
-    }
-    if (_HAS_NONLATIN1_FOLD_CLOSURE_ONLY_FOR_USE_BY_REGCOMP_DOT_C_AND_REGEXEC_DOT_C(value) && (! isASCII(value) || ! MORE_ASCII_RESTRICTED)) {
-       /* Certain Latin1 characters have matches outside the bitmap.  To get
-        * here, 'value' is one of those characters.   None of these matches is
-        * valid for ASCII characters under /aa, which have been excluded by
-        * the 'if' above.  The matches fall into three categories:
-        * 1) They are singly folded-to or -from an above 255 character, as
-        *    LATIN SMALL LETTER Y WITH DIAERESIS and LATIN CAPITAL LETTER Y
-        *    WITH DIAERESIS;
-        * 2) They are part of a multi-char fold with another character in the
-        *    bitmap, only LATIN SMALL LETTER SHARP S => "ss" fits that bill;
-        * 3) They are part of a multi-char fold with a character not in the
-        *    bitmap, such as various ligatures.
-        * We aren't dealing fully with multi-char folds, except we do deal
-        * with the pattern containing a character that has a multi-char fold
-        * (not so much the inverse).
-        * For types 1) and 3), the matches only happen when the target string
-        * is utf8; that's not true for 2), and we set a flag for it.
-        *
-        * The code below adds to the passed in inversion list the single fold
-        * closures for 'value'.  The values are hard-coded here so that an
-        * innocent-looking character class, like /[ks]/i won't have to go out
-        * to disk to find the possible matches.  XXX It would be better to
-        * generate these via regen, in case a new version of the Unicode
-        * standard adds new mappings, though that is not really likely. */
-       switch (value) {
-           case 'k':
-           case 'K':
-               /* KELVIN SIGN */
-               *invlist_ptr = add_cp_to_invlist(*invlist_ptr, 0x212A);
-               break;
-           case 's':
-           case 'S':
-               /* LATIN SMALL LETTER LONG S */
-               *invlist_ptr = add_cp_to_invlist(*invlist_ptr, 0x017F);
-               break;
-           case MICRO_SIGN:
-               *invlist_ptr = add_cp_to_invlist(*invlist_ptr,
-                                                GREEK_SMALL_LETTER_MU);
-               *invlist_ptr = add_cp_to_invlist(*invlist_ptr,
-                                                GREEK_CAPITAL_LETTER_MU);
-               break;
-           case LATIN_CAPITAL_LETTER_A_WITH_RING_ABOVE:
-           case LATIN_SMALL_LETTER_A_WITH_RING_ABOVE:
-               /* ANGSTROM SIGN */
-               *invlist_ptr = add_cp_to_invlist(*invlist_ptr, 0x212B);
-               if (DEPENDS_SEMANTICS) {    /* See DEPENDS comment below */
-                   *invlist_ptr = add_cp_to_invlist(*invlist_ptr,
-                                                    PL_fold_latin1[value]);
-               }
-               break;
-           case LATIN_SMALL_LETTER_Y_WITH_DIAERESIS:
-               *invlist_ptr = add_cp_to_invlist(*invlist_ptr,
-                                       LATIN_CAPITAL_LETTER_Y_WITH_DIAERESIS);
-               break;
-           case LATIN_SMALL_LETTER_SHARP_S:
-               *invlist_ptr = add_cp_to_invlist(*invlist_ptr,
-                                       LATIN_CAPITAL_LETTER_SHARP_S);
-
-               /* Under /a, /d, and /u, this can match the two chars "ss" */
-               if (! MORE_ASCII_RESTRICTED) {
-                   add_alternate(alternate_ptr, (U8 *) "ss", 2);
-
-                   /* And under /u or /a, it can match even if the target is
-                    * not utf8 */
-                   if (AT_LEAST_UNI_SEMANTICS) {
-                       ANYOF_FLAGS(node) |= ANYOF_NONBITMAP_NON_UTF8;
-                   }
-               }
-               break;
-           case 'F': case 'f':
-           case 'I': case 'i':
-           case 'L': case 'l':
-           case 'T': case 't':
-           case 'A': case 'a':
-           case 'H': case 'h':
-           case 'J': case 'j':
-           case 'N': case 'n':
-           case 'W': case 'w':
-           case 'Y': case 'y':
-                /* These all are targets of multi-character folds from code
-                 * points that require UTF8 to express, so they can't match
-                 * unless the target string is in UTF-8, so no action here is
-                 * necessary, as regexec.c properly handles the general case
-                 * for UTF-8 matching */
-               break;
-           default:
-               /* Use deprecated warning to increase the chances of this
-                * being output */
-               ckWARN2regdep(RExC_parse, "Perl folding rules are not up-to-date for 0x%x; please use the perlbug utility to report;", value);
-               break;
-       }
-    }
-    else if (DEPENDS_SEMANTICS
-           && ! isASCII(value)
-           && PL_fold_latin1[value] != value)
-    {
-          /* Under DEPENDS rules, non-ASCII Latin1 characters match their
-           * folds only when the target string is in UTF-8.  We add the fold
-           * here to the list of things to match outside the bitmap, which
-           * won't be looked at unless it is UTF8 (or else if something else
-           * says to look even if not utf8, but those things better not happen
-           * under DEPENDS semantics. */
-       *invlist_ptr = add_cp_to_invlist(*invlist_ptr, PL_fold_latin1[value]);
-    }
-
-    return stored;
-}
-
-
-PERL_STATIC_INLINE U8
-S_set_regclass_bit(pTHX_ RExC_state_t *pRExC_state, regnode* node, const U8 value, SV** invlist_ptr, AV** alternate_ptr)
-{
-    /* This inline function sets a bit in the bitmap if not already set, and if
-     * appropriate, its fold, returning the number of bits that actually
-     * changed from 0 to 1 */
-
-    U8 stored;
-
-    PERL_ARGS_ASSERT_SET_REGCLASS_BIT;
-
-    if (ANYOF_BITMAP_TEST(node, value)) {   /* Already set */
-       return 0;
-    }
-
-    ANYOF_BITMAP_SET(node, value);
-    stored = 1;
-
-    if (FOLD && ! LOC) {       /* Locale folds aren't known until runtime */
-       stored += set_regclass_bit_fold(pRExC_state, node, value, invlist_ptr, alternate_ptr);
-    }
-
-    return stored;
-}
-
 STATIC void
 S_add_alternate(pTHX_ AV** alternate_ptr, U8* string, STRLEN len)
 {
@@ -11241,6 +10980,13 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, U32 depth)
                               Optimizations may be possible if this is tiny */
     UV n;
 
+    /* Certain named classes have equivalents that can appear outside a
+     * character class, e.g. \w.  These flags are set for these classes.  The
+     * first flag indicates the op depends on the character set modifier, like
+     * /d, /u....  The second is for those that don't have this dependency. */
+    bool has_special_charset_op = FALSE;
+    bool has_special_non_charset_op = FALSE;
+
     /* Unicode properties are stored in a swash; this holds the current one
      * being parsed.  If this swash is the only above-latin1 component of the
      * character class, an optimization is to pass it directly on to the
@@ -11253,32 +10999,20 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, U32 depth)
      * on to the engine */
     UV has_user_defined_property = 0;
 
-    /* code points this node matches that can't be stored in the bitmap */
-    SV* nonbitmap = NULL;
-
     /* inversion list of code points this node matches only when the target
      * string is in UTF-8.  (Because is under /d) */
     SV* depends_list = NULL;
 
-    /* The items that are to match that aren't stored in the bitmap, but are a
-     * result of things that are stored there.  This is the fold closure of
-     * such a character, either because it has DEPENDS semantics and shouldn't
-     * be matched unless the target string is utf8, or is a code point that is
-     * too large for the bit map, as for example, the fold of the MICRO SIGN is
-     * above 255.  This all is solely for performance reasons.  By having this
-     * code know the outside-the-bitmap folds that the bitmapped characters are
-     * involved with, we don't have to go out to disk to find the list of
-     * matches, unless the character class includes code points that aren't
-     * storable in the bit map.  That means that a character class with an 's'
-     * in it, for example, doesn't need to go out to disk to find everything
-     * that matches.  A 2nd list is used so that the 'nonbitmap' list is kept
-     * empty unless there is something whose fold we don't know about, and will
-     * have to go out to the disk to find. */
-    SV* l1_fold_invlist = NULL;
+    /* inversion list of code points this node matches.  For much of the
+     * function, it includes only those that match regardless of the utf8ness
+     * of the target string */
+    SV* cp_list = NULL;
 
     /* List of multi-character folds that are matched by this node */
     AV* unicode_alternate  = NULL;
 #ifdef EBCDIC
+    /* In a range, counts how many 0-2 of the ends of it came from literals,
+     * not escapes.  Thus we can tell if 'A' was input vs \x{C1} */
     UV literal_endpoint = 0;
 #endif
     UV stored = 0;  /* how many chars stored in the bitmap */
@@ -11641,20 +11375,49 @@ parseit:
                    ckWARN4reg(RExC_parse,
                               "False [] range \"%*.*s\"",
                               w, w, rangebegin);
-
-                    nonbitmap = add_cp_to_invlist(nonbitmap, '-');
-                       nonbitmap = add_cp_to_invlist(nonbitmap, prevvalue);
+                    cp_list = add_cp_to_invlist(cp_list, '-');
+                    cp_list = add_cp_to_invlist(cp_list, prevvalue);
                }
 
                range = 0; /* this was not a true range */
+                element_count += 2; /* So counts for three values */
            }
 
-           if (!SIZE_ONLY) {
+           if (SIZE_ONLY) {
+
+                /* In the first pass, do a little extra work so below can
+                 * possibly optimize the whole node to one of the nodes that
+                 * correspond to the classes given below */
+
+                /* The optimization will only take place if there is a single
+                 * element in the class, so can skip if there is more than one
+                 */
+                if (element_count == 1) {
 
                /* Possible truncation here but in some 64-bit environments
                 * the compiler gets heartburn about switch on 64-bit values.
                 * A similar issue a little earlier when switching on value.
                 * --jhi */
+                    switch ((I32)namedclass) {
+                        case ANYOF_ALNUM:
+                        case ANYOF_NALNUM:
+                        case ANYOF_DIGIT:
+                        case ANYOF_NDIGIT:
+                        case ANYOF_SPACE:
+                        case ANYOF_NSPACE:
+                            has_special_charset_op = TRUE;
+                            break;
+
+                        case ANYOF_HORIZWS:
+                        case ANYOF_NHORIZWS:
+                        case ANYOF_VERTWS:
+                        case ANYOF_NVERTWS:
+                            has_special_non_charset_op = TRUE;
+                            break;
+                    }
+                }
+            }
+            else {
                switch ((I32)namedclass) {
 
                case ANYOF_ALNUMC: /* C's alnum, in contrast to \w */
@@ -11715,10 +11478,12 @@ parseit:
                     * them */
                    DO_POSIX_LATIN1_ONLY_KNOWN_L1_RESOLVED(ret, namedclass, properties,
                         PL_PosixDigit, "XPosixDigit", listsv);
+                    has_special_charset_op = TRUE;
                    break;
                case ANYOF_NDIGIT:
                    DO_N_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, properties,
                         PL_PosixDigit, PL_PosixDigit, "XPosixDigit", listsv);
+                    has_special_charset_op = TRUE;
                    break;
                case ANYOF_GRAPH:
                    DO_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, properties,
@@ -11729,16 +11494,18 @@ parseit:
                         PL_PosixGraph, PL_L1PosixGraph, "XPosixGraph", listsv);
                    break;
                case ANYOF_HORIZWS:
-                   /* For these, we use the nonbitmap, as /d doesn't make a
+                   /* For these, we use the cp_list, as /d doesn't make a
                     * difference in what these match.  There would be problems
                     * if these characters had folds other than themselves, as
-                    * nonbitmap is subject to folding.  It turns out that \h
+                    * cp_list is subject to folding.  It turns out that \h
                     * is just a synonym for XPosixBlank */
-                   _invlist_union(nonbitmap, PL_XPosixBlank, &nonbitmap);
+                   _invlist_union(cp_list, PL_XPosixBlank, &cp_list);
+                    has_special_non_charset_op = TRUE;
                    break;
                case ANYOF_NHORIZWS:
-                    _invlist_union_complement_2nd(nonbitmap,
-                                                 PL_XPosixBlank, &nonbitmap);
+                    _invlist_union_complement_2nd(cp_list,
+                                                 PL_XPosixBlank, &cp_list);
+                    has_special_non_charset_op = TRUE;
                    break;
                case ANYOF_LOWER:
                case ANYOF_NLOWER:
@@ -11797,10 +11564,12 @@ parseit:
                case ANYOF_SPACE:
                     DO_POSIX(ret, namedclass, properties,
                                             PL_PerlSpace, PL_XPerlSpace);
+                    has_special_charset_op = TRUE;
                    break;
                case ANYOF_NSPACE:
                     DO_N_POSIX(ret, namedclass, properties,
                                             PL_PerlSpace, PL_XPerlSpace);
+                    has_special_charset_op = TRUE;
                    break;
                case ANYOF_UPPER:   /* Same as LOWER, above */
                case ANYOF_NUPPER:
@@ -11832,21 +11601,25 @@ parseit:
                case ANYOF_ALNUM:   /* Really is 'Word' */
                    DO_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, properties,
                             PL_PosixWord, PL_L1PosixWord, "XPosixWord", listsv);
+                    has_special_charset_op = TRUE;
                    break;
                case ANYOF_NALNUM:
                    DO_N_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, properties,
                             PL_PosixWord, PL_L1PosixWord, "XPosixWord", listsv);
+                    has_special_charset_op = TRUE;
                    break;
                case ANYOF_VERTWS:
-                   /* For these, we use the nonbitmap, as /d doesn't make a
+                   /* For these, we use the cp_list, as /d doesn't make a
                     * difference in what these match.  There would be problems
                     * if these characters had folds other than themselves, as
-                    * nonbitmap is subject to folding */
-                   _invlist_union(nonbitmap, PL_VertSpace, &nonbitmap);
+                    * cp_list is subject to folding */
+                   _invlist_union(cp_list, PL_VertSpace, &cp_list);
+                    has_special_non_charset_op = TRUE;
                    break;
                case ANYOF_NVERTWS:
-                    _invlist_union_complement_2nd(nonbitmap,
-                                                    PL_VertSpace, &nonbitmap);
+                    _invlist_union_complement_2nd(cp_list,
+                                                    PL_VertSpace, &cp_list);
+                    has_special_non_charset_op = TRUE;
                    break;
                case ANYOF_XDIGIT:
                     DO_POSIX(ret, namedclass, properties,
@@ -11893,8 +11666,8 @@ parseit:
                               "False [] range \"%*.*s\"",
                               w, w, rangebegin);
                    }
-                   if (!SIZE_ONLY)
-                    nonbitmap = add_cp_to_invlist(nonbitmap, '-');
+                    if (!SIZE_ONLY)
+                        cp_list = add_cp_to_invlist(cp_list, '-');
                } else
                    range = 1;  /* yeah, it's a range! */
                continue;       /* but do it the next time */
@@ -11910,7 +11683,7 @@ parseit:
        /* now is the next time */
        if (!SIZE_ONLY) {
 #ifndef EBCDIC
-            nonbitmap = _add_range_to_invlist(nonbitmap, prevvalue, value);
+            cp_list = _add_range_to_invlist(cp_list, prevvalue, value);
 #else
             UV* this_range = _new_invlist(1);
             _append_range_to_invlist(this_range, prevvalue, value);
@@ -11929,9 +11702,8 @@ parseit:
             {
                 _invlist_intersection(this_range, PL_ASCII, &this_range, );
                 _invlist_intersection(this_range, PL_Alpha, &this_range, );
-
             }
-            _invlist_union(nonbitmap, this_range, &nonbitmap);
+            _invlist_union(cp_list, this_range, &cp_list);
             literal_endpoint = 0;
 #endif
         }
@@ -11939,7 +11711,100 @@ parseit:
        range = 0; /* this range (if it was one) is done now */
     }
 
+    /* [\w] can be optimized into \w, but not if there is anything else in the
+     * brackets (except for an initial '^' which indicates complementing).  We
+     * also can optimize the common special case /[0-9]/ into /\d/a */
+    if (element_count == 1 &&
+        (has_special_charset_op
+         || has_special_non_charset_op
+         || (prevvalue == '0' && value == '9')))
+    {
+        U8 op;
+        bool invert = ANYOF_FLAGS(ret) & ANYOF_INVERT;
+        const char * cur_parse = RExC_parse;
+
+        if (has_special_charset_op) {
+            U8 offset = get_regex_charset(RExC_flags);
+
+            /* /aa is the same as /a for these */
+            if (offset == REGEX_ASCII_MORE_RESTRICTED_CHARSET) {
+                offset = REGEX_ASCII_RESTRICTED_CHARSET;
+            }
+            switch ((I32)namedclass) {
+                case ANYOF_NALNUM:
+                    invert = ! invert;
+                    /* FALLTHROUGH */
+                case ANYOF_ALNUM:
+                    op = ALNUM;
+                    break;
+                case ANYOF_NSPACE:
+                    invert = ! invert;
+                    /* FALLTHROUGH */
+                case ANYOF_SPACE:
+                    op = SPACE;
+                    break;
+                case ANYOF_NDIGIT:
+                    invert = ! invert;
+                    /* FALLTHROUGH */
+                case ANYOF_DIGIT:
+                    op = DIGIT;
+
+                    /* There is no DIGITU */
+                    if (offset == REGEX_UNICODE_CHARSET) {
+                        offset = REGEX_DEPENDS_CHARSET;
+                    }
+                    break;
+                default:
+                    Perl_croak(aTHX_ "panic: Named character class %"IVdf" is not expected to have a non-[...] version", namedclass);
+            }
+
+            /* The number of varieties of each of these is the same, hence, so
+             * is the delta between the normal and complemented nodes */
+            if (invert) {
+                offset += NALNUM - ALNUM;
+            }
+
+            op += offset;
+        }
+        else if (has_special_non_charset_op) {
+            switch ((I32)namedclass) {
+                case ANYOF_NHORIZWS:
+                    invert = ! invert;
+                    /* FALLTHROUGH */
+                case ANYOF_HORIZWS:
+                    op = HORIZWS;
+                    break;
+                case ANYOF_NVERTWS:
+                    invert = ! invert;
+                    /* FALLTHROUGH */
+                case ANYOF_VERTWS:
+                    op = VERTWS;
+                    break;
+                default:
+                    Perl_croak(aTHX_ "panic: Named character class %"IVdf" is not expected to have a non-[...] version", namedclass);
+            }
+
+            /* The complemented version of each of these nodes immediately
+             * follows the uncomplemented one */
+            if (invert) {
+                op++;
+            }
+        }
+        else {  /* The remaining possibility is [0-9] */
+            op = (invert) ? NDIGITA : DIGITA;
+        }
+
+        /* Throw away this ANYOF regnode, and emit the calculated one, which
+         * should correspond to the beginning, not current, state of the parse
+         */
+        RExC_parse = (char *)orig_parse;
+        RExC_emit = (regnode *)orig_emit;
+        ret = reg_node(pRExC_state, op);
+        RExC_parse = (char *) cur_parse;
 
+        SvREFCNT_dec(listsv);
+        return ret;
+    }
 
     if (SIZE_ONLY)
         return ret;
@@ -11947,12 +11812,12 @@ parseit:
 
     /* If folding, we calculate all characters that could fold to or from the
      * ones already on the list */
-    if (FOLD && nonbitmap) {
+    if (FOLD && cp_list) {
        UV start, end;  /* End points of code point ranges */
 
        SV* fold_intersection = NULL;
 
-        const UV highest_index = invlist_len(nonbitmap) - 1;
+        const UV highest_index = invlist_len(cp_list) - 1;
 
         /* In the Latin1 range, the characters that can be folded-to or -from
          * are precisely the alphabetic characters.  If the highest code point
@@ -11963,54 +11828,56 @@ parseit:
          * Otherwise, it starts a range that isn't in the set, so the max is
          * one less than it */
         if (! ELEMENT_RANGE_MATCHES_INVLIST(highest_index)
-            && invlist_array(nonbitmap)[highest_index] <= 256)
+            && invlist_array(cp_list)[highest_index] <= 256)
         {
-            _invlist_intersection(PL_L1PosixAlpha, nonbitmap, &fold_intersection);
+            _invlist_intersection(PL_L1PosixAlpha, cp_list, &fold_intersection);
         }
         else {
 
+            /* This is a list of all the characters that participate in folds
+             * (except marks, etc. in multi-char folds) */
+            if (! PL_utf8_foldable) {
+                SV* swash = swash_init("utf8", "Cased", &PL_sv_undef, 1, 0);
+                PL_utf8_foldable = _swash_to_invlist(swash);
+                SvREFCNT_dec(swash);
+            }
 
-       /* This is a list of all the characters that participate in folds
-           * (except marks, etc in multi-char folds */
-       if (! PL_utf8_foldable) {
-           SV* swash = swash_init("utf8", "Cased", &PL_sv_undef, 1, 0);
-           PL_utf8_foldable = _swash_to_invlist(swash);
-            SvREFCNT_dec(swash);
-       }
-
-       /* This is a hash that for a particular fold gives all characters
-           * that are involved in it */
-       if (! PL_utf8_foldclosures) {
-
-           /* If we were unable to find any folds, then we likely won't be
-            * able to find the closures.  So just create an empty list.
-            * Folding will effectively be restricted to the non-Unicode rules
-            * hard-coded into Perl.  (This case happens legitimately during
-            * compilation of Perl itself before the Unicode tables are
-            * generated) */
-           if (invlist_len(PL_utf8_foldable) == 0) {
-               PL_utf8_foldclosures = newHV();
-           } else {
-               /* If the folds haven't been read in, call a fold function
-                   * to force that */
-               if (! PL_utf8_tofold) {
-                   U8 dummy[UTF8_MAXBYTES+1];
-                   STRLEN dummy_len;
-
-                   /* This particular string is above \xff in both UTF-8 and
-                    * UTFEBCDIC */
-                   to_utf8_fold((U8*) "\xC8\x80", dummy, &dummy_len);
-                   assert(PL_utf8_tofold); /* Verify that worked */
-               }
-               PL_utf8_foldclosures = _swash_inversion_hash(PL_utf8_tofold);
-           }
-       }
+            /* This is a hash that for a particular fold gives all characters
+             * that are involved in it */
+            if (! PL_utf8_foldclosures) {
+
+                /* If we were unable to find any folds, then we likely won't be
+                 * able to find the closures.  So just create an empty list.
+                 * Folding will effectively be restricted to the non-Unicode
+                 * rules hard-coded into Perl.  (This case happens legitimately
+                 * during compilation of Perl itself before the Unicode tables
+                 * are generated) */
+                if (invlist_len(PL_utf8_foldable) == 0) {
+                    PL_utf8_foldclosures = newHV();
+                }
+                else {
+                    /* If the folds haven't been read in, call a fold function
+                     * to force that */
+                    if (! PL_utf8_tofold) {
+                        U8 dummy[UTF8_MAXBYTES+1];
+                        STRLEN dummy_len;
+
+                        /* This particular string is above \xff in both UTF-8
+                         * and UTFEBCDIC */
+                        to_utf8_fold((U8*) "\xC8\x80", dummy, &dummy_len);
+                        assert(PL_utf8_tofold); /* Verify that worked */
+                    }
+                    PL_utf8_foldclosures =
+                                        _swash_inversion_hash(PL_utf8_tofold);
+                }
+            }
 
-       /* Only the characters in this class that participate in folds need be
-        * checked.  Get the intersection of this class and all the possible
-        * characters that are foldable.  This can quickly narrow down a large
-        * class */
-       _invlist_intersection(PL_utf8_foldable, nonbitmap, &fold_intersection);
+            /* Only the characters in this class that participate in folds need
+             * be checked.  Get the intersection of this class and all the
+             * possible characters that are foldable.  This can quickly narrow
+             * down a large class */
+            _invlist_intersection(PL_utf8_foldable, cp_list,
+                                  &fold_intersection);
         }
 
        /* Now look at the foldable characters in this class individually */
@@ -12045,8 +11912,8 @@ parseit:
                         /* ASCII is always matched; non-ASCII is matched only
                          * under Unicode rules */
                         if (isASCII(j) || AT_LEAST_UNI_SEMANTICS) {
-                            nonbitmap =
-                                add_cp_to_invlist(nonbitmap, PL_fold_latin1[j]);
+                            cp_list =
+                                add_cp_to_invlist(cp_list, PL_fold_latin1[j]);
                         }
                         else {
                             depends_list =
@@ -12086,33 +11953,33 @@ parseit:
                             case 'k':
                             case 'K':
                                 /* KELVIN SIGN */
-                                nonbitmap =
-                                    add_cp_to_invlist(nonbitmap, 0x212A);
+                                cp_list =
+                                    add_cp_to_invlist(cp_list, 0x212A);
                                 break;
                             case 's':
                             case 'S':
                                 /* LATIN SMALL LETTER LONG S */
-                                nonbitmap =
-                                    add_cp_to_invlist(nonbitmap, 0x017F);
+                                cp_list =
+                                    add_cp_to_invlist(cp_list, 0x017F);
                                 break;
                             case MICRO_SIGN:
-                                nonbitmap = add_cp_to_invlist(nonbitmap,
+                                cp_list = add_cp_to_invlist(cp_list,
                                                     GREEK_SMALL_LETTER_MU);
-                                nonbitmap = add_cp_to_invlist(nonbitmap,
+                                cp_list = add_cp_to_invlist(cp_list,
                                                     GREEK_CAPITAL_LETTER_MU);
                                 break;
                             case LATIN_CAPITAL_LETTER_A_WITH_RING_ABOVE:
                             case LATIN_SMALL_LETTER_A_WITH_RING_ABOVE:
                                 /* ANGSTROM SIGN */
-                                nonbitmap =
-                                        add_cp_to_invlist(nonbitmap, 0x212B);
+                                cp_list =
+                                        add_cp_to_invlist(cp_list, 0x212B);
                                 break;
                             case LATIN_SMALL_LETTER_Y_WITH_DIAERESIS:
-                                nonbitmap = add_cp_to_invlist(nonbitmap,
+                                cp_list = add_cp_to_invlist(cp_list,
                                         LATIN_CAPITAL_LETTER_Y_WITH_DIAERESIS);
                                 break;
                             case LATIN_SMALL_LETTER_SHARP_S:
-                                nonbitmap = add_cp_to_invlist(nonbitmap,
+                                cp_list = add_cp_to_invlist(cp_list,
                                                 LATIN_CAPITAL_LETTER_SHARP_S);
 
                                 /* Under /a, /d, and /u, this can match the two
@@ -12159,7 +12026,12 @@ parseit:
                 /* Here is an above Latin1 character.  We don't have the rules
                  * hard-coded for it.  First, get its fold */
                f = _to_uni_fold_flags(j, foldbuf, &foldlen,
-                                    ((allow_full_fold) ? FOLD_FLAGS_FULL : 0);
+                                    ((allow_full_fold) ? FOLD_FLAGS_FULL : 0)
+                                    | ((LOC)
+                                        ? FOLD_FLAGS_LOCALE
+                                        : (MORE_ASCII_RESTRICTED)
+                                            ? FOLD_FLAGS_NOMIX_ASCII
+                                            : 0));
 
                if (foldlen > (STRLEN)UNISKIP(f)) {
 
@@ -12176,31 +12048,18 @@ parseit:
                        /* If any of the folded characters of this are in the
                         * Latin1 range, tell the regex engine that this can
                         * match a non-utf8 target string.  */
-                           while (loc < e) {
-
-                               /* Can't mix ascii with non- under /aa */
-                               if (MORE_ASCII_RESTRICTED
-                                   && (isASCII(*loc) != isASCII(j)))
-                               {
-                                   goto end_multi_fold;
-                               }
-                               if (UTF8_IS_INVARIANT(*loc)
-                                   || UTF8_IS_DOWNGRADEABLE_START(*loc))
-                               {
-                                    /* Can't mix above and below 256 under LOC
-                                     */
-                                   if (LOC) {
-                                       goto end_multi_fold;
-                                   }
-                                   ANYOF_FLAGS(ret)
-                                           |= ANYOF_NONBITMAP_NON_UTF8;
-                                   break;
-                               }
-                               loc += UTF8SKIP(loc);
-                           }
+                        while (loc < e) {
+                            if (UTF8_IS_INVARIANT(*loc)
+                                || UTF8_IS_DOWNGRADEABLE_START(*loc))
+                            {
+                                ANYOF_FLAGS(ret)
+                                        |= ANYOF_NONBITMAP_NON_UTF8;
+                                break;
+                            }
+                            loc += UTF8SKIP(loc);
+                        }
 
                        add_alternate(&unicode_alternate, foldbuf, foldlen);
-                   end_multi_fold: ;
                    }
                }
                 else {
@@ -12229,17 +12088,17 @@ parseit:
                            /* /aa doesn't allow folds between ASCII and non-;
                             * /l doesn't allow them between above and below
                             * 256 */
-                           if ((MORE_ASCII_RESTRICTED
-                                && (isASCII(c) != isASCII(j)))
-                                   || (LOC && ((c < 256) != (j < 256))))
+                           if ((MORE_ASCII_RESTRICTED && (isASCII(c) != isASCII(j)))
+                               || (LOC && ((c < 256) != (j < 256))))
                            {
                                continue;
                            }
 
                             /* Folds involving non-ascii Latin1 characters
                              * under /d are added to a separate list */
-                           if (isASCII(c) || c > 255 || AT_LEAST_UNI_SEMANTICS) {
-                               nonbitmap = add_cp_to_invlist(nonbitmap, c);
+                           if (isASCII(c) || c > 255 || AT_LEAST_UNI_SEMANTICS)
+                            {
+                               cp_list = add_cp_to_invlist(cp_list, c);
                             }
                             else {
                                 depends_list = add_cp_to_invlist(depends_list, c);
@@ -12247,49 +12106,41 @@ parseit:
                        }
                    }
                }
-           }
+            }
        }
        SvREFCNT_dec(fold_intersection);
     }
 
-    /* Combine the two lists into one. */
-    if (l1_fold_invlist) {
-       if (nonbitmap) {
-           _invlist_union(nonbitmap, l1_fold_invlist, &nonbitmap);
-           SvREFCNT_dec(l1_fold_invlist);
-       }
-       else {
-           nonbitmap = l1_fold_invlist;
-       }
-    }
-
     /* And combine the result (if any) with any inversion list from properties.
      * The lists are kept separate up to now because we don't want to fold the
      * properties */
     if (properties) {
         if (AT_LEAST_UNI_SEMANTICS) {
-       if (nonbitmap) {
-           _invlist_union(nonbitmap, properties, &nonbitmap);
-           SvREFCNT_dec(properties);
-       }
-       else {
-           nonbitmap = properties;
-       }
+            if (cp_list) {
+                _invlist_union(cp_list, properties, &cp_list);
+                SvREFCNT_dec(properties);
+            }
+            else {
+                cp_list = properties;
+            }
         }
         else {
 
             /* Under /d, we put the things that match only when the target
              * string is utf8, into a separate list */
             SV* nonascii_but_latin1_properties = NULL;
-            _invlist_intersection(properties, PL_Latin1, &nonascii_but_latin1_properties);
-            _invlist_subtract(nonascii_but_latin1_properties, PL_ASCII, &nonascii_but_latin1_properties);
-            _invlist_subtract(properties, nonascii_but_latin1_properties, &properties);
-            if (nonbitmap) {
-                _invlist_union(nonbitmap, properties, &nonbitmap);
+            _invlist_intersection(properties, PL_Latin1,
+                                  &nonascii_but_latin1_properties);
+            _invlist_subtract(nonascii_but_latin1_properties, PL_ASCII,
+                              &nonascii_but_latin1_properties);
+            _invlist_subtract(properties, nonascii_but_latin1_properties,
+                              &properties);
+            if (cp_list) {
+                _invlist_union(cp_list, properties, &cp_list);
                 SvREFCNT_dec(properties);
             }
             else {
-                nonbitmap = properties;
+                cp_list = properties;
             }
 
             if (depends_list) {
@@ -12303,20 +12154,51 @@ parseit:
         }
     }
 
-    /* Here, <nonbitmap> contains all the code points we can determine at
+    /* Here, we have calculated what code points should be in the character
+     * class.
+     *
+     * Now we can see about various optimizations.  Fold calculation (which we
+     * did above) needs to take place before inversion.  Otherwise /[^k]/i
+     * would invert to include K, which under /i would match k, which it
+     * shouldn't. */
+
+    /* Optimize inverted simple patterns (e.g. [^a-z]).  Note that we haven't
+     * set the FOLD flag yet, so this does optimize those.  It doesn't
+     * optimize locale.  Doing so perhaps could be done as long as there is
+     * nothing like \w in it; some thought also would have to be given to the
+     * interaction with above 0x100 chars */
+    if ((ANYOF_FLAGS(ret) & ANYOF_INVERT)
+        && ! LOC
+       && ! depends_list
+       && ! unicode_alternate
+       && SvCUR(listsv) == initial_listsv_len)
+    {
+        _invlist_invert(cp_list);
+
+        /* Any swash can't be used as-is, because we've inverted things */
+        if (swash) {
+            SvREFCNT_dec(swash);
+            swash = NULL;
+        }
+
+       /* Clear the invert flag since we have just inverted here */
+       ANYOF_FLAGS(ret) &= ~ANYOF_INVERT;
+    }
+
+    /* Here, <cp_list> contains all the code points we can determine at
      * compile time that match under all conditions.  Go through it, and
      * for things that belong in the bitmap, put them there, and delete from
-     * <nonbitmap> */
-    if (nonbitmap) {
+     * <cp_list> */
+    if (cp_list) {
 
        /* This gets set if we actually need to modify things */
        bool change_invlist = FALSE;
 
        UV start, end;
 
-       /* Start looking through <nonbitmap> */
-       invlist_iterinit(nonbitmap);
-       while (invlist_iternext(nonbitmap, &start, &end)) {
+       /* Start looking through <cp_list> */
+       invlist_iterinit(cp_list);
+       while (invlist_iternext(cp_list, &start, &end)) {
            UV high;
            int i;
 
@@ -12340,147 +12222,27 @@ parseit:
        }
 
         /* Done with loop; remove any code points that are in the bitmap from
-         * <nonbitmap> */
+         * <cp_list> */
        if (change_invlist) {
-           _invlist_subtract(nonbitmap, PL_Latin1, &nonbitmap);
+           _invlist_subtract(cp_list, PL_Latin1, &cp_list);
        }
 
        /* If have completely emptied it, remove it completely */
-       if (invlist_len(nonbitmap) == 0) {
-           SvREFCNT_dec(nonbitmap);
-           nonbitmap = NULL;
+       if (invlist_len(cp_list) == 0) {
+           SvREFCNT_dec(cp_list);
+           cp_list = NULL;
        }
     }
 
     /* Combine the two lists into one. */
     if (depends_list) {
-       if (nonbitmap) {
-           _invlist_union(nonbitmap, depends_list, &nonbitmap);
+       if (cp_list) {
+           _invlist_union(cp_list, depends_list, &cp_list);
            SvREFCNT_dec(depends_list);
        }
        else {
-           nonbitmap = depends_list;
-       }
-    }
-
-    /* Here, we have calculated what code points should be in the character
-     * class.  <nonbitmap> does not overlap the bitmap except possibly in the
-     * case of DEPENDS rules.
-     *
-     * Now we can see about various optimizations.  Fold calculation (which we
-     * did above) needs to take place before inversion.  Otherwise /[^k]/i
-     * would invert to include K, which under /i would match k, which it
-     * shouldn't. */
-
-    /* Optimize inverted simple patterns (e.g. [^a-z]).  Note that we haven't
-     * set the FOLD flag yet, so this does optimize those.  It doesn't
-     * optimize locale.  Doing so perhaps could be done as long as there is
-     * nothing like \w in it; some thought also would have to be given to the
-     * interaction with above 0x100 chars */
-    if ((ANYOF_FLAGS(ret) & ANYOF_INVERT)
-        && ! LOC
-       && ! unicode_alternate
-       /* In case of /d, there are some things that should match only when in
-        * not in the bitmap, i.e., they require UTF8 to match.  These are
-        * listed in nonbitmap, but if ANYOF_NONBITMAP_NON_UTF8 is set in this
-        * case, they don't require UTF8, so can invert here */
-       && (! nonbitmap
-           || ! DEPENDS_SEMANTICS
-           || (ANYOF_FLAGS(ret) & ANYOF_NONBITMAP_NON_UTF8))
-       && SvCUR(listsv) == initial_listsv_len)
-    {
-       int i;
-       if (! nonbitmap) {
-           for (i = 0; i < 256; ++i) {
-               if (ANYOF_BITMAP_TEST(ret, i)) {
-                   ANYOF_BITMAP_CLEAR(ret, i);
-               }
-               else {
-                   ANYOF_BITMAP_SET(ret, i);
-                   prevvalue = value;
-                   value = i;
-               }
-           }
-           /* The inversion means that everything above 255 is matched */
-           ANYOF_FLAGS(ret) |= ANYOF_UNICODE_ALL;
-       }
-       else {
-           /* Here, also has things outside the bitmap that may overlap with
-            * the bitmap.  We have to sync them up, so that they get inverted
-            * in both places.  Earlier, we removed all overlaps except in the
-            * case of /d rules, so no syncing is needed except for this case
-            */
-           SV *remove_list = NULL;
-
-           if (DEPENDS_SEMANTICS) {
-               UV start, end;
-
-               /* Set the bits that correspond to the ones that aren't in the
-                * bitmap.  Otherwise, when we invert, we'll miss these.
-                * Earlier, we removed from the nonbitmap all code points
-                * < 128, so there is no extra work here */
-               invlist_iterinit(nonbitmap);
-               while (invlist_iternext(nonbitmap, &start, &end)) {
-                   if (start > 255) {  /* The bit map goes to 255 */
-                       break;
-                   }
-                   if (end > 255) {
-                       end = 255;
-                   }
-                   for (i = start; i <= (int) end; ++i) {
-                       ANYOF_BITMAP_SET(ret, i);
-                       prevvalue = value;
-                       value = i;
-                   }
-               }
-           }
-
-           /* Now invert both the bitmap and the nonbitmap.  Anything in the
-            * bitmap has to also be removed from the non-bitmap, but again,
-            * there should not be overlap unless is /d rules. */
-           _invlist_invert(nonbitmap);
-
-           /* Any swash can't be used as-is, because we've inverted things */
-           if (swash) {
-               SvREFCNT_dec(swash);
-               swash = NULL;
-           }
-
-           for (i = 0; i < 256; ++i) {
-               if (ANYOF_BITMAP_TEST(ret, i)) {
-                   ANYOF_BITMAP_CLEAR(ret, i);
-                   if (DEPENDS_SEMANTICS) {
-                       if (! remove_list) {
-                           remove_list = _new_invlist(2);
-                       }
-                       remove_list = add_cp_to_invlist(remove_list, i);
-                   }
-               }
-               else {
-                   ANYOF_BITMAP_SET(ret, i);
-                   prevvalue = value;
-                   value = i;
-               }
-           }
-
-           /* And do the removal */
-           if (DEPENDS_SEMANTICS) {
-               if (remove_list) {
-                   _invlist_subtract(nonbitmap, remove_list, &nonbitmap);
-                   SvREFCNT_dec(remove_list);
-               }
-           }
-           else {
-               /* There is no overlap for non-/d, so just delete anything
-                * below 256 */
-               _invlist_intersection(nonbitmap, PL_AboveLatin1, &nonbitmap);
-           }
+           cp_list = depends_list;
        }
-
-       stored = 256 - stored;
-
-       /* Clear the invert flag since have just done it here */
-       ANYOF_FLAGS(ret) &= ~ANYOF_INVERT;
     }
 
     /* Folding in the bitmap is taken care of above, but not for locale (for
@@ -12490,7 +12252,7 @@ parseit:
      * run-time fold flag for these */
     if (FOLD && (LOC
                || (DEPENDS_SEMANTICS
-                   && nonbitmap
+                   && cp_list
                    && ! (ANYOF_FLAGS(ret) & ANYOF_NONBITMAP_NON_UTF8))
                || unicode_alternate))
     {
@@ -12511,7 +12273,7 @@ parseit:
      * characters which only have the two folds; so things like 'fF' and 'Ii'
      * wouldn't work because they are part of the fold of 'LATIN SMALL LIGATURE
      * FI'. */
-    if (! nonbitmap
+    if (! cp_list
        && ! unicode_alternate
        && SvCUR(listsv) == initial_listsv_len
        && ! (ANYOF_FLAGS(ret) & (ANYOF_INVERT|ANYOF_UNICODE_ALL))
@@ -12598,7 +12360,7 @@ parseit:
        SvREFCNT_dec(swash);
        swash = NULL;
     }
-    if (! nonbitmap
+    if (! cp_list
        && SvCUR(listsv) == initial_listsv_len
        && ! unicode_alternate)
     {
@@ -12615,7 +12377,7 @@ parseit:
         *       swash is stored there now.
         * av[2] stores the multicharacter foldings, used later in
         *       regexec.c:S_reginclass().
-        * av[3] stores the nonbitmap inversion list for use in addition or
+        * av[3] stores the cp_list inversion list for use in addition or
         *       instead of av[0]; not used if av[1] isn't NULL
         * av[4] is set if any component of the class is from a user-defined
         *       property; not used if av[1] isn't NULL */
@@ -12627,12 +12389,12 @@ parseit:
                        : listsv);
        if (swash) {
            av_store(av, 1, swash);
-           SvREFCNT_dec(nonbitmap);
+           SvREFCNT_dec(cp_list);
        }
        else {
            av_store(av, 1, NULL);
-           if (nonbitmap) {
-               av_store(av, 3, nonbitmap);
+           if (cp_list) {
+               av_store(av, 3, cp_list);
                av_store(av, 4, newSVuv(has_user_defined_property));
            }
        }