+ /* Optimize inverted simple patterns (e.g. [^a-z]). Note that we haven't
+ * set the FOLD flag yet, so this does optimize those. It doesn't
+ * optimize locale. Doing so perhaps could be done as long as there is
+ * nothing like \w in it; some thought also would have to be given to the
+ * interaction with above 0x100 chars */
+ if (! LOC && (ANYOF_FLAGS(ret) & ANYOF_FLAGS_ALL) == ANYOF_INVERT) {
+ for (value = 0; value < ANYOF_BITMAP_SIZE; ++value)
+ ANYOF_BITMAP(ret)[value] ^= 0xFF;
+ stored = 256 - stored;
+
+ /* The inversion means that everything above 255 is matched; and at the
+ * same time we clear the invert flag */
+ ANYOF_FLAGS(ret) = ANYOF_UTF8|ANYOF_UNICODE_ALL;
+ }
+
+ if (FOLD) {
+ SV *sv;
+
+ /* This is the one character in the bitmap that needs special handling
+ * under non-locale folding, as it folds to two characters 'ss'. This
+ * happens if it is set and we are not inverting, or it isn't set and we
+ * are inverting */
+ if (! LOC
+ && (cBOOL(ANYOF_BITMAP_TEST(ret, LATIN_SMALL_LETTER_SHARP_S))
+ ^ cBOOL(ANYOF_FLAGS(ret) & ANYOF_INVERT)))
+ {
+ OP(ret) = ANYOFV; /* Can match more than a single char */
+
+ /* Under Unicode semantics, it can do this when the target string
+ * isn't in utf8 */
+ if (UNI_SEMANTICS) {
+ ANYOF_FLAGS(ret) |= ANYOF_NONBITMAP_NON_UTF8;
+ }
+
+ if (!unicode_alternate) {
+ unicode_alternate = newAV();
+ }
+ sv = newSVpvn_utf8("ss", 2, TRUE);
+ av_push(unicode_alternate, sv);
+ }
+
+ /* Folding in the bitmap is taken care of above, but not for locale
+ * (for which we have to wait to see what folding is in effect at
+ * runtime), and for things not in the bitmap. Set run-time fold flag
+ * for these */
+ if ((LOC || (ANYOF_FLAGS(ret) & ANYOF_NONBITMAP))) {
+ ANYOF_FLAGS(ret) |= ANYOF_LOC_NONBITMAP_FOLD;
+ }
+ }
+
+ /* A single character class can be "optimized" into an EXACTish node.
+ * Note that since we don't currently count how many characters there are
+ * outside the bitmap, we are XXX missing optimization possibilities for
+ * them. This optimization can't happen unless this is a truly single
+ * character class, which means that it can't be an inversion into a
+ * many-character class, and there must be no possibility of there being
+ * things outside the bitmap. 'stored' (only) for locales doesn't include
+ * \w, etc, so have to make a special test that they aren't present
+ *
+ * Similarly, a 2-character class of the very special form like [bB] can be
+ * optimized into an EXACTFish node, but only for non-locales, and for
+ * characters which only have the two folds; so things like 'fF' and 'Ii'
+ * wouldn't work because they are part of the fold of 'LATIN SMALL LIGATURE
+ * FI'. */
+ if (! (ANYOF_FLAGS(ret) & (ANYOF_NONBITMAP|ANYOF_INVERT|ANYOF_UNICODE_ALL))
+ && (((stored == 1 && ((! (ANYOF_FLAGS(ret) & ANYOF_LOCALE))
+ || (! ANYOF_CLASS_TEST_ANY_SET(ret)))))
+ || (stored == 2 && ((! (ANYOF_FLAGS(ret) & ANYOF_LOCALE))
+ && (! _HAS_NONLATIN1_FOLD_CLOSURE_ONLY_FOR_USE_BY_REGCOMP_DOT_C_AND_REGEXEC_DOT_C(value))
+ /* If the latest code point has a fold whose
+ * bit is set, it must be the only other one */
+ && ((prevvalue = PL_fold_latin1[value]) != (IV)value)
+ && ANYOF_BITMAP_TEST(ret, prevvalue)))))
+ {
+ /* Note that the information needed to decide to do this optimization
+ * is not currently available until the 2nd pass, and that the actually
+ * used EXACTish node takes less space than the calculated ANYOF node,
+ * and hence the amount of space calculated in the first pass is larger
+ * than actually used, so this optimization doesn't gain us any space.
+ * But an EXACT node is faster than an ANYOF node, and can be combined
+ * with any adjacent EXACT nodes later by the optimizer for further
+ * gains. The speed of executing an EXACTF is similar to an ANYOF
+ * node, so the optimization advantage comes from the ability to join
+ * it to adjacent EXACT nodes */
+