- if( stored == 1 && (value < 128 || (value < 256 && !UTF))
- && !( ANYOF_FLAGS(ret) & ( ANYOF_FLAGS_ALL ^ ANYOF_FOLD ) )
- ) {
- /* optimize single char class to an EXACT node but *only* when its not
- * a UTF/high char. Note that the information needed to decide to do
- * this optimization is not currently available until the 2nd pass, and
- * that the actually used EXACT node takes less space than the
- * calculated ANYOF node, and hence the amount of space calculated in
- * the first pass is larger than actually used. Currently we don't
- * keep track of enough information to do this for nodes which contain
- * matches outside the bitmap */
+ /* Folding in the bitmap is taken care of above, but not for locale, for
+ * which we have to wait to see what folding is in effect at runtime, and
+ * for things not in the bitmap */
+ if (FOLD && (LOC || ANYOF_FLAGS(ret) & ANYOF_NONBITMAP)) {
+ ANYOF_FLAGS(ret) |= ANYOF_FOLD;
+ }
+
+ /* Optimize inverted simple patterns (e.g. [^a-z]). Note that this doesn't
+ * optimize locale. Doing so perhaps could be done as long as there is
+ * nothing like \w in it; some thought also would have to be given to the
+ * interaction with above 0x100 chars */
+ if ((ANYOF_FLAGS(ret) & ANYOF_FLAGS_ALL) == ANYOF_INVERT) {
+ for (value = 0; value < ANYOF_BITMAP_SIZE; ++value)
+ ANYOF_BITMAP(ret)[value] ^= 0xFF;
+ stored = 256 - stored;
+
+ /* The inversion means that everything above 255 is matched */
+ ANYOF_FLAGS(ret) = ANYOF_UTF8|ANYOF_UNICODE_ALL;
+ }
+
+ /* A single character class can be "optimized" into an EXACTish node.
+ * Note that since we don't currently count how many characters there are
+ * outside the bitmap, we are XXX missing optimization possibilities for
+ * them. This optimization can't happen unless this is a truly single
+ * character class, which means that it can't be an inversion into a
+ * many-character class, and there must be no possibility of there being
+ * things outside the bitmap. For locales (only), 'stored' doesn't count
+ * \w, etc., so a special test is needed to make sure they aren't present
+ *
+ * Similarly, a 2-character class of the very special form like [bB] can be
+ * optimized into an EXACTFish node, but only for non-locales, and for
+ * characters which only have the two folds; so things like 'fF' and 'Ii'
+ * wouldn't work because they are part of the fold of 'LATIN SMALL LIGATURE
+ * FI'. */
+ if (! (ANYOF_FLAGS(ret) & (ANYOF_NONBITMAP|ANYOF_INVERT|ANYOF_UNICODE_ALL))
+ && (((stored == 1 && ((! (ANYOF_FLAGS(ret) & ANYOF_LOCALE))
+ || (! ANYOF_CLASS_TEST_ANY_SET(ret)))))
+ || (stored == 2 && ((! (ANYOF_FLAGS(ret) & ANYOF_LOCALE))
+ && (! _HAS_NONLATIN1_FOLD_CLOSURE_ONLY_FOR_USE_BY_REGCOMP_DOT_C_AND_REGEXEC_DOT_C(value))
+ /* If the latest code point has a fold whose
+ * bit is set, it must be the only other one */
+ && ((prevvalue = PL_fold_latin1[value]) != (IV)value)
+ && ANYOF_BITMAP_TEST(ret, prevvalue)))))
+ {
+ /* Note that the information needed to decide to do this optimization
+ * is not currently available until the 2nd pass, and that the actually
+ * used EXACTish node takes less space than the calculated ANYOF node,
+ * and hence the amount of space calculated in the first pass is larger
+ * than actually used, so this optimization doesn't gain us any space.
+ * But an EXACT node is faster than an ANYOF node, and can be combined
+ * with any adjacent EXACT nodes later by the optimizer for further
+ * gains. The speed of executing an EXACTF is similar to an ANYOF
+ * node, so the optimization advantage comes from the ability to join
+ * it to adjacent EXACT nodes */
+