/* Flags for node->flags of ANYOF. These are in short supply, so some games
* are done to share them, as described below. If necessary, the ANYOF_LOCALE
- * and ANYOF_CLASS bits could be shared with a space penalty for locale nodes
- * (and the code at the time this comment was written, is written so that all
- * that is necessary to make the change would be to redefine the ANYOF_CLASS
- * define). Once the planned change to compile all the above-latin1 code points
- * is done, then the UNICODE_ALL bit can be freed up. If flags need to be
- * added that are applicable to the synthetic start class only, with some work,
- * they could be put in the next-node field, or in an unused bit of the
- * classflags field. */
+ * and ANYOF_CLASS bits could be shared with a space penalty for locale nodes,
+ * but this isn't quite so easy, as the optimizer also uses ANYOF_CLASS.
+ * Once the planned change to compile all the above-latin1 code points is done,
+ * then the UNICODE_ALL bit can be freed up, with a small performance penalty.
+ * If flags need to be added that are applicable to the synthetic start class
+ * only, with some work, they could be put in the next-node field, or in an
+ * unused bit of the classflags field. */
-#define ANYOF_LOCALE 0x01
+#define ANYOF_LOCALE 0x01 /* Node was compiled under the /l (locale) modifier */
/* The fold is calculated and stored in the bitmap where possible at compile
* time. However there are two cases where it isn't possible. These share
#define ANYOF_INVERT 0x04
+/* Set if this node is a struct regnode_charclass_class rather than a struct
+ * regnode_charclass. This is used for runtime \d, \w, [:posix:], ..., which
+ * occur only under locale and in the optimizer's synthetic start class.
+ * Non-locale \d, etc. are resolved at compile time. */
+#define ANYOF_CLASS 0x08
+#define ANYOF_LARGE ANYOF_CLASS /* Same; name retained for back compat */
+
/* EOS, meaning that it can match an empty string too, is used for the
- * synthetic start class (ssc) only. It looks like it could share the INVERT
- * bit, as the ssc is never inverted. But doing that caused this reges to
- * not match:
- * 'foo/file.fob' =~ m,^(?=[^\.])[^/]* /(?=[^\.])[^/]*\.fo[^/]$,;
- * (except the space between the * and the / above shouldn't be there; it was
- * inserted to make this comment continue on.)
- * Rather than try to figure out what was going on in the optimizer, I (khw)
- * found a way to save a different bit. But my original line of reasoning was
- * "The bit just needs to be turned off before regexec.c gets a hold of it so
- * that regexec.c doesn't think it's inverted, but this happens automatically,
- * as if the ssc can match an EOS, the ssc is discarded, and never passed to
- * regexec.c" */
+ * synthetic start class only. */
#define ANYOF_EOS 0x10
-/* CLASS is never set unless LOCALE is too: has runtime \d, \w, [:posix:], ...
- * The non-locale ones are resolved at compile-time */
-#define ANYOF_CLASS 0x08
-#define ANYOF_LARGE ANYOF_CLASS /* Same; name retained for back compat */
+/* Is this node the synthetic start class (ssc)?  This bit is shared with
+ * ANYOF_EOS, as the latter is used only for the ssc, and then not used by
+ * regexec.c. And, the code is structured so that if ANYOF_EOS is set, the ssc
+ * is not used, so the bit is guaranteed to be 0 for the ssc by the time
+ * regexec.c gets executed, and 0 for a non-ssc ANYOF node, as it only ever
+ * gets set for a potential ssc candidate. Thus setting it to 1 after it has
+ * been determined that the ssc will be used is not ambiguous. */
+#define ANYOF_IS_SYNTHETIC ANYOF_EOS
/* Can match something outside the bitmap that isn't in utf8 */
#define ANYOF_NONBITMAP_NON_UTF8 0x20
#define ANYOF_FLAGS_ALL 0xff
+/* These are the flags whose meaning is the same whether or not ANYOF_INVERT
+ * is set. E.g., the node still has LOCALE regardless of being inverted;
+ * whereas ANYOF_UNICODE_ALL means something different if inverted, so it is
+ * not part of this mask. */
+#define INVERSION_UNAFFECTED_FLAGS (ANYOF_LOCALE \
+ |ANYOF_LOC_NONBITMAP_FOLD \
+ |ANYOF_CLASS \
+ |ANYOF_EOS \
+ |ANYOF_NONBITMAP_NON_UTF8)
+
/* Character classes for node->classflags of ANYOF */
/* Should be synchronized with a table in regprop() */
/* 2n should pair with 2n+1 */
#define ANYOF_CLASS_TEST(p, c) (ANYOF_CLASS_BYTE(p, c) & ANYOF_BIT(c))
#define ANYOF_CLASS_ZERO(ret) Zero(((struct regnode_charclass_class*)(ret))->classflags, ANYOF_CLASSBITMAP_SIZE, char)
+#define ANYOF_CLASS_SETALL(ret) /* fill every classflags byte with 0xFF */ \
+ memset (((struct regnode_charclass_class*)(ret))->classflags, 255, ANYOF_CLASSBITMAP_SIZE)
#define ANYOF_BITMAP_ZERO(ret) Zero(((struct regnode_charclass*)(ret))->bitmap, ANYOF_BITMAP_SIZE, char)
#define ANYOF_BITMAP(p) (((struct regnode_charclass*)(p))->bitmap)
#define ANYOF_SKIP ((ANYOF_SIZE - 1)/sizeof(regnode))
#define ANYOF_CLASS_SKIP ((ANYOF_CLASS_SIZE - 1)/sizeof(regnode))
-/* The class bit can be set to the locale one if necessary to save bits at the
- * expense of having locale ANYOF nodes always have a class bit map, and hence
- * take up extra space. This allows convenient changing it as development
- * proceeds on this */
-#if ANYOF_CLASS == ANYOF_LOCALE
-# undef ANYOF_CLASS_ADD_SKIP
-# define ANYOF_ADD_LOC_SKIP (ANYOF_CLASS_SKIP - ANYOF_SKIP)
-
- /* Quicker way to see if there are actually any tests. This is because
- * currently the set of tests can be empty even when the class bitmap is
- * allocated */
-# if ANYOF_CLASSBITMAP_SIZE != 4
-# error ANYOF_CLASSBITMAP_SIZE is expected to be 4
-# endif
-# define ANYOF_CLASS_TEST_ANY_SET(p) /* assumes sizeof(p) = 4 */ \
- memNE (((struct regnode_charclass_class*)(p))->classflags, \
- "\0\0\0\0", ANYOF_CLASSBITMAP_SIZE)
-#else
-# define ANYOF_CLASS_ADD_SKIP (ANYOF_CLASS_SKIP - ANYOF_SKIP)
-# undef ANYOF_ADD_LOC_SKIP
-# define ANYOF_CLASS_TEST_ANY_SET(p) (ANYOF_FLAGS(p) & ANYOF_CLASS)
+#if ANYOF_CLASSBITMAP_SIZE != 4 /* ANYOF_CLASS_TEST_ANY_SET compares classflags
+                                 * against a literal of exactly 4 zero bytes,
+                                 * so the bitmap size must match it */
+# error ANYOF_CLASSBITMAP_SIZE is expected to be 4
#endif
+#define ANYOF_CLASS_TEST_ANY_SET(p) ((ANYOF_FLAGS(p) & ANYOF_CLASS) \
+ && memNE (((struct regnode_charclass_class*)(p))->classflags, \
+ "\0\0\0\0", ANYOF_CLASSBITMAP_SIZE)) /* true only when the node carries the
+                                       * ANYOF_CLASS flag and its classflags
+                                       * differ from all-zero (presumably
+                                       * memNE means "memory not equal" --
+                                       * confirm against its definition) */
+/*#define ANYOF_CLASS_ADD_SKIP (ANYOF_CLASS_SKIP - ANYOF_SKIP)
+ * */
/*
#ifdef DEBUGGING
-#define GET_RE_DEBUG_FLAGS_DECL VOL IV re_debug_flags = 0; GET_RE_DEBUG_FLAGS;
+#define GET_RE_DEBUG_FLAGS_DECL VOL IV re_debug_flags \
+ PERL_UNUSED_DECL = 0; GET_RE_DEBUG_FLAGS;
#define RE_PV_COLOR_DECL(rpv,rlen,isuni,dsv,pv,l,m,c1,c2) \
const char * const rpv = \