* but allows patterns to get big without disasters.
*
* [The "next" pointer is always aligned on an even
- * boundary, and reads the offset directly as a short. Also, there is no
- * special test to reverse the sign of BACK pointers since the offset is
- * stored negative.]
+ * boundary, and reads the offset directly as a short.]
*/
/* This is the stuff that used to live in regexp.h that was truly
* extra SV*, used only during its construction and which is not used by
* regexec.c. Note that the 'next_off' field is unused, as the SSC stands
* alone, so there is never a next node. Also, there is no alignment issue,
- * becase these are declared or allocated as a complete unit so the compiler
+ * because these are declared or allocated as a complete unit so the compiler
* takes care of alignment. This is unlike the other regnodes which are
* allocated in terms of multiples of a single-argument regnode. SSC nodes can
* have a pointer field because there is no alignment issue, and because it is
U16 next_off;
U32 arg1;
char bitmap[ANYOF_BITMAP_SIZE]; /* both compile-time ... */
- U32 classflags; /* and run-time */
+ U32 classflags; /* ... and run-time */
/* Auxiliary, only used during construction; NULL afterwards: list of code
* points matched */
(ptr)->type = op; (ptr)->next_off = 0; (ptr)++; } STMT_END
/* Fill in a node that carries one argument and step 'ptr' past it: applies
 * ARG_SET(ptr, arg), then FILL_ADVANCE_NODE(ptr, op), then advances 'ptr' by
 * one more unit (presumably the slot holding the argument -- confirm against
 * the regnode layout).  'ptr' is expanded several times, so it must be a
 * modifiable lvalue with no side effects. */
#define FILL_ADVANCE_NODE_ARG(ptr, op, arg) STMT_START { \
ARG_SET(ptr, arg); FILL_ADVANCE_NODE(ptr, op); (ptr) += 1; } STMT_END
+#define FILL_ADVANCE_NODE_2L_ARG(ptr, op, arg1, arg2) \
+ STMT_START { /* 'ptr' is expanded several times: pass a side-effect-free lvalue */ \
+ ARG_SET(ptr, arg1); /* first argument */ \
+ ARG2L_SET(ptr, arg2); /* second argument */ \
+ FILL_ADVANCE_NODE(ptr, op); /* fills in the node for 'op'; presumably also steps 'ptr' by one (confirm) */ \
+ (ptr) += 2; /* two further units, versus one in FILL_ADVANCE_NODE_ARG */ \
+ } STMT_END
#define REG_MAGIC 0234
* probably better than that commit anyway. But it could be reinstated if we
* need a bit. The LOC flags are only for /l nodes; the reverted commit was
* only for /d, so there are no combinatorial issues. The LOC flag to use is
- * probably the POSIXL one.
+ * probably the POSIXL one. Now that there is an ANYOFL (locale) node, another
+ * option would be to make all of those include the POSIXL data structure,
+ * which would get rid of needing a separate POSIXL flag. But it would
+ * increase the size of all such nodes, so it's probably not as attractive as
+ * having an ANYOF_POSIXL node type. But if we did do it, note that not all 32
+ * bits of that extra space are used; one bit of that could be set aside for
+ * the LOC_FOLD flag, yielding yet another bit. This would require extra code
+ * for masking, so again not the most attractive solution.
+ *
* Several flags are not used in synthetic start class (SSC) nodes, so could be
* shared should new flags be needed for SSCs, like SSC_MATCHES_EMPTY_STRING
* now. */
#define ANYOF_NLOWER ((ANYOF_LOWER) + 1)
#define ANYOF_PRINT ((_CC_PRINT) * 2)
#define ANYOF_NPRINT ((ANYOF_PRINT) + 1)
-#define ANYOF_PSXSPC ((_CC_PSXSPC) * 2) /* POSIX space: \s plus the vertical tab */
-#define ANYOF_NPSXSPC ((ANYOF_PSXSPC) + 1)
#define ANYOF_PUNCT ((_CC_PUNCT) * 2)
#define ANYOF_NPUNCT ((ANYOF_PUNCT) + 1)
#define ANYOF_SPACE ((_CC_SPACE) * 2) /* \s */
#endif /* DEBUG RELATED DEFINES */
+typedef enum { /* kinds of boundary; values are consecutive, starting at _CC_WORDCHAR */
+ TRADITIONAL_BOUND = _CC_WORDCHAR, /* deliberately shares the value of _CC_WORDCHAR */
+ GCB_BOUND, /* TRADITIONAL_BOUND + 1; presumably grapheme cluster boundary (confirm) */
+ SB_BOUND, /* TRADITIONAL_BOUND + 2; presumably sentence boundary (confirm) */
+ WB_BOUND /* TRADITIONAL_BOUND + 3; presumably word boundary (confirm) */
+} bound_type;
+
/*
- * Local variables:
- * c-indentation-style: bsd
- * c-basic-offset: 4
- * indent-tabs-mode: nil
- * End:
- *
* ex: set ts=8 sts=4 sw=4 et:
*/