char string[1];
};
+/* Node layout for long strings: the length lives in its own U32 field that
+ * follows the 'flags', 'type' and 'next_off' header fields. */
+struct regnode_lstring { /* Constructed this way to keep the string aligned. */
+ U8 flags;
+ U8 type;
+ U16 next_off;
+ U32 str_len; /* Only 18 bits allowed before would overflow 'next_off',
+ which is 16 bits wide and counts regnodes (4 bytes each
+ on most architectures) */
+ char string[1];
+};
+
/* Argument bearing node - workhorse,
arg1 is often for the data field */
struct regnode_1 {
#undef OP
#undef OPERAND
-#undef MASK
#undef STRING
/* Accessor for the 'type' member of the node that 'p' points to */
#define OP(p) ((p)->type)
#define FLAGS(p) ((p)->flags) /* Caution: Doesn't apply to all \
regnode types. For some, it's the \
character set of the regnode */
-#define OPERAND(p) (((struct regnode_string *)p)->string)
-#define MASK(p) ((char*)OPERAND(p))
-#define STR_LEN(p) (((struct regnode_string *)p)->str_len)
-#define STRING(p) (((struct regnode_string *)p)->string)
-#define STR_SZ(l) ((l + sizeof(regnode) - 1) / sizeof(regnode))
-#define NODE_SZ_STR(p) (STR_SZ(STR_LEN(p))+1)
+/* Accessors for nodes laid out as 'struct regnode_string'. They are not for
+ * LEXACT or LEXACT_ONLY8 nodes; the __ASSERT_() expresses that precondition.
+ * 'p' is parenthesized before the cast so that an argument such as 'scan + 1'
+ * is cast as a whole. 'p' may be evaluated more than once. */
+#define STR_LENs(p) (__ASSERT_(OP(p) != LEXACT && OP(p) != LEXACT_ONLY8) \
+ ((struct regnode_string *)(p))->str_len)
+#define STRINGs(p) (__ASSERT_(OP(p) != LEXACT && OP(p) != LEXACT_ONLY8) \
+ ((struct regnode_string *)(p))->string)
+#define OPERANDs(p) STRINGs(p)
+
+/* Long strings. Currently limited to length 18 bits, which handles a 262000
+ * byte string. The limiting factor is the 16 bit 'next_off' field, which
+ * points to the next regnode, so the furthest away it can be is 2**16. On
+ * most architectures, regnodes are 2**2 bytes long, so that yields 2**18
+ * bytes. Should a longer string be desired, we could increase it to 26 bits
+ * fairly easily, by changing this node to have longj type which causes the ARG
+ * field to be used for the link to the next regnode (although code would have
+ * to be changed to account for this), and then use a combination of the flags
+ * and next_off fields for the length. To get 34 bit length, also change the
+ * node to be an ARG2L, using the second 32 bit field for the length, and not
+ * using the flags nor next_off fields at all. One could have an llstring node
+ * and even an lllstring type. */
+/* Accessors for nodes laid out as 'struct regnode_lstring'. They are only
+ * for LEXACT and LEXACT_ONLY8 nodes; the __ASSERT_() expresses that
+ * precondition. 'p' is parenthesized before the cast so that an argument
+ * such as 'scan + 1' is cast as a whole. 'p' may be evaluated more than
+ * once. */
+#define STR_LENl(p) (__ASSERT_(OP(p) == LEXACT || OP(p) == LEXACT_ONLY8) \
+ (((struct regnode_lstring *)(p))->str_len))
+#define STRINGl(p) (__ASSERT_(OP(p) == LEXACT || OP(p) == LEXACT_ONLY8) \
+ (((struct regnode_lstring *)(p))->string))
+#define OPERANDl(p) STRINGl(p)
+
+/* Generic accessors that pick the struct layout from the node's type:
+ * LEXACT and LEXACT_ONLY8 go through the 'l' (long) accessors, every other
+ * type through the 's' (short) ones. 'p' is evaluated several times, so it
+ * should be free of side effects. */
+#define STR_LEN(p) ((OP(p) != LEXACT && OP(p) != LEXACT_ONLY8) \
+ ? STR_LENs(p) : STR_LENl(p))
+#define STRING(p) ((OP(p) != LEXACT && OP(p) != LEXACT_ONLY8) \
+ ? STRINGs(p) : STRINGl(p))
+#define OPERAND(p) STRING(p)
+
+/* How many (smallest) regnode-sized units are needed to hold a string of 'l'
+ * bytes; a partial unit is rounded up to a whole one */
+#define STR_SZ(l) ((sizeof(regnode) - 1 + (l)) / sizeof(regnode))
+
+/* Size of the EXACTISH node 'p' in (smallest) regnode units: the units taken
+ * by its string, plus 1, plus the regarglen[] entry for the node's type */
+#define NODE_SZ_STR(p) (1 + regarglen[OP(p)] + STR_SZ(STR_LEN(p)))
+
+/* Store 'v' as the string length of node 'p', writing through whichever
+ * struct layout matches the node's type (regnode_lstring for LEXACT and
+ * LEXACT_ONLY8, regnode_string otherwise). 'p' is evaluated more than once;
+ * 'v' exactly once. */
+#define setSTR_LEN(p,v) \
+ STMT_START{ \
+ if (OP(p) != LEXACT && OP(p) != LEXACT_ONLY8) \
+ ((struct regnode_string *)(p))->str_len = (v); \
+ else \
+ ((struct regnode_lstring *)(p))->str_len = (v); \
+ } STMT_END
#undef NODE_ALIGN
#undef ARG_LOC
# define UCHARAT(p) ((int)*(p)&CHARMASK)
#endif
+/* Number of regnode equivalents that 'guy' occupies beyond the size of the
+ * smallest regnode. 'guy' is used only as the operand of sizeof. The
+ * division truncates, and the -1 makes something exactly one
+ * 'struct regnode' in size yield 0. */
#define EXTRA_SIZE(guy) ((sizeof(guy)-1)/sizeof(struct regnode))
#define REG_ZERO_LEN_SEEN 0x00000001
#define RE_DEBUG_EXECUTE_TRIE 0x000400
/* Extra */
-#define RE_DEBUG_EXTRA_MASK 0xFF0000
-#define RE_DEBUG_EXTRA_TRIE 0x010000
-#define RE_DEBUG_EXTRA_OFFSETS 0x020000
-#define RE_DEBUG_EXTRA_OFFDEBUG 0x040000
-#define RE_DEBUG_EXTRA_STATE 0x080000
-#define RE_DEBUG_EXTRA_OPTIMISE 0x100000
-#define RE_DEBUG_EXTRA_BUFFERS 0x400000
-#define RE_DEBUG_EXTRA_GPOS 0x800000
+#define RE_DEBUG_EXTRA_MASK 0x1FF0000 /* covers bits 16 through 24 */
+#define RE_DEBUG_EXTRA_TRIE 0x0010000
+#define RE_DEBUG_EXTRA_OFFSETS 0x0020000
+#define RE_DEBUG_EXTRA_OFFDEBUG 0x0040000
+#define RE_DEBUG_EXTRA_STATE 0x0080000
+#define RE_DEBUG_EXTRA_OPTIMISE 0x0100000
+#define RE_DEBUG_EXTRA_BUFFERS 0x0400000
+#define RE_DEBUG_EXTRA_GPOS 0x0800000
+#define RE_DEBUG_EXTRA_DUMP_PRE_OPTIMIZE 0x1000000 /* tested by DEBUG_DUMP_PRE_OPTIMIZE_r() */
/* combined */
-#define RE_DEBUG_EXTRA_STACK 0x280000
+#define RE_DEBUG_EXTRA_STACK 0x0280000 /* RE_DEBUG_EXTRA_STATE (0x0080000) | 0x0200000 */
/* Non-zero if any of the bits in 'x' are set in re_debug_flags. 'x' is
 * parenthesized so that a compound argument such as (A | B) is masked as a
 * whole rather than having only its first operand masked. */
#define RE_DEBUG_FLAG(x) (re_debug_flags & (x))
/* Compile */
/* Hands DEBUG_r() a statement that runs 'x' when DEBUG_v_TEST is true or the
 * RE_DEBUG_EXTRA_GPOS bit is set in re_debug_flags. 'x' is the body of an
 * unbraced 'if'. */
#define DEBUG_GPOS_r(x) DEBUG_r( \
if (DEBUG_v_TEST || (re_debug_flags & RE_DEBUG_EXTRA_GPOS)) x )
+/* Hands DEBUG_r() a statement that runs 'x' when DEBUG_v_TEST is true or the
+ * RE_DEBUG_EXTRA_DUMP_PRE_OPTIMIZE bit is set in re_debug_flags. 'x' is the
+ * body of an unbraced 'if'. */
+#define DEBUG_DUMP_PRE_OPTIMIZE_r(x) DEBUG_r( \
+ if (DEBUG_v_TEST || (re_debug_flags & RE_DEBUG_EXTRA_DUMP_PRE_OPTIMIZE)) x )
+
/* initialization */
/* get_sv() can return NULL during global destruction. */
#define GET_RE_DEBUG_FLAGS DEBUG_r({ \
WB_BOUND
} bound_type;
+/* This unpacks the FLAGS field of ANYOFHx nodes. The value it contains
+ * gives the strict lower bound for the UTF-8 start byte of any code point
+ * matchable by the node, and a loose upper bound as well.
+ *
+ * The low bound is stored in the upper 6 bits as an offset from 0xC0; that
+ * is, the low bound is (FLAGS >> 2) + 0xC0.
+ * The loose upper bound is determined from the lowest 2 bits and the low bound
+ * (called x) as follows, where EF is MAX_ANYOF_HRx_BYTE (F4 on EBCDIC):
+ *
+ * 11 The upper limit of the range can be as much as x + (EF - x) / 8
+ * 10 The upper limit of the range can be as much as x + (EF - x) / 4
+ * 01 The upper limit of the range can be as much as x + (EF - x) / 2
+ * 00 The upper limit of the range can be as much as EF
+ *
+ * For motivation of this design, see commit message in
+ * 3146c00a633e9cbed741e10146662fbcedfdb8d3 */
+/* Largest start byte the loose upper bound may reach: 0xEF, or 0xF4 when
+ * EBCDIC is defined */
+#ifndef EBCDIC
+# define MAX_ANYOF_HRx_BYTE 0xEF
+#else
+# define MAX_ANYOF_HRx_BYTE 0xF4
+#endif
+/* Low bound: the upper 6 bits of 'b', offset from 0xC0 */
+#define LOWEST_ANYOF_HRx_BYTE(b) (0xC0 + ((b) >> 2))
+/* Loose high bound: the low bound plus the distance from it to
+ * MAX_ANYOF_HRx_BYTE, that distance shifted right by the low 2 bits of 'b'.
+ * 'b' is evaluated more than once. */
+#define HIGHEST_ANYOF_HRx_BYTE(b) \
+ (((MAX_ANYOF_HRx_BYTE - LOWEST_ANYOF_HRx_BYTE(b)) >> ((b) & 3)) \
+ + LOWEST_ANYOF_HRx_BYTE(b))
+
#endif /* PERL_REGCOMP_H_ */
/*