Used to make it easier to clone and free arbitrary
data that the regops need. Often the ARG field of
a regop is an index into this structure */
- struct reg_code_block *code_blocks;/* positions of literal (?{}) */
- int num_code_blocks; /* size of code_blocks[] */
+ struct reg_code_blocks *code_blocks;/* positions of literal (?{}) */
regnode program[1]; /* Unwarranted chumminess with compiler. */
} regexp_internal;
};
/* has runtime (locale) \d, \w, ..., [:posix:] classes */
-struct regnode_charclass_class {
+struct regnode_charclass_posixl {
U8 flags; /* ANYOF_MATCHES_POSIXL bit must go here */
U8 type;
U16 next_off;
Impose a limit of REG_INFTY on various pattern matching operations
to limit stack growth and to avoid "infinite" recursions.
*/
-/* The default size for REG_INFTY is I16_MAX, which is the same as
- SHORT_MAX (see perl.h). Unfortunately I16 isn't necessarily 16 bits
- (see handy.h). On the Cray C90, sizeof(short)==4 and hence I16_MAX is
- ((1<<31)-1), while on the Cray T90, sizeof(short)==8 and I16_MAX is
- ((1<<63)-1). To limit stack growth to reasonable sizes, supply a
+/* The default size for REG_INFTY is U16_MAX, which is the same as
+ USHORT_MAX (see perl.h). Unfortunately U16 isn't necessarily 16 bits
+ (see handy.h). On the Cray C90, sizeof(short)==4 and hence U16_MAX is
+ ((1<<32)-1), while on the Cray T90, sizeof(short)==8 and U16_MAX is
+ ((1<<64)-1). To limit stack growth to reasonable sizes, supply a
smaller default.
--Andy Dougherty 11 June 1998
*/
#if SHORTSIZE > 2
# ifndef REG_INFTY
-# define REG_INFTY ((1<<15)-1)
+# define REG_INFTY ((1<<16)-1)
# endif
#endif
#ifndef REG_INFTY
-# define REG_INFTY I16_MAX
+# define REG_INFTY U16_MAX
#endif
#define ARG_VALUE(arg) (arg)
#define NEXTOPER(p) ((p) + NODE_STEP_REGNODE)
#define PREVOPER(p) ((p) - NODE_STEP_REGNODE)
-#define FILL_ADVANCE_NODE(ptr, op) STMT_START { \
- (ptr)->type = op; (ptr)->next_off = 0; (ptr)++; } STMT_END
-#define FILL_ADVANCE_NODE_ARG(ptr, op, arg) STMT_START { \
- ARG_SET(ptr, arg); FILL_ADVANCE_NODE(ptr, op); (ptr) += 1; } STMT_END
-#define FILL_ADVANCE_NODE_2L_ARG(ptr, op, arg1, arg2) \
- STMT_START { \
- ARG_SET(ptr, arg1); \
- ARG2L_SET(ptr, arg2); \
- FILL_ADVANCE_NODE(ptr, op); \
- (ptr) += 2; \
- } STMT_END
+#define FILL_NODE(offset, op) \
+ STMT_START { \
+ OP(REGNODE_p(offset)) = op; \
+ NEXT_OFF(REGNODE_p(offset)) = 0; \
+ } STMT_END
+#define FILL_ADVANCE_NODE(offset, op) \
+ STMT_START { \
+ FILL_NODE(offset, op); \
+ (offset)++; \
+ } STMT_END
+#define FILL_ADVANCE_NODE_ARG(offset, op, arg) \
+ STMT_START { \
+ ARG_SET(REGNODE_p(offset), arg); \
+ FILL_ADVANCE_NODE(offset, op); \
+ /* This is used generically for other operations \
+ * that have a longer argument */ \
+ (offset) += regarglen[op]; \
+ } STMT_END
+#define FILL_ADVANCE_NODE_2L_ARG(offset, op, arg1, arg2) \
+ STMT_START { \
+ ARG_SET(REGNODE_p(offset), arg1); \
+ ARG2L_SET(REGNODE_p(offset), arg2); \
+ FILL_ADVANCE_NODE(offset, op); \
+ (offset) += 2; \
+ } STMT_END
#define REG_MAGIC 0234
-#define SIZE_ONLY cBOOL(RExC_emit == (regnode *) & RExC_emit_dummy)
-#define PASS1 SIZE_ONLY
-#define PASS2 (! SIZE_ONLY)
-
/* An ANYOF node is basically a bitmap with the index being a code point. If
* the bit for that code point is 1, the code point matches; if 0, it doesn't
* match (complemented if inverted). There is an additional mechanism to deal
* never reach this high). */
#define ANYOF_ONLY_HAS_BITMAP ((U32) -1)
-/* When the bimap isn't completely sufficient for handling the ANYOF node,
+/* When the bitmap isn't completely sufficient for handling the ANYOF node,
* flags (in node->flags of the ANYOF node) get set to indicate this. These
* are perennially in short supply. Beyond several cases where warnings need
* to be raised under certain circumstances, currently, there are six cases
* shared with another, so it doesn't occupy extra space.
*
* At the moment, there is one spare bit, but this could be increased by
- * various tricks.
+ * various tricks:
*
- * If just one more bit is needed, at this writing it seems to khw that the
+ * If just one more bit is needed, as of this writing it seems to khw that the
* best choice would be to make ANYOF_MATCHES_ALL_ABOVE_BITMAP not a flag, but
* something like
*
* handler function, as the macro REGINCLASS in regexec.c does now for other
* cases.
*
- * Another possibility is to instead (or additionally) rename the ANYOF_POSIXL
- * flag to be ANYOFL_LARGE, to mean that the ANYOF node has an extra 32 bits
- * beyond what a regular one does. That's what it effectively means now, with
- * the extra space all for the POSIX class flags. But those classes actually
- * only occupy 30 bits, so the ANYOFL_FOLD and
- * ANYOFL_SHARED_UTF8_LOCALE_fold_HAS_MATCHES_nonfold_REQD flags could be moved
- * to that extra space. The 30 bits in the extra word would indicate if a
- * posix class should be looked up or not. The downside of this is that ANYOFL
- * nodes with folding would always have to have the extra space allocated, even
- * if they didn't use the 30 posix bits. There isn't an SSC problem as all
- * SSCs are this large anyway.
+ * Another possibility is based on the fact that ANYOF_MATCHES_POSIXL is
+ * redundant with the node type ANYOFPOSIXL. That flag could be removed, but
+ * at the expense of extra code in regexec.c. The flag has been retained
+ * because it allows us to see if we need to call reginclass, or just use the
+ * bitmap in one test.
*
- * One could completely remove ANYOFL_LARGE and make all ANYOFL nodes large.
- * REGINCLASS would have to be modified so that if the node type were this, it
- * would call reginclass(), as the flag bit that indicates to do this now would
- * be gone.
+ * If this is done, an extension would be to make all ANYOFL nodes contain the
+ * extra 32 bits that ANYOFPOSIXL ones do. The posix flags only occupy 30
+ * bits, so the ANYOFL_SHARED_UTF8_LOCALE_fold_HAS_MATCHES_nonfold_REQD and
+ * ANYOFL_FOLD flags could be moved to that extra space, but it would mean extra
+ * instructions, as there are currently places in the code that assume those
+ * two bits are zero.
*
* All told, 5 bits could be available for other uses if all of the above were
* done.
#define ANYOFL_FOLD 0x04
/* Shared bit set only with ANYOFL and SSC nodes:
- * If ANYOFL_FOLD is set, this means there are potential matches valid
- * only if the locale is a UTF-8 one.
- * If ANYOFL_FOLD is NOT set, this means to warn if the runtime locale
+ * If ANYOFL_FOLD is set, this flag indicates there are potential matches
+ * valid only if the locale is a UTF-8 one.
+ * If ANYOFL_FOLD is NOT set, this flag means to warn if the runtime locale
* isn't a UTF-8 one (and the generated node assumes a UTF-8 locale).
* None of INVERT, POSIXL,
* ANYOF_SHARED_d_UPPER_LATIN1_UTF8_STRING_MATCHES_non_d_RUNTIME_USER_PROP
#define ANYOF_BIT(c) (1U << ((c) & 7))
-#define ANYOF_POSIXL_SET(p, c) (((regnode_charclass_posixl*) (p))->classflags |= (1U << (c)))
-#define ANYOF_CLASS_SET(p, c) ANYOF_POSIXL_SET((p), (c))
+#define POSIXL_SET(field, c) ((field) |= (1U << (c)))
+#define ANYOF_POSIXL_SET(p, c) POSIXL_SET(((regnode_charclass_posixl*) (p))->classflags, (c))
+
+#define POSIXL_CLEAR(field, c) ((field) &= ~ (1U <<(c)))
+#define ANYOF_POSIXL_CLEAR(p, c) POSIXL_CLEAR(((regnode_charclass_posixl*) (p))->classflags, (c))
-#define ANYOF_POSIXL_CLEAR(p, c) (((regnode_charclass_posixl*) (p))->classflags &= ~ (1U <<(c)))
-#define ANYOF_CLASS_CLEAR(p, c) ANYOF_POSIXL_CLEAR((p), (c))
+#define POSIXL_TEST(field, c) ((field) & (1U << (c)))
+#define ANYOF_POSIXL_TEST(p, c) POSIXL_TEST(((regnode_charclass_posixl*) (p))->classflags, (c))
-#define ANYOF_POSIXL_TEST(p, c) (((regnode_charclass_posixl*) (p))->classflags & (1U << (c)))
-#define ANYOF_CLASS_TEST(p, c) ANYOF_POSIXL_TEST((p), (c))
+#define POSIXL_ZERO(field) STMT_START { (field) = 0; } STMT_END
+#define ANYOF_POSIXL_ZERO(ret) POSIXL_ZERO(((regnode_charclass_posixl*) (ret))->classflags)
-#define ANYOF_POSIXL_ZERO(ret) STMT_START { ((regnode_charclass_posixl*) (ret))->classflags = 0; } STMT_END
-#define ANYOF_CLASS_ZERO(ret) ANYOF_POSIXL_ZERO(ret)
+#define ANYOF_POSIXL_SET_TO_BITMAP(p, bits) \
+ STMT_START { \
+ ((regnode_charclass_posixl*) (p))->classflags = (bits); \
+ } STMT_END
/* Shifts a bit to get, eg. 0x4000_0000, then subtracts 1 to get 0x3FFF_FFFF */
#define ANYOF_POSIXL_SETALL(ret) STMT_START { ((regnode_charclass_posixl*) (ret))->classflags = ((1U << ((ANYOF_POSIXL_MAX) - 1))) - 1; } STMT_END
#define ANYOF_BITMAP_CLEARALL(p) \
Zero (ANYOF_BITMAP(p), ANYOF_BITMAP_SIZE)
-#define ANYOF_SKIP EXTRA_SIZE(regnode_charclass)
-#define ANYOF_POSIXL_SKIP EXTRA_SIZE(regnode_charclass_posixl)
-
/*
* Utility definitions.
*/
#define RE_DEBUG_FLAG(x) (re_debug_flags & x)
/* Compile */
#define DEBUG_COMPILE_r(x) DEBUG_r( \
- if (re_debug_flags & RE_DEBUG_COMPILE_MASK) x )
+ if (DEBUG_v_TEST || (re_debug_flags & RE_DEBUG_COMPILE_MASK)) x )
#define DEBUG_PARSE_r(x) DEBUG_r( \
- if (re_debug_flags & RE_DEBUG_COMPILE_PARSE) x )
+ if (DEBUG_v_TEST || (re_debug_flags & RE_DEBUG_COMPILE_PARSE)) x )
#define DEBUG_OPTIMISE_r(x) DEBUG_r( \
- if (re_debug_flags & RE_DEBUG_COMPILE_OPTIMISE) x )
-#define DEBUG_PARSE_r(x) DEBUG_r( \
- if (re_debug_flags & RE_DEBUG_COMPILE_PARSE) x )
+ if (DEBUG_v_TEST || (re_debug_flags & RE_DEBUG_COMPILE_OPTIMISE)) x )
#define DEBUG_DUMP_r(x) DEBUG_r( \
- if (re_debug_flags & RE_DEBUG_COMPILE_DUMP) x )
+ if (DEBUG_v_TEST || (re_debug_flags & RE_DEBUG_COMPILE_DUMP)) x )
#define DEBUG_TRIE_COMPILE_r(x) DEBUG_r( \
- if (re_debug_flags & RE_DEBUG_COMPILE_TRIE) x )
+ if (DEBUG_v_TEST || (re_debug_flags & RE_DEBUG_COMPILE_TRIE)) x )
#define DEBUG_FLAGS_r(x) DEBUG_r( \
- if (re_debug_flags & RE_DEBUG_COMPILE_FLAGS) x )
+ if (DEBUG_v_TEST || (re_debug_flags & RE_DEBUG_COMPILE_FLAGS)) x )
#define DEBUG_TEST_r(x) DEBUG_r( \
- if (re_debug_flags & RE_DEBUG_COMPILE_TEST) x )
+ if (DEBUG_v_TEST || (re_debug_flags & RE_DEBUG_COMPILE_TEST)) x )
/* Execute */
#define DEBUG_EXECUTE_r(x) DEBUG_r( \
- if (re_debug_flags & RE_DEBUG_EXECUTE_MASK) x )
+ if (DEBUG_v_TEST || (re_debug_flags & RE_DEBUG_EXECUTE_MASK)) x )
#define DEBUG_INTUIT_r(x) DEBUG_r( \
- if (re_debug_flags & RE_DEBUG_EXECUTE_INTUIT) x )
+ if (DEBUG_v_TEST || (re_debug_flags & RE_DEBUG_EXECUTE_INTUIT)) x )
#define DEBUG_MATCH_r(x) DEBUG_r( \
- if (re_debug_flags & RE_DEBUG_EXECUTE_MATCH) x )
+ if (DEBUG_v_TEST || (re_debug_flags & RE_DEBUG_EXECUTE_MATCH)) x )
#define DEBUG_TRIE_EXECUTE_r(x) DEBUG_r( \
- if (re_debug_flags & RE_DEBUG_EXECUTE_TRIE) x )
+ if (DEBUG_v_TEST || (re_debug_flags & RE_DEBUG_EXECUTE_TRIE)) x )
/* Extra */
#define DEBUG_EXTRA_r(x) DEBUG_r( \
- if (re_debug_flags & RE_DEBUG_EXTRA_MASK) x )
+ if (DEBUG_v_TEST || (re_debug_flags & RE_DEBUG_EXTRA_MASK)) x )
#define DEBUG_OFFSETS_r(x) DEBUG_r( \
- if (re_debug_flags & RE_DEBUG_EXTRA_OFFSETS) x )
+ if (DEBUG_v_TEST || (re_debug_flags & RE_DEBUG_EXTRA_OFFSETS)) x )
#define DEBUG_STATE_r(x) DEBUG_r( \
- if (re_debug_flags & RE_DEBUG_EXTRA_STATE) x )
+ if (DEBUG_v_TEST || (re_debug_flags & RE_DEBUG_EXTRA_STATE)) x )
#define DEBUG_STACK_r(x) DEBUG_r( \
- if (re_debug_flags & RE_DEBUG_EXTRA_STACK) x )
+ if (DEBUG_v_TEST || (re_debug_flags & RE_DEBUG_EXTRA_STACK)) x )
#define DEBUG_BUFFERS_r(x) DEBUG_r( \
- if (re_debug_flags & RE_DEBUG_EXTRA_BUFFERS) x )
+ if (DEBUG_v_TEST || (re_debug_flags & RE_DEBUG_EXTRA_BUFFERS)) x )
#define DEBUG_OPTIMISE_MORE_r(x) DEBUG_r( \
- if ((RE_DEBUG_EXTRA_OPTIMISE|RE_DEBUG_COMPILE_OPTIMISE) == \
- (re_debug_flags & (RE_DEBUG_EXTRA_OPTIMISE|RE_DEBUG_COMPILE_OPTIMISE)) ) x )
+ if (DEBUG_v_TEST || ((RE_DEBUG_EXTRA_OPTIMISE|RE_DEBUG_COMPILE_OPTIMISE) == \
+ (re_debug_flags & (RE_DEBUG_EXTRA_OPTIMISE|RE_DEBUG_COMPILE_OPTIMISE)))) x )
#define MJD_OFFSET_DEBUG(x) DEBUG_r( \
- if (re_debug_flags & RE_DEBUG_EXTRA_OFFDEBUG) \
+ if (DEBUG_v_TEST || (re_debug_flags & RE_DEBUG_EXTRA_OFFDEBUG)) \
Perl_warn_nocontext x )
#define DEBUG_TRIE_COMPILE_MORE_r(x) DEBUG_TRIE_COMPILE_r( \
- if (re_debug_flags & RE_DEBUG_EXTRA_TRIE) x )
+ if (DEBUG_v_TEST || (re_debug_flags & RE_DEBUG_EXTRA_TRIE)) x )
#define DEBUG_TRIE_EXECUTE_MORE_r(x) DEBUG_TRIE_EXECUTE_r( \
- if (re_debug_flags & RE_DEBUG_EXTRA_TRIE) x )
+ if (DEBUG_v_TEST || (re_debug_flags & RE_DEBUG_EXTRA_TRIE)) x )
#define DEBUG_TRIE_r(x) DEBUG_r( \
- if (re_debug_flags & (RE_DEBUG_COMPILE_TRIE \
- | RE_DEBUG_EXECUTE_TRIE )) x )
+ if (DEBUG_v_TEST || (re_debug_flags & (RE_DEBUG_COMPILE_TRIE \
+ | RE_DEBUG_EXECUTE_TRIE ))) x )
#define DEBUG_GPOS_r(x) DEBUG_r( \
- if (re_debug_flags & RE_DEBUG_EXTRA_GPOS) x )
+ if (DEBUG_v_TEST || (re_debug_flags & RE_DEBUG_EXTRA_GPOS)) x )
/* initialization */
/* get_sv() can return NULL during global destruction. */
#ifdef DEBUGGING
-#define GET_RE_DEBUG_FLAGS_DECL VOL IV re_debug_flags = 0; \
+#define GET_RE_DEBUG_FLAGS_DECL volatile IV re_debug_flags = 0; \
PERL_UNUSED_VAR(re_debug_flags); GET_RE_DEBUG_FLAGS;
-#define RE_PV_COLOR_DECL(rpv,rlen,isuni,dsv,pv,l,m,c1,c2) \
- const char * const rpv = \
- pv_pretty((dsv), (pv), (l), \
- (PL_dump_re_max_len) ? PL_dump_re_max_len : (m), \
- PL_colors[(c1)],PL_colors[(c2)], \
+#define RE_PV_COLOR_DECL(rpv,rlen,isuni,dsv,pv,l,m,c1,c2) \
+ const char * const rpv = \
+ pv_pretty((dsv), (pv), (l), (m), \
+ PL_colors[(c1)],PL_colors[(c2)], \
PERL_PV_ESCAPE_RE|PERL_PV_ESCAPE_NONASCII |((isuni) ? PERL_PV_ESCAPE_UNI : 0) ); \
const int rlen = SvCUR(dsv)
-#define RE_SV_ESCAPE(rpv,isuni,dsv,sv,m) \
- const char * const rpv = \
- pv_pretty((dsv), (SvPV_nolen_const(sv)), (SvCUR(sv)), \
- (PL_dump_re_max_len) ? PL_dump_re_max_len : (m), \
- PL_colors[(c1)],PL_colors[(c2)], \
+/* This is currently unused in the core */
+#define RE_SV_ESCAPE(rpv,isuni,dsv,sv,m) \
+ const char * const rpv = \
+ pv_pretty((dsv), (SvPV_nolen_const(sv)), (SvCUR(sv)), (m), \
+ PL_colors[(c1)],PL_colors[(c2)], \
PERL_PV_ESCAPE_RE|PERL_PV_ESCAPE_NONASCII |((isuni) ? PERL_PV_ESCAPE_UNI : 0) )
-#define RE_PV_QUOTED_DECL(rpv,isuni,dsv,pv,l,m) \
- const char * const rpv = \
- pv_pretty((dsv), (pv), (l), \
- (PL_dump_re_max_len) ? PL_dump_re_max_len : (m), \
- PL_colors[0], PL_colors[1], \
+#define RE_PV_QUOTED_DECL(rpv,isuni,dsv,pv,l,m) \
+ const char * const rpv = \
+ pv_pretty((dsv), (pv), (l), (m), \
+ PL_colors[0], PL_colors[1], \
( PERL_PV_PRETTY_QUOTE | PERL_PV_ESCAPE_RE | PERL_PV_ESCAPE_NONASCII | PERL_PV_PRETTY_ELLIPSES | \
((isuni) ? PERL_PV_ESCAPE_UNI : 0)) \
)
#endif /* DEBUG RELATED DEFINES */
+#define FIRST_NON_ASCII_DECIMAL_DIGIT 0x660 /* ARABIC_INDIC_DIGIT_ZERO */
+
typedef enum {
TRADITIONAL_BOUND = _CC_WORDCHAR,
GCB_BOUND,