*
*/
-#ifndef PERL_REGCOMP_H_
+#if ! defined(PERL_REGCOMP_H_) && ( defined(PERL_CORE) \
+ || defined(PERL_EXT_RE_BUILD))
#define PERL_REGCOMP_H_
#include "regcharclass.h"
#endif
/*
- * The "internal use only" fields in regexp.h are present to pass info from
- * compile to execute that permits the execute phase to run lots faster on
- * simple cases. They are:
- *
- * regstart sv that must begin a match; NULL if none obvious
- * reganch is the match anchored (at beginning-of-line only)?
- * regmust string (pointer into program) that match must include, or NULL
- * [regmust changed to SV* for bminstr()--law]
- * regmlen length of regmust string
- * [regmlen not used currently]
- *
- * Regstart and reganch permit very fast decisions on suitable starting points
- * for a match, cutting down the work a lot. Regmust permits fast rejection
- * of lines that cannot possibly match. The regmust tests are costly enough
- * that pregcomp() supplies a regmust only if the r.e. contains something
- * potentially expensive (at present, the only such thing detected is * or +
- * at the start of the r.e., which can involve a lot of backup). Regmlen is
- * supplied because the test in pregexec() needs it and pregcomp() is computing
- * it anyway.
- * [regmust is now supplied always. The tests that use regmust have a
- * heuristic that disables the test if it usually matches.]
- *
- * [In fact, we now use regmust in many cases to locate where the search
- * starts in the string, so if regback is >= 0, the regmust search is never
- * wasted effort. The regback variable says how many characters back from
- * where regmust matched is the earliest possible start of the match.
- * For instance, /[a-z].foo/ has a regmust of 'foo' and a regback of 2.]
- */
-
-/*
* Structure for regexp "program". This is essentially a linear encoding
* of a nondeterministic finite-state machine (aka syntax charts or
* "railroad normal form" in parsing technology). Each node is an opcode
private to the engine itself. It now lives here. */
typedef struct regexp_internal {
- int name_list_idx; /* Optional data index of an array of paren names */
union {
- U32 *offsets; /* offset annotations 20001228 MJD
+ U32 *offsets; /* offset annotations 20001228 MJD
data about mapping the program to the
string -
offsets[0] is proglen when this is used
Used to make it easier to clone and free arbitrary
data that the regops need. Often the ARG field of
a regop is an index into this structure */
- struct reg_code_blocks *code_blocks;/* positions of literal (?{}) */
- regnode program[1]; /* Unwarranted chumminess with compiler. */
+ struct reg_code_blocks *code_blocks;/* positions of literal (?{}) */
+ int name_list_idx; /* Optional data index of an array of paren names */
+ regnode program[1]; /* Unwarranted chumminess with compiler. */
} regexp_internal;
#define RXi_SET(x,y) (x)->pprivate = (void*)(y)
U32 arg1;
};
-/* Node whose argument is 'void *', a pointer to void. This needs to be used
- * very carefully in situations where pointers won't become invalid because of,
- * say re-mallocs */
+/* Node whose argument is 'SV *'. This needs to be used very carefully in
+ * situations where pointers won't become invalid because of, say, re-mallocs */
struct regnode_p {
U8 flags;
U8 type;
U16 next_off;
- void * arg1;
+ SV * arg1;
};
/* Similar to a regnode_1 but with an extra signed argument */
((1<<32)-1), while on the Cray T90, sizeof(short)==8 and U16_MAX is
((1<<64)-1). To limit stack growth to reasonable sizes, supply a
smaller default.
- --Andy Dougherty 11 June 1998
+ --Andy Dougherty 11 June 1998
*/
#if SHORTSIZE > 2
# ifndef REG_INFTY
-# define REG_INFTY ((1<<16)-1)
+# define REG_INFTY nBIT_UMAX(16)
# endif
#endif
#define OP(p) ((p)->type)
#define FLAGS(p) ((p)->flags) /* Caution: Doesn't apply to all \
- regnode types. For some, it's the \
- character set of the regnode */
+ regnode types. For some, it's the \
+ character set of the regnode */
#define STR_LENs(p) (__ASSERT_(OP(p) != LEXACT && OP(p) != LEXACT_REQ8) \
((struct regnode_string *)p)->str_len)
#define STRINGs(p) (__ASSERT_(OP(p) != LEXACT && OP(p) != LEXACT_REQ8) \
} STMT_END
#define ANYOFR_BASE_BITS 20
-#define ANYOFRbase(p) (ARG(p) & ((1 << ANYOFR_BASE_BITS) - 1))
+#define ANYOFRbase(p) (ARG(p) & nBIT_MASK(ANYOFR_BASE_BITS))
#define ANYOFRdelta(p) (ARG(p) >> ANYOFR_BASE_BITS)
#undef NODE_ALIGN
* regex is compiled. In this case, we don't know until runtime what it
* will match, so we have to assume it could match anything, including
* code points that ordinarily would be in the bitmap. A flag bit is
- * necessary to indicate this , though it can be shared with the item 3)
+ * necessary to indicate this, though it can be shared with the item 3)
* flag, as that only occurs under /d, and this only occurs under non-d.
* This case is quite uncommon in the field, and the /(?[ ...])/ construct
* is a better way to accomplish what this feature does. This case also
* Another possibility is based on the fact that ANYOF_MATCHES_POSIXL is
* redundant with the node type ANYOFPOSIXL. That flag could be removed, but
* at the expense of extra code in regexec.c. The flag has been retained
- * because it allows us to see if we need to call reginsert, or just use the
+ * because it allows us to see if we need to call reginclass, or just use the
* bitmap in one test.
*
* If this is done, an extension would be to make all ANYOFL nodes contain the
} STMT_END
/* Shifts a bit to get, eg. 0x4000_0000, then subtracts 1 to get 0x3FFF_FFFF */
-#define ANYOF_POSIXL_SETALL(ret) STMT_START { ((regnode_charclass_posixl*) (ret))->classflags = ((1U << ((ANYOF_POSIXL_MAX) - 1))) - 1; } STMT_END
+#define ANYOF_POSIXL_SETALL(ret) STMT_START { ((regnode_charclass_posixl*) (ret))->classflags = nBIT_MASK(ANYOF_POSIXL_MAX); } STMT_END
#define ANYOF_CLASS_SETALL(ret) ANYOF_POSIXL_SETALL(ret)
#define ANYOF_POSIXL_TEST_ANY_SET(p) \
((ANYOF_FLAGS(p) & ANYOF_MATCHES_POSIXL) \
- && (((regnode_charclass_posixl*)(p))->classflags))
+ && (((regnode_charclass_posixl*)(p))->classflags))
#define ANYOF_CLASS_TEST_ANY_SET(p) ANYOF_POSIXL_TEST_ANY_SET(p)
/* Since an SSC always has this field, we don't have to test for that; nor do
cBOOL(((regnode_ssc*)(p))->classflags)
#define ANYOF_POSIXL_SSC_TEST_ALL_SET(p) /* Are all bits set? */ \
(((regnode_ssc*) (p))->classflags \
- == ((1U << ((ANYOF_POSIXL_MAX) - 1))) - 1)
+ == nBIT_MASK(ANYOF_POSIXL_MAX))
#define ANYOF_POSIXL_TEST_ALL_SET(p) \
- ((ANYOF_FLAGS(p) & ANYOF_MATCHES_POSIXL) \
+ ((ANYOF_FLAGS(p) & ANYOF_MATCHES_POSIXL) \
&& ((regnode_charclass_posixl*) (p))->classflags \
- == ((1U << ((ANYOF_POSIXL_MAX) - 1))) - 1)
+ == nBIT_MASK(ANYOF_POSIXL_MAX))
#define ANYOF_POSIXL_OR(source, dest) STMT_START { (dest)->classflags |= (source)->classflags ; } STMT_END
#define ANYOF_CLASS_OR(source, dest) ANYOF_POSIXL_OR((source), (dest))
#define ANYOF_BITMAP_TEST(p, c) cBOOL(ANYOF_BITMAP_BYTE(p, c) & ANYOF_BIT(c))
#define ANYOF_BITMAP_SETALL(p) \
- memset (ANYOF_BITMAP(p), 255, ANYOF_BITMAP_SIZE)
+ memset (ANYOF_BITMAP(p), 255, ANYOF_BITMAP_SIZE)
#define ANYOF_BITMAP_CLEARALL(p) \
- Zero (ANYOF_BITMAP(p), ANYOF_BITMAP_SIZE)
+ Zero (ANYOF_BITMAP(p), ANYOF_BITMAP_SIZE)
/*
* Utility definitions.
/* info per word; indexed by wordnum */
typedef struct {
U16 prev; /* previous word in acceptance chain; eg in
- * zzz|abc|ab/ after matching the chars abc, the
- * accepted word is #2, and the previous accepted
- * word is #3 */
+ * /zzz|abc|ab/ after matching the chars abc, the
+ * accepted word is #2, and the previous accepted
+ * word is #3 */
U32 len; /* how many chars long is this word? */
U32 accept; /* accept state for this word */
} reg_trie_wordinfo;
#define RE_DEBUG_EXECUTE_TRIE 0x000400
/* Extra */
-#define RE_DEBUG_EXTRA_MASK 0x1FF0000
+#define RE_DEBUG_EXTRA_MASK 0x3FF0000
#define RE_DEBUG_EXTRA_TRIE 0x0010000
#define RE_DEBUG_EXTRA_OFFSETS 0x0020000
#define RE_DEBUG_EXTRA_OFFDEBUG 0x0040000
#define RE_DEBUG_EXTRA_BUFFERS 0x0400000
#define RE_DEBUG_EXTRA_GPOS 0x0800000
#define RE_DEBUG_EXTRA_DUMP_PRE_OPTIMIZE 0x1000000
+#define RE_DEBUG_EXTRA_WILDCARD 0x2000000
/* combined */
#define RE_DEBUG_EXTRA_STACK 0x0280000
-#define RE_DEBUG_FLAG(x) (re_debug_flags & x)
+#define RE_DEBUG_FLAG(x) (re_debug_flags & (x))
/* Compile */
#define DEBUG_COMPILE_r(x) DEBUG_r( \
- if (DEBUG_v_TEST || (re_debug_flags & RE_DEBUG_COMPILE_MASK)) x )
+ if (DEBUG_v_TEST || RE_DEBUG_FLAG(RE_DEBUG_COMPILE_MASK)) x )
#define DEBUG_PARSE_r(x) DEBUG_r( \
- if (DEBUG_v_TEST || (re_debug_flags & RE_DEBUG_COMPILE_PARSE)) x )
+ if (DEBUG_v_TEST || RE_DEBUG_FLAG(RE_DEBUG_COMPILE_PARSE)) x )
#define DEBUG_OPTIMISE_r(x) DEBUG_r( \
- if (DEBUG_v_TEST || (re_debug_flags & RE_DEBUG_COMPILE_OPTIMISE)) x )
+ if (DEBUG_v_TEST || RE_DEBUG_FLAG(RE_DEBUG_COMPILE_OPTIMISE)) x )
#define DEBUG_DUMP_r(x) DEBUG_r( \
- if (DEBUG_v_TEST || (re_debug_flags & RE_DEBUG_COMPILE_DUMP)) x )
+ if (DEBUG_v_TEST || RE_DEBUG_FLAG(RE_DEBUG_COMPILE_DUMP)) x )
#define DEBUG_TRIE_COMPILE_r(x) DEBUG_r( \
- if (DEBUG_v_TEST || (re_debug_flags & RE_DEBUG_COMPILE_TRIE)) x )
+ if (DEBUG_v_TEST || RE_DEBUG_FLAG(RE_DEBUG_COMPILE_TRIE)) x )
#define DEBUG_FLAGS_r(x) DEBUG_r( \
- if (DEBUG_v_TEST || (re_debug_flags & RE_DEBUG_COMPILE_FLAGS)) x )
+ if (DEBUG_v_TEST || RE_DEBUG_FLAG(RE_DEBUG_COMPILE_FLAGS)) x )
#define DEBUG_TEST_r(x) DEBUG_r( \
- if (DEBUG_v_TEST || (re_debug_flags & RE_DEBUG_COMPILE_TEST)) x )
+ if (DEBUG_v_TEST || RE_DEBUG_FLAG(RE_DEBUG_COMPILE_TEST)) x )
/* Execute */
#define DEBUG_EXECUTE_r(x) DEBUG_r( \
- if (DEBUG_v_TEST || (re_debug_flags & RE_DEBUG_EXECUTE_MASK)) x )
+ if (DEBUG_v_TEST || RE_DEBUG_FLAG(RE_DEBUG_EXECUTE_MASK)) x )
#define DEBUG_INTUIT_r(x) DEBUG_r( \
- if (DEBUG_v_TEST || (re_debug_flags & RE_DEBUG_EXECUTE_INTUIT)) x )
+ if (DEBUG_v_TEST || RE_DEBUG_FLAG(RE_DEBUG_EXECUTE_INTUIT)) x )
#define DEBUG_MATCH_r(x) DEBUG_r( \
- if (DEBUG_v_TEST || (re_debug_flags & RE_DEBUG_EXECUTE_MATCH)) x )
+ if (DEBUG_v_TEST || RE_DEBUG_FLAG(RE_DEBUG_EXECUTE_MATCH)) x )
#define DEBUG_TRIE_EXECUTE_r(x) DEBUG_r( \
- if (DEBUG_v_TEST || (re_debug_flags & RE_DEBUG_EXECUTE_TRIE)) x )
+ if (DEBUG_v_TEST || RE_DEBUG_FLAG(RE_DEBUG_EXECUTE_TRIE)) x )
/* Extra */
#define DEBUG_EXTRA_r(x) DEBUG_r( \
- if (DEBUG_v_TEST || (re_debug_flags & RE_DEBUG_EXTRA_MASK)) x )
+ if (DEBUG_v_TEST || RE_DEBUG_FLAG(RE_DEBUG_EXTRA_MASK)) x )
#define DEBUG_OFFSETS_r(x) DEBUG_r( \
- if (DEBUG_v_TEST || (re_debug_flags & RE_DEBUG_EXTRA_OFFSETS)) x )
+ if (DEBUG_v_TEST || RE_DEBUG_FLAG(RE_DEBUG_EXTRA_OFFSETS)) x )
#define DEBUG_STATE_r(x) DEBUG_r( \
- if (DEBUG_v_TEST || (re_debug_flags & RE_DEBUG_EXTRA_STATE)) x )
+ if (DEBUG_v_TEST || RE_DEBUG_FLAG(RE_DEBUG_EXTRA_STATE)) x )
#define DEBUG_STACK_r(x) DEBUG_r( \
- if (DEBUG_v_TEST || (re_debug_flags & RE_DEBUG_EXTRA_STACK)) x )
+ if (DEBUG_v_TEST || RE_DEBUG_FLAG(RE_DEBUG_EXTRA_STACK)) x )
#define DEBUG_BUFFERS_r(x) DEBUG_r( \
- if (DEBUG_v_TEST || (re_debug_flags & RE_DEBUG_EXTRA_BUFFERS)) x )
+ if (DEBUG_v_TEST || RE_DEBUG_FLAG(RE_DEBUG_EXTRA_BUFFERS)) x )
#define DEBUG_OPTIMISE_MORE_r(x) DEBUG_r( \
if (DEBUG_v_TEST || ((RE_DEBUG_EXTRA_OPTIMISE|RE_DEBUG_COMPILE_OPTIMISE) == \
- (re_debug_flags & (RE_DEBUG_EXTRA_OPTIMISE|RE_DEBUG_COMPILE_OPTIMISE)))) x )
+ RE_DEBUG_FLAG(RE_DEBUG_EXTRA_OPTIMISE|RE_DEBUG_COMPILE_OPTIMISE))) x )
#define MJD_OFFSET_DEBUG(x) DEBUG_r( \
- if (DEBUG_v_TEST || (re_debug_flags & RE_DEBUG_EXTRA_OFFDEBUG)) \
+ if (DEBUG_v_TEST || RE_DEBUG_FLAG(RE_DEBUG_EXTRA_OFFDEBUG)) \
Perl_warn_nocontext x )
#define DEBUG_TRIE_COMPILE_MORE_r(x) DEBUG_TRIE_COMPILE_r( \
- if (DEBUG_v_TEST || (re_debug_flags & RE_DEBUG_EXTRA_TRIE)) x )
+ if (DEBUG_v_TEST || RE_DEBUG_FLAG(RE_DEBUG_EXTRA_TRIE)) x )
#define DEBUG_TRIE_EXECUTE_MORE_r(x) DEBUG_TRIE_EXECUTE_r( \
- if (DEBUG_v_TEST || (re_debug_flags & RE_DEBUG_EXTRA_TRIE)) x )
+ if (DEBUG_v_TEST || RE_DEBUG_FLAG(RE_DEBUG_EXTRA_TRIE)) x )
#define DEBUG_TRIE_r(x) DEBUG_r( \
- if (DEBUG_v_TEST || (re_debug_flags & (RE_DEBUG_COMPILE_TRIE \
- | RE_DEBUG_EXECUTE_TRIE ))) x )
+ if (DEBUG_v_TEST || RE_DEBUG_FLAG(RE_DEBUG_COMPILE_TRIE \
+ | RE_DEBUG_EXECUTE_TRIE )) x )
#define DEBUG_GPOS_r(x) DEBUG_r( \
- if (DEBUG_v_TEST || (re_debug_flags & RE_DEBUG_EXTRA_GPOS)) x )
+ if (DEBUG_v_TEST || RE_DEBUG_FLAG(RE_DEBUG_EXTRA_GPOS)) x )
#define DEBUG_DUMP_PRE_OPTIMIZE_r(x) DEBUG_r( \
- if (DEBUG_v_TEST || (re_debug_flags & RE_DEBUG_EXTRA_DUMP_PRE_OPTIMIZE)) x )
+ if (DEBUG_v_TEST || RE_DEBUG_FLAG(RE_DEBUG_EXTRA_DUMP_PRE_OPTIMIZE)) x )
/* initialization */
-/* get_sv() can return NULL during global destruction. */
-#define GET_RE_DEBUG_FLAGS DEBUG_r({ \
- SV * re_debug_flags_sv = NULL; \
+/* Get the debug flags for code that is in neither regcomp.c nor regexec.c.
+ * This doesn't set the flags variable to its defaults when it has no integer
+ * value yet; instead the flags are just assumed to be 0 */
+#define DECLARE_AND_GET_RE_DEBUG_FLAGS_NON_REGEX \
+ volatile IV re_debug_flags = 0; PERL_UNUSED_VAR(re_debug_flags); \
+ STMT_START { \
+ SV * re_debug_flags_sv = NULL; \
+ /* get_sv() can return NULL during global destruction. */ \
re_debug_flags_sv = PL_curcop ? get_sv(RE_DEBUG_FLAGS, GV_ADD) : NULL; \
- if (re_debug_flags_sv) { \
- if (!SvIOK(re_debug_flags_sv)) \
- sv_setuv(re_debug_flags_sv, RE_DEBUG_COMPILE_DUMP | RE_DEBUG_EXECUTE_MASK ); \
- re_debug_flags=SvIV(re_debug_flags_sv); \
- }\
-})
+ if (re_debug_flags_sv && SvIOK(re_debug_flags_sv)) \
+ re_debug_flags=SvIV(re_debug_flags_sv); \
+ } STMT_END
+
#ifdef DEBUGGING
-#define GET_RE_DEBUG_FLAGS_DECL volatile IV re_debug_flags = 0; \
- PERL_UNUSED_VAR(re_debug_flags); GET_RE_DEBUG_FLAGS;
+/* For use in regcomp.c and regexec.c. Declares the local variable
+ * 're_debug_flags' (initially 0) and then, inside DEBUG_r(), loads it from the
+ * SV named by RE_DEBUG_FLAGS. If that SV has no integer value yet, it is first
+ * set to the defaults (RE_DEBUG_COMPILE_DUMP | RE_DEBUG_EXECUTE_MASK). If
+ * PL_curcop is NULL or get_sv() returns NULL, the flags are left as 0. */
+#define DECLARE_AND_GET_RE_DEBUG_FLAGS \
+ volatile IV re_debug_flags = 0; PERL_UNUSED_VAR(re_debug_flags); \
+ DEBUG_r({ \
+ SV * re_debug_flags_sv = NULL; \
+ /* get_sv() can return NULL during global destruction. */ \
+ re_debug_flags_sv = PL_curcop ? get_sv(RE_DEBUG_FLAGS, GV_ADD) : NULL; \
+ if (re_debug_flags_sv) { \
+ if (!SvIOK(re_debug_flags_sv)) /* No integer value yet: set the default */\
+ sv_setuv(re_debug_flags_sv, \
+ /* These defaults should be kept in sync with re.pm */ \
+ RE_DEBUG_COMPILE_DUMP | RE_DEBUG_EXECUTE_MASK ); \
+ re_debug_flags=SvIV(re_debug_flags_sv); \
+ } \
+ })
+
+#define isDEBUG_WILDCARD (DEBUG_v_TEST || RE_DEBUG_FLAG(RE_DEBUG_EXTRA_WILDCARD))
#define RE_PV_COLOR_DECL(rpv,rlen,isuni,dsv,pv,l,m,c1,c2) \
const char * const rpv = \
#else /* if not DEBUGGING */
-#define GET_RE_DEBUG_FLAGS_DECL
-#define RE_PV_COLOR_DECL(rpv,rlen,isuni,dsv,pv,l,m,c1,c2)
+#define DECLARE_AND_GET_RE_DEBUG_FLAGS dNOOP
+#define RE_PV_COLOR_DECL(rpv,rlen,isuni,dsv,pv,l,m,c1,c2) dNOOP
#define RE_SV_ESCAPE(rpv,isuni,dsv,sv,m)
-#define RE_PV_QUOTED_DECL(rpv,isuni,dsv,pv,l,m)
+#define RE_PV_QUOTED_DECL(rpv,isuni,dsv,pv,l,m) dNOOP
#define RE_SV_DUMPLEN(ItEm)
#define RE_SV_TAIL(ItEm)
+#define isDEBUG_WILDCARD 0
#endif /* DEBUG RELATED DEFINES */
#define FIRST_NON_ASCII_DECIMAL_DIGIT 0x660 /* ARABIC_INDIC_DIGIT_ZERO */
typedef enum {
- TRADITIONAL_BOUND = _CC_WORDCHAR,
- GCB_BOUND,
- LB_BOUND,
- SB_BOUND,
- WB_BOUND
+ TRADITIONAL_BOUND = _CC_WORDCHAR,
+ GCB_BOUND,
+ LB_BOUND,
+ SB_BOUND,
+ WB_BOUND
} bound_type;
/* This unpacks the FLAGS field of ANYOF[HR]x nodes. The value it contains
* gives the strict lower bound for the UTF-8 start byte of any code point
* matchable by the node, and a loose upper bound as well.
*
- * The low bound is stored in the upper 6 bits, plus 0xC0.
+ * The low bound is stored as 0xC0 + ((the upper 6 bits) >> 2)
* The loose upper bound is determined from the lowest 2 bits and the low bound
* (called x) as follows:
*