U8 flags;
U8 type;
U16 next_off;
- U32 arg1;
+ U32 arg1; /* used as ptr in S_regclass */
char bitmap[ANYOF_BITMAP_SIZE]; /* only compile-time */
};
-struct regnode_charclass_class { /* has [[:blah:]] classes */
- U8 flags; /* should have ANYOF_CLASS here */
+/* Character-class node that has runtime (locale) \d, \w, ..., [:posix:] classes */
+struct regnode_charclass_class {
+ U8 flags; /* the ANYOF_CLASS bit must go in this field */
U8 type;
U16 next_off;
- U32 arg1;
+ U32 arg1; /* used as a pointer in S_regclass */
char bitmap[ANYOF_BITMAP_SIZE]; /* both compile-time */
char classflags[ANYOF_CLASSBITMAP_SIZE]; /* and run-time */
};
#undef STRING
#define OP(p) ((p)->type)
+#define FLAGS(p) ((p)->flags) /* Caution: Doesn't apply to all \
+ regnode types */
#define OPERAND(p) (((struct regnode_string *)p)->string)
#define MASK(p) ((char*)OPERAND(p))
#define STR_LEN(p) (((struct regnode_string *)p)->str_len)
#define SIZE_ONLY (RExC_emit == &PL_regdummy)
+/* Flags for node->flags of several of the node types */
+#define USE_UNI 0x01
+
/* Flags for node->flags of ANYOF */
-#define ANYOF_CLASS 0x08 /* has [[:blah:]] classes */
+#define ANYOF_CLASS 0x08 /* has runtime \d, \w, [:posix:], ... */
+#define ANYOF_LARGE ANYOF_CLASS /* Same bit as ANYOF_CLASS; old name retained for backward compatibility */
#define ANYOF_INVERT 0x04
#define ANYOF_FOLD 0x02
#define ANYOF_LOCALE 0x01
-/* Used for regstclass only */
-#define ANYOF_EOS 0x10 /* Can match an empty string too */
-
-/* There is a character or a range past 0xff */
-#define ANYOF_UNICODE 0x20
-#define ANYOF_UNICODE_ALL 0x40 /* Can match any char past 0xff */
+/* ANYOF_EOS is used for regstclass only */
+#define ANYOF_EOS 0x10 /* Can match an empty string too */
-/* size of node is large (includes class pointer) */
-#define ANYOF_LARGE 0x80
-
-/* Are there any runtime flags on in this node? */
-#define ANYOF_RUNTIME(s) (ANYOF_FLAGS(s) & 0x0f)
+#define ANYOF_UNICODE 0x20 /* Matches at least one character past 0xff */
+#define ANYOF_UNICODE_ALL 0x40 /* Can match any character from 0x100 upward */
#define ANYOF_FLAGS_ALL 0xff
#include "regnodes.h"
#endif
-/* The following have no fixed length. U8 so we can do strchr() on it. */
-#ifndef DOINIT
-EXTCONST U8 PL_varies[];
-#else
-EXTCONST U8 PL_varies[] = {
- BRANCH, BACK, STAR, PLUS, CURLY, CURLYX, REF, REFF, REFFL,
- WHILEM, CURLYM, CURLYN, BRANCHJ, IFTHEN, SUSPEND, CLUMP,
- NREF, NREFF, NREFFL,
- 0
-};
-#endif
-
-/* The following always have a length of 1. U8 we can do strchr() on it. */
-/* (Note that length 1 means "one character" under UTF8, not "one octet".) */
-#ifndef DOINIT
-EXTCONST U8 PL_simple[];
-#else
-EXTCONST U8 PL_simple[] = {
- REG_ANY, SANY, CANY,
- ANYOF,
- ALNUM, ALNUML,
- NALNUM, NALNUML,
- SPACE, SPACEL,
- NSPACE, NSPACEL,
- DIGIT, NDIGIT,
- VERTWS, NVERTWS,
- HORIZWS, NHORIZWS,
- 0
-};
-#endif
-
#ifndef PLUGGABLE_RE_EXTENSION
#ifndef DOINIT
EXTCONST regexp_engine PL_core_reg_engine;
/* .what is a character array with one character for each member of .data
* The character describes the function of the corresponding .data item:
+ * a - AV for paren_name_list under DEBUGGING
* f - start-class data for regstclass optimization
* n - Root of op tree for (?{EVAL}) item
* o - Start op for (?{EVAL}) item
} trans;
};
+/* Per-word information for a trie; the array is indexed by wordnum */
+typedef struct {
+ U16 prev; /* previous word in acceptance chain; e.g. in
+ * /zzz|abc|ab/ after matching the chars abc, the
+ * accepted word is #2, and the previous accepted
+ * word is #3 */
+ U32 len; /* length of this word, in chars */
+ U32 accept; /* accept state for this word */
+} reg_trie_wordinfo;
typedef struct _reg_trie_state reg_trie_state;
reg_trie_state *states; /* state data */
reg_trie_trans *trans; /* array of transition elements */
char *bitmap; /* stclass bitmap */
- U32 *wordlen; /* array of lengths of words */
U16 *jump; /* optional 1 indexed array of offsets before tail
for the node following a given word. */
- U16 *nextword; /* optional 1 indexed array to support linked list
- of duplicate wordnums */
+ reg_trie_wordinfo *wordinfo; /* array of info per word */
U16 uniquecharcount; /* unique chars in trie (width of trans table) */
U32 startstate; /* initial state - used for common prefix optimisation */
STRLEN minlen; /* minimum length of words in trie - build/opt only? */
STRLEN maxlen; /* maximum length of words in trie - build/opt only? */
+ U32 prefixlen; /* #chars in common prefix */
U32 statecount; /* Build only - number of states in the states array
(including the unused zero state) */
U32 wordcount; /* Build only */
if (re_debug_flags & RE_DEBUG_EXTRA_GPOS) x )
/* initialization */
-/* get_sv() can return NULL during global destruction. */
+/* get_sv() can return NULL during global destruction. re_debug_flags can get
+ * clobbered by a longjmp, so must be initialized */
#define GET_RE_DEBUG_FLAGS DEBUG_r({ \
SV * re_debug_flags_sv = NULL; \
+ re_debug_flags = 0; \
re_debug_flags_sv = get_sv(RE_DEBUG_FLAGS, 1); \
if (re_debug_flags_sv) { \
if (!SvIOK(re_debug_flags_sv)) \
#ifdef DEBUGGING
-#define GET_RE_DEBUG_FLAGS_DECL IV re_debug_flags = 0; GET_RE_DEBUG_FLAGS;
+#define GET_RE_DEBUG_FLAGS_DECL VOL IV re_debug_flags = 0; GET_RE_DEBUG_FLAGS;
#define RE_PV_COLOR_DECL(rpv,rlen,isuni,dsv,pv,l,m,c1,c2) \
const char * const rpv = \