X-Git-Url: https://perl5.git.perl.org/perl5.git/blobdiff_plain/2e84be61fa7eefd83460cbe9147cdd734e6da947..4b844e06c99d9c2e251dde1c8abc47508b801786:/regcomp.h diff --git a/regcomp.h b/regcomp.h index 20b4401..8f0b828 100644 --- a/regcomp.h +++ b/regcomp.h @@ -204,15 +204,16 @@ struct regnode_charclass { U8 flags; U8 type; U16 next_off; - U32 arg1; + U32 arg1; /* used as ptr in S_regclass */ char bitmap[ANYOF_BITMAP_SIZE]; /* only compile-time */ }; -struct regnode_charclass_class { /* has [[:blah:]] classes */ - U8 flags; /* should have ANYOF_CLASS here */ +/* has runtime (locale) \d, \w, ..., [:posix:] classes */ +struct regnode_charclass_class { + U8 flags; /* ANYOF_CLASS bit must go here */ U8 type; U16 next_off; - U32 arg1; + U32 arg1; /* used as ptr in S_regclass */ char bitmap[ANYOF_BITMAP_SIZE]; /* both compile-time */ char classflags[ANYOF_CLASSBITMAP_SIZE]; /* and run-time */ }; @@ -271,6 +272,8 @@ struct regnode_charclass_class { /* has [[:blah:]] classes */ #undef STRING #define OP(p) ((p)->type) +#define FLAGS(p) ((p)->flags) /* Caution: Doesn't apply to all \ + regnode types */ #define OPERAND(p) (((struct regnode_string *)p)->string) #define MASK(p) ((char*)OPERAND(p)) #define STR_LEN(p) (((struct regnode_string *)p)->str_len) @@ -306,25 +309,22 @@ struct regnode_charclass_class { /* has [[:blah:]] classes */ #define SIZE_ONLY (RExC_emit == &PL_regdummy) +/* Flags for node->flags of several of the node types */ +#define USE_UNI 0x01 + /* Flags for node->flags of ANYOF */ -#define ANYOF_CLASS 0x08 /* has [[:blah:]] classes */ +#define ANYOF_CLASS 0x08 /* has runtime \d, \w, [:posix:], ... */ +#define ANYOF_LARGE ANYOF_CLASS /* Same; name retained for back compat */ #define ANYOF_INVERT 0x04 #define ANYOF_FOLD 0x02 #define ANYOF_LOCALE 0x01 -/* Used for regstclass only */ -#define ANYOF_EOS 0x10 /* Can match an empty string too */ - -/* There is a character or a range past 0xff */ -#define ANYOF_UNICODE 0x20 -#define ANYOF_UNICODE_ALL 0x40 /* Can match any char past 0xff */ +/* EOS used for regstclass only */ +#define ANYOF_EOS 0x10 /* Can match an empty string too */ -/* size of node is large (includes class pointer) */ -#define ANYOF_LARGE 0x80 - -/* Are there any runtime flags on in this node? */ -#define ANYOF_RUNTIME(s) (ANYOF_FLAGS(s) & 0x0f) +#define ANYOF_UNICODE 0x20 /* Matches >= one thing past 0xff */ +#define ANYOF_UNICODE_ALL 0x40 /* Matches 0x100 - infinity */ #define ANYOF_FLAGS_ALL 0xff @@ -447,37 +447,6 @@ START_EXTERN_C #include "regnodes.h" #endif -/* The following have no fixed length. U8 so we can do strchr() on it. */ -#ifndef DOINIT -EXTCONST U8 PL_varies[]; -#else -EXTCONST U8 PL_varies[] = { - BRANCH, BACK, STAR, PLUS, CURLY, CURLYX, REF, REFF, REFFL, - WHILEM, CURLYM, CURLYN, BRANCHJ, IFTHEN, SUSPEND, CLUMP, - NREF, NREFF, NREFFL, - 0 -}; -#endif - -/* The following always have a length of 1. U8 we can do strchr() on it. */ -/* (Note that length 1 means "one character" under UTF8, not "one octet".) */ -#ifndef DOINIT -EXTCONST U8 PL_simple[]; -#else -EXTCONST U8 PL_simple[] = { - REG_ANY, SANY, CANY, - ANYOF, - ALNUM, ALNUML, - NALNUM, NALNUML, - SPACE, SPACEL, - NSPACE, NSPACEL, - DIGIT, NDIGIT, - VERTWS, NVERTWS, - HORIZWS, NHORIZWS, - 0 -}; -#endif - #ifndef PLUGGABLE_RE_EXTENSION #ifndef DOINIT EXTCONST regexp_engine PL_core_reg_engine; @@ -507,6 +476,7 @@ END_EXTERN_C /* .what is a character array with one character for each member of .data * The character describes the function of the corresponding .data item: + * a - AV for paren_name_list under DEBUGGING * f - start-class data for regstclass optimization * n - Root of op tree for (?{EVAL}) item * o - Start op for (?{EVAL}) item @@ -586,6 +556,15 @@ struct _reg_trie_state { } trans; }; +/* info per word; indexed by wordnum */ +typedef struct { + U16 prev; /* previous word in acceptance chain; eg in + * zzz|abc|ab/ after matching the chars abc, the + * accepted word is #2, and the previous accepted + * word is #3 */ + U32 len; /* how many chars long is this word? */ + U32 accept; /* accept state for this word */ +} reg_trie_wordinfo; typedef struct _reg_trie_state reg_trie_state; @@ -603,15 +582,14 @@ struct _reg_trie_data { reg_trie_state *states; /* state data */ reg_trie_trans *trans; /* array of transition elements */ char *bitmap; /* stclass bitmap */ - U32 *wordlen; /* array of lengths of words */ U16 *jump; /* optional 1 indexed array of offsets before tail for the node following a given word. */ - U16 *nextword; /* optional 1 indexed array to support linked list - of duplicate wordnums */ + reg_trie_wordinfo *wordinfo; /* array of info per word */ U16 uniquecharcount; /* unique chars in trie (width of trans table) */ U32 startstate; /* initial state - used for common prefix optimisation */ STRLEN minlen; /* minimum length of words in trie - build/opt only? */ STRLEN maxlen; /* maximum length of words in trie - build/opt only? */ + U32 prefixlen; /* #chars in common prefix */ U32 statecount; /* Build only - number of states in the states array (including the unused zero state) */ U32 wordcount; /* Build only */ @@ -789,9 +767,11 @@ re.pm, especially to the documentation. if (re_debug_flags & RE_DEBUG_EXTRA_GPOS) x ) /* initialization */ -/* get_sv() can return NULL during global destruction. */ +/* get_sv() can return NULL during global destruction. re_debug_flags can get + * clobbered by a longjmp, so must be initialized */ #define GET_RE_DEBUG_FLAGS DEBUG_r({ \ SV * re_debug_flags_sv = NULL; \ + re_debug_flags = 0; \ re_debug_flags_sv = get_sv(RE_DEBUG_FLAGS, 1); \ if (re_debug_flags_sv) { \ if (!SvIOK(re_debug_flags_sv)) \ @@ -802,7 +782,7 @@ re.pm, especially to the documentation. #ifdef DEBUGGING -#define GET_RE_DEBUG_FLAGS_DECL IV re_debug_flags = 0; GET_RE_DEBUG_FLAGS; +#define GET_RE_DEBUG_FLAGS_DECL VOL IV re_debug_flags = 0; GET_RE_DEBUG_FLAGS; #define RE_PV_COLOR_DECL(rpv,rlen,isuni,dsv,pv,l,m,c1,c2) \ const char * const rpv = \