X-Git-Url: https://perl5.git.perl.org/perl5.git/blobdiff_plain/68ba3a3fdba75589a9e65167c7caeb83c4231690..64484faa0be51485f76b4a5e1542b1fe76c13a76:/regcomp.h diff --git a/regcomp.h b/regcomp.h index 51b14b7..3785818 100644 --- a/regcomp.h +++ b/regcomp.h @@ -1,12 +1,13 @@ /* regcomp.h * * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, - * 2000, 2001, 2002, 2003, 2005, 2006 by Larry Wall and others + * 2000, 2001, 2002, 2003, 2005, 2006, 2007, by Larry Wall and others * * You may distribute under the terms of either the GNU General Public * License or the Artistic License, as specified in the README file. * */ +#include "regcharclass.h" typedef OP OP_4tree; /* Will be redefined later. */ @@ -101,11 +102,7 @@ typedef OP OP_4tree; /* Will be redefined later. */ /* This is the stuff that used to live in regexp.h that was truly private to the engine itself. It now lives here. */ -/* swap buffer for paren structs */ -typedef struct regexp_paren_ofs { - I32 *startp; - I32 *endp; -} regexp_paren_ofs; + typedef struct regexp_internal { int name_list_idx; /* Optional data index of an array of paren names */ @@ -118,7 +115,6 @@ typedef struct regexp_paren_ofs { U32 proglen; } u; - regexp_paren_ofs *swap; /* Swap copy of *startp / *endp */ regnode *regstclass; /* Optional startclass as identified or constructed by the optimiser */ struct reg_data *data; /* Additional miscellaneous data used by the program. 
@@ -182,7 +178,7 @@ struct regnode_2 { #define ANYOF_BITMAP_SIZE 32 /* 256 b/(8 b/B) */ -#define ANYOF_CLASSBITMAP_SIZE 4 /* up to 32 (8*4) named classes */ +#define ANYOF_CLASSBITMAP_SIZE 4 /* up to 32 (8*4) named classes */ /* also used by trie */ struct regnode_charclass { @@ -350,6 +346,14 @@ struct regnode_charclass_class { /* has [[:blah:]] classes */ #define ANYOF_MAX 32 +/* pseudo classes, not stored in the class bitmap, but used as flags + during compilation of char classes */ + +#define ANYOF_VERTWS (ANYOF_MAX+1) +#define ANYOF_NVERTWS (ANYOF_MAX+2) +#define ANYOF_HORIZWS (ANYOF_MAX+3) +#define ANYOF_NHORIZWS (ANYOF_MAX+4) + /* Backward source code compatibility. */ #define ANYOF_ALNUML ANYOF_ALNUM @@ -414,6 +418,7 @@ struct regnode_charclass_class { /* has [[:blah:]] classes */ #define REG_TOP_LEVEL_BRANCHES 0x00000040 #define REG_SEEN_VERBARG 0x00000080 #define REG_SEEN_CUTGROUP 0x00000100 +#define REG_SEEN_RUN_ON_COMMENT 0x00000200 START_EXTERN_C @@ -448,6 +453,8 @@ EXTCONST U8 PL_simple[] = { SPACE, SPACEL, NSPACE, NSPACEL, DIGIT, NDIGIT, + VERTWS, NVERTWS, + HORIZWS, NHORIZWS, 0 }; #endif @@ -457,11 +464,17 @@ EXTCONST U8 PL_simple[] = { EXTCONST regexp_engine PL_core_reg_engine; #else /* DOINIT */ EXTCONST regexp_engine PL_core_reg_engine = { - Perl_re_compile, - Perl_regexec_flags, + Perl_re_compile, + Perl_regexec_flags, Perl_re_intuit_start, Perl_re_intuit_string, - Perl_regfree_internal, + Perl_regfree_internal, + Perl_reg_numbered_buff_fetch, + Perl_reg_numbered_buff_store, + Perl_reg_numbered_buff_length, + Perl_reg_named_buff, + Perl_reg_named_buff_iter, + Perl_reg_qr_package, #if defined(USE_ITHREADS) Perl_regdupe_internal #endif @@ -479,7 +492,7 @@ END_EXTERN_C * n - Root of op tree for (?{EVAL}) item * o - Start op for (?{EVAL}) item * p - Pad for (?{EVAL}) item - * s - swash for unicode-style character class, and the multicharacter + * s - swash for Unicode-style character class, and the multicharacter * strings resulting from casefolding 
the single-character entries * in the character class * t - trie struct @@ -563,20 +576,20 @@ typedef struct _reg_trie_trans reg_trie_trans; optimisation in Perl_regdupe. */ struct _reg_trie_data { U32 refcount; /* number of times this trie is referenced */ - U16 uniquecharcount; /* unique chars in trie (width of trans table) */ U32 lasttrans; /* last valid transition element */ U16 *charmap; /* byte to charid lookup array */ reg_trie_state *states; /* state data */ reg_trie_trans *trans; /* array of transition elements */ char *bitmap; /* stclass bitmap */ - U32 startstate; /* initial state - used for common prefix optimisation */ - STRLEN minlen; /* minimum length of words in trie - build/opt only? */ - STRLEN maxlen; /* maximum length of words in trie - build/opt only? */ U32 *wordlen; /* array of lengths of words */ U16 *jump; /* optional 1 indexed array of offsets before tail for the node following a given word. */ U16 *nextword; /* optional 1 indexed array to support linked list of duplicate wordnums */ + U16 uniquecharcount; /* unique chars in trie (width of trans table) */ + U32 startstate; /* initial state - used for common prefix optimisation */ + STRLEN minlen; /* minimum length of words in trie - build/opt only? */ + STRLEN maxlen; /* maximum length of words in trie - build/opt only? */ U32 statecount; /* Build only - number of states in the states array (including the unused zero state) */ U32 wordcount; /* Build only */ @@ -602,9 +615,9 @@ typedef struct _reg_trie_data reg_trie_data; optimisation in Perl_regdupe. */ struct _reg_ac_data { U32 refcount; + U32 trie; U32 *fail; reg_trie_state *states; - U32 trie; }; typedef struct _reg_ac_data reg_ac_data; @@ -678,6 +691,7 @@ re.pm, especially to the documentation. 
#define RE_DEBUG_COMPILE_OPTIMISE 0x000002 #define RE_DEBUG_COMPILE_TRIE 0x000004 #define RE_DEBUG_COMPILE_DUMP 0x000008 +#define RE_DEBUG_COMPILE_FLAGS 0x000010 /* Execute */ #define RE_DEBUG_EXECUTE_MASK 0x00FF00 @@ -692,6 +706,7 @@ re.pm, especially to the documentation. #define RE_DEBUG_EXTRA_OFFDEBUG 0x040000 #define RE_DEBUG_EXTRA_STATE 0x080000 #define RE_DEBUG_EXTRA_OPTIMISE 0x100000 +#define RE_DEBUG_EXTRA_BUFFERS 0x400000 /* combined */ #define RE_DEBUG_EXTRA_STACK 0x280000 @@ -709,7 +724,8 @@ re.pm, especially to the documentation. if (re_debug_flags & RE_DEBUG_COMPILE_DUMP) x ) #define DEBUG_TRIE_COMPILE_r(x) DEBUG_r( \ if (re_debug_flags & RE_DEBUG_COMPILE_TRIE) x ) - +#define DEBUG_FLAGS_r(x) DEBUG_r( \ + if (re_debug_flags & RE_DEBUG_COMPILE_FLAGS) x ) /* Execute */ #define DEBUG_EXECUTE_r(x) DEBUG_r( \ if (re_debug_flags & RE_DEBUG_EXECUTE_MASK) x ) @@ -729,6 +745,9 @@ re.pm, especially to the documentation. if (re_debug_flags & RE_DEBUG_EXTRA_STATE) x ) #define DEBUG_STACK_r(x) DEBUG_r( \ if (re_debug_flags & RE_DEBUG_EXTRA_STACK) x ) +#define DEBUG_BUFFERS_r(x) DEBUG_r( \ + if (re_debug_flags & RE_DEBUG_EXTRA_BUFFERS) x ) + #define DEBUG_OPTIMISE_MORE_r(x) DEBUG_r( \ if ((RE_DEBUG_EXTRA_OPTIMISE|RE_DEBUG_COMPILE_OPTIMISE) == \ (re_debug_flags & (RE_DEBUG_EXTRA_OPTIMISE|RE_DEBUG_COMPILE_OPTIMISE)) ) x ) @@ -777,7 +796,7 @@ re.pm, especially to the documentation. const char * const rpv = \ pv_pretty((dsv), (pv), (l), (m), \ PL_colors[0], PL_colors[1], \ - ( PERL_PV_PRETTY_QUOTE | PERL_PV_ESCAPE_RE | PERL_PV_PRETTY_ELIPSES | \ + ( PERL_PV_PRETTY_QUOTE | PERL_PV_ESCAPE_RE | PERL_PV_PRETTY_ELLIPSES | \ ((isuni) ? PERL_PV_ESCAPE_UNI : 0)) \ ) @@ -796,3 +815,4 @@ re.pm, especially to the documentation. #endif /* DEBUG RELATED DEFINES */ +