X-Git-Url: https://perl5.git.perl.org/perl5.git/blobdiff_plain/d24ca0c5f11250dcd2552c84a048bda5786ba8d1..4210d3f17cf9d854c0cbf0e1afb06e737174f8ea:/regexp.h?ds=sidebyside diff --git a/regexp.h b/regexp.h index 8a77122..1f27fd5 100644 --- a/regexp.h +++ b/regexp.h @@ -50,9 +50,18 @@ struct reg_substr_data { #define SV_SAVED_COPY #endif +/* offsets within a string of a particular /(.)/ capture */ + typedef struct regexp_paren_pair { I32 start; I32 end; + /* 'start_tmp' records a new opening position before the matching end + * has been found, so that the old start and end values are still + * valid, e.g. + * "abc" =~ /(.(?{print "[$1]"}))+/ + *outputs [][a][b] + * This field is not part of the API. */ + I32 start_tmp; } regexp_paren_pair; #if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_UTF8_C) @@ -115,11 +124,11 @@ struct reg_code_block { char *subbeg; \ SV_SAVED_COPY /* If non-NULL, SV which is COW from original */\ I32 sublen; /* Length of string pointed by subbeg */ \ + I32 suboffset; /* byte offset of subbeg from logical start of str */ \ + I32 subcoffset; /* suboffset equiv, but in chars (for @-/@+) */ \ /* Information about the match that isn't often used */ \ /* offset from wrapped to the start of precomp */ \ PERL_BITFIELD32 pre_prefix:4; \ - /* number of eval groups in the pattern - for security checks */\ - PERL_BITFIELD32 seen_evals:28; \ CV *qr_anoncv /* the anon sub wrapped round qr/(?{..})/ */ typedef struct regexp { @@ -166,7 +175,7 @@ typedef struct regexp_engine { REGEXP* (*op_comp) (pTHX_ SV ** const patternp, int pat_count, OP *expr, const struct regexp_engine* eng, REGEXP *VOL old_re, - int *is_bare_re, U32 orig_rx_flags, U32 pm_flags); + bool *is_bare_re, U32 orig_rx_flags, U32 pm_flags); } regexp_engine; /* @@ -174,9 +183,12 @@ typedef struct regexp_engine { paren name. >= 1 is reserved for actual numbered captures, i.e. $1, $2 etc. */ -#define RX_BUFF_IDX_PREMATCH -2 /* $` / ${^PREMATCH} */ -#define RX_BUFF_IDX_POSTMATCH -1 /* $' / ${^POSTMATCH} */ -#define RX_BUFF_IDX_FULLMATCH 0 /* $& / ${^MATCH} */ +#define RX_BUFF_IDX_CARET_PREMATCH -5 /* ${^PREMATCH} */ +#define RX_BUFF_IDX_CARET_POSTMATCH -4 /* ${^POSTMATCH} */ +#define RX_BUFF_IDX_CARET_FULLMATCH -3 /* ${^MATCH} */ +#define RX_BUFF_IDX_PREMATCH -2 /* $` */ +#define RX_BUFF_IDX_POSTMATCH -1 /* $' */ +#define RX_BUFF_IDX_FULLMATCH 0 /* $& */ /* Flags that are passed to the named_buff and named_buff_iter @@ -467,6 +479,18 @@ get_regex_charset_name(const U32 flags, STRLEN* const lenp) assert(SvTYPE(_rx_subbeg) == SVt_REGEXP); \ &SvANY(_rx_subbeg)->subbeg; \ })) +# define RX_SUBOFFSET(prog) \ + (*({ \ + const REGEXP *const _rx_suboffset = (prog); \ + assert(SvTYPE(_rx_suboffset) == SVt_REGEXP); \ + &SvANY(_rx_suboffset)->suboffset; \ + })) +# define RX_SUBCOFFSET(prog) \ + (*({ \ + const REGEXP *const _rx_subcoffset = (prog); \ + assert(SvTYPE(_rx_subcoffset) == SVt_REGEXP); \ + &SvANY(_rx_subcoffset)->subcoffset; \ + })) # define RX_OFFS(prog) \ (*({ \ const REGEXP *const _rx_offs = (prog); \ @@ -483,6 +507,8 @@ get_regex_charset_name(const U32 flags, STRLEN* const lenp) # define RX_EXTFLAGS(prog) RXp_EXTFLAGS((struct regexp *)SvANY(prog)) # define RX_ENGINE(prog) (((struct regexp *)SvANY(prog))->engine) # define RX_SUBBEG(prog) (((struct regexp *)SvANY(prog))->subbeg) +# define RX_SUBOFFSET(prog) (((struct regexp *)SvANY(prog))->suboffset) +# define RX_SUBCOFFSET(prog) (((struct regexp *)SvANY(prog))->subcoffset) # define RX_OFFS(prog) (((struct regexp *)SvANY(prog))->offs) # define RX_NPARENS(prog) (((struct regexp *)SvANY(prog))->nparens) #endif @@ -492,7 +518,6 @@ get_regex_charset_name(const U32 flags, STRLEN* const lenp) #define RX_GOFS(prog) (((struct regexp *)SvANY(prog))->gofs) #define RX_LASTPAREN(prog) (((struct regexp *)SvANY(prog))->lastparen) #define RX_LASTCLOSEPAREN(prog) (((struct regexp *)SvANY(prog))->lastcloseparen) -#define RX_SEEN_EVALS(prog) (((struct regexp *)SvANY(prog))->seen_evals) #define RX_SAVED_COPY(prog) (((struct regexp *)SvANY(prog))->saved_copy) #endif /* PLUGGABLE_RE_EXTENSION */ @@ -532,6 +557,11 @@ get_regex_charset_name(const U32 flags, STRLEN* const lenp) #define REXEC_SCREAM 0x04 /* use scream table. */ #define REXEC_IGNOREPOS 0x08 /* \G matches at start. */ #define REXEC_NOT_FIRST 0x10 /* This is another iteration of //g. */ + /* under REXEC_COPY_STR, it's ok for the + * engine (modulo PL_sawamperand etc) + * to skip copying ... */ +#define REXEC_COPY_SKIP_PRE 0x20 /* ...the $` part of the string, or */ +#define REXEC_COPY_SKIP_POST 0x40 /* ...the $' part of the string */ #if defined(__GNUC__) && !defined(PERL_GCC_BRACE_GROUPS_FORBIDDEN) # define ReREFCNT_inc(re) \ @@ -604,6 +634,7 @@ typedef struct regmatch_state { /* this first element must match u.yes */ struct regmatch_state *prev_yes_state; U32 lastparen; + U32 lastcloseparen; CHECKPOINT cp; } branchlike; @@ -612,6 +643,7 @@ typedef struct regmatch_state { /* the first elements must match u.branchlike */ struct regmatch_state *prev_yes_state; U32 lastparen; + U32 lastcloseparen; CHECKPOINT cp; regnode *next_branch; /* next branch node */ @@ -621,17 +653,17 @@ typedef struct regmatch_state { /* the first elements must match u.branchlike */ struct regmatch_state *prev_yes_state; U32 lastparen; + U32 lastcloseparen; CHECKPOINT cp; U32 accepted; /* how many accepting states left */ + bool longfold;/* saw a fold with a 1->n char mapping */ U16 *jump; /* positive offsets from me */ - regnode *B; /* node following the trie */ regnode *me; /* Which node am I - needed for jump tries*/ U8 *firstpos;/* pos in string of first trie match */ U32 firstchars;/* len in chars of firstpos from start */ U16 nextword;/* next word to try */ U16 topword; /* longest accepted word */ - bool longfold;/* saw a fold with a 1->n char mapping */ } trie; /* special types - these members are used to store state for special @@ -704,6 +736,8 @@ typedef struct regmatch_state { struct regmatch_state *prev_yes_state; I32 c1, c2; /* case fold search */ CHECKPOINT cp; + U32 lastparen; + U32 lastcloseparen; I32 alen; /* length of first-matched A string */ I32 count; bool minmod; @@ -714,6 +748,8 @@ typedef struct regmatch_state { struct { U32 paren; CHECKPOINT cp; + U32 lastparen; + U32 lastcloseparen; I32 c1, c2; /* case fold search */ char *maxpos; /* highest possible point in string to match */ char *oldloc; /* the previous locinput */ @@ -739,14 +775,7 @@ typedef struct regmatch_slab { #define PL_reg_flags PL_reg_state.re_state_reg_flags #define PL_bostr PL_reg_state.re_state_bostr -#define PL_reginput PL_reg_state.re_state_reginput #define PL_regeol PL_reg_state.re_state_regeol -#define PL_regoffs PL_reg_state.re_state_regoffs -#define PL_reglastparen PL_reg_state.re_state_reglastparen -#define PL_reglastcloseparen PL_reg_state.re_state_reglastcloseparen -#define PL_reg_start_tmp PL_reg_state.re_state_reg_start_tmp -#define PL_reg_start_tmpl PL_reg_state.re_state_reg_start_tmpl -#define PL_reg_eval_set PL_reg_state.re_state_reg_eval_set #define PL_reg_match_utf8 PL_reg_state.re_state_reg_match_utf8 #define PL_reg_magic PL_reg_state.re_state_reg_magic #define PL_reg_oldpos PL_reg_state.re_state_reg_oldpos @@ -754,6 +783,8 @@ typedef struct regmatch_slab { #define PL_reg_curpm PL_reg_state.re_state_reg_curpm #define PL_reg_oldsaved PL_reg_state.re_state_reg_oldsaved #define PL_reg_oldsavedlen PL_reg_state.re_state_reg_oldsavedlen +#define PL_reg_oldsavedoffset PL_reg_state.re_state_reg_oldsavedoffset +#define PL_reg_oldsavedcoffset PL_reg_state.re_state_reg_oldsavedcoffset #define PL_reg_maxiter PL_reg_state.re_state_reg_maxiter #define PL_reg_leftiter PL_reg_state.re_state_reg_leftiter #define PL_reg_poscache PL_reg_state.re_state_reg_poscache @@ -764,22 +795,18 @@ typedef struct regmatch_slab { struct re_save_state { U32 re_state_reg_flags; /* from regexec.c */ - U32 re_state_reg_start_tmpl; /* from regexec.c */ - I32 re_state_reg_eval_set; /* from regexec.c */ + bool re_state_eval_setup_done; /* from regexec.c */ bool re_state_reg_match_utf8; /* from regexec.c */ bool re_reparsing; /* runtime (?{}) fed back into parser */ char *re_state_bostr; - char *re_state_reginput; /* String-input pointer. */ char *re_state_regeol; /* End of input, for $ check. */ - regexp_paren_pair *re_state_regoffs; /* Pointer to start/end pairs */ - U32 *re_state_reglastparen; /* Similarly for lastparen. */ - U32 *re_state_reglastcloseparen; /* Similarly for lastcloseparen. */ - char **re_state_reg_start_tmp; /* from regexec.c */ MAGIC *re_state_reg_magic; /* from regexec.c */ PMOP *re_state_reg_oldcurpm; /* from regexec.c */ PMOP *re_state_reg_curpm; /* from regexec.c */ char *re_state_reg_oldsaved; /* old saved substr during match */ STRLEN re_state_reg_oldsavedlen; /* old length of saved substr during match */ + STRLEN re_state_reg_oldsavedoffset; /* old offset of saved substr during match */ + STRLEN re_state_reg_oldsavedcoffset;/* old coffset of saved substr during match */ STRLEN re_state_reg_poscache_size; /* size of pos cache of WHILEM */ I32 re_state_reg_oldpos; /* from regexec.c */ I32 re_state_reg_maxiter; /* max wait until caching pos */