X-Git-Url: https://perl5.git.perl.org/perl5.git/blobdiff_plain/fe578d7fdd84ab0398dc36da7f84e59e1f2bb290..5e727a3e37a952b6b2298aac864ef008e764ee8d:/regexp.h?ds=sidebyside diff --git a/regexp.h b/regexp.h index a833c6b..c2ffcf3 100644 --- a/regexp.h +++ b/regexp.h @@ -1,7 +1,7 @@ /* regexp.h * * Copyright (C) 1993, 1994, 1996, 1997, 1999, 2000, 2001, 2003, - * 2005, 2006 by Larry Wall and others + * 2005, 2006, 2007, by Larry Wall and others * * You may distribute under the terms of either the GNU General Public * License or the Artistic License, as specified in the README file. @@ -55,8 +55,17 @@ typedef struct regexp_paren_pair { I32 end; } regexp_paren_pair; -/* this is ordered such that the most commonly used - fields are at the start of the struct */ +/* + The regexp/REGEXP struct, see L<perlreapi> for further documentation + on the individual fields. The struct is ordered so that the most + commonly used fields are placed at the start. + + Any patch that adds items to this struct will need to include + changes to F<sv.c> (C<Perl_re_dup()>) and F<regcomp.c> + (C<pregfree()>). This involves freeing or cloning items in the + regexp's data array based on the data item's type. +*/ + typedef struct regexp { /* what engine created this regexp? */ const struct regexp_engine* engine; @@ -112,29 +121,111 @@ typedef struct re_scream_pos_data_s * Any regex engine implementation must be able to build one of these. 
*/ typedef struct regexp_engine { - regexp* (*comp) (pTHX_ char* exp, char* xend, U32 pm_flags); - I32 (*exec) (pTHX_ regexp* prog, char* stringarg, char* strend, - char* strbeg, I32 minend, SV* screamer, - void* data, U32 flags); - char* (*intuit) (pTHX_ regexp *prog, SV *sv, char *strpos, - char *strend, U32 flags, - struct re_scream_pos_data_s *data); - SV* (*checkstr) (pTHX_ regexp *prog); - void (*free) (pTHX_ struct regexp* r); - SV* (*numbered_buff_get) (pTHX_ const REGEXP * const rx, I32 paren, SV* usesv); - SV* (*named_buff_get)(pTHX_ const REGEXP * const rx, SV* namesv, U32 flags); - SV* (*qr_pkg)(pTHX_ const REGEXP * const rx); + REGEXP* (*comp) (pTHX_ const SV * const pattern, const U32 flags); + I32 (*exec) (pTHX_ REGEXP * const rx, char* stringarg, char* strend, + char* strbeg, I32 minend, SV* screamer, + void* data, U32 flags); + char* (*intuit) (pTHX_ REGEXP * const rx, SV *sv, char *strpos, + char *strend, const U32 flags, + re_scream_pos_data *data); + SV* (*checkstr) (pTHX_ REGEXP * const rx); + void (*free) (pTHX_ REGEXP * const rx); + void (*numbered_buff_FETCH) (pTHX_ REGEXP * const rx, const I32 paren, + SV * const sv); + void (*numbered_buff_STORE) (pTHX_ REGEXP * const rx, const I32 paren, + SV const * const value); + I32 (*numbered_buff_LENGTH) (pTHX_ REGEXP * const rx, const SV * const sv, + const I32 paren); + SV* (*named_buff) (pTHX_ REGEXP * const rx, SV * const key, + SV * const value, const U32 flags); + SV* (*named_buff_iter) (pTHX_ REGEXP * const rx, const SV * const lastkey, + const U32 flags); + SV* (*qr_package)(pTHX_ REGEXP * const rx); #ifdef USE_ITHREADS - void* (*dupe) (pTHX_ const regexp *r, CLONE_PARAMS *param); -#endif + void* (*dupe) (pTHX_ REGEXP * const rx, CLONE_PARAMS *param); +#endif } regexp_engine; +/* + These are passed to the numbered capture variable callbacks as the + paren name. >= 1 is reserved for actual numbered captures, i.e. $1, + $2 etc. 
+*/ +#define RX_BUFF_IDX_PREMATCH -2 /* $` / ${^PREMATCH} */ +#define RX_BUFF_IDX_POSTMATCH -1 /* $' / ${^POSTMATCH} */ +#define RX_BUFF_IDX_FULLMATCH 0 /* $& / ${^MATCH} */ + +/* + Flags that are passed to the named_buff and named_buff_iter + callbacks above. Those routines are called from universal.c via the + Tie::Hash::NamedCapture interface for %+ and %- and the re:: + functions in the same file. +*/ + +/* The Tie::Hash::NamedCapture operation this is part of, if any */ +#define RXapif_FETCH 0x0001 +#define RXapif_STORE 0x0002 +#define RXapif_DELETE 0x0004 +#define RXapif_CLEAR 0x0008 +#define RXapif_EXISTS 0x0010 +#define RXapif_SCALAR 0x0020 +#define RXapif_FIRSTKEY 0x0040 +#define RXapif_NEXTKEY 0x0080 + +/* Whether %+ or %- is being operated on */ +#define RXapif_ONE 0x0100 /* %+ */ +#define RXapif_ALL 0x0200 /* %- */ + +/* Whether this is being called from a re:: function */ +#define RXapif_REGNAME 0x0400 +#define RXapif_REGNAMES 0x0800 +#define RXapif_REGNAMES_COUNT 0x1000 + +/* +=head1 REGEXP Functions + +=for apidoc Am|REGEXP *|SvRX|SV *sv + +Convenience macro to get the REGEXP from a SV. This is approximately +equivalent to the following snippet: + + if (SvMAGICAL(sv)) + mg_get(sv); + if (SvROK(sv) && + (tmpsv = (SV*)SvRV(sv)) && + SvTYPE(tmpsv) == SVt_PVMG && + (tmpmg = mg_find(tmpsv, PERL_MAGIC_qr))) + { + return (REGEXP *)tmpmg->mg_obj; + } + +NULL will be returned if a REGEXP* is not found. + +=for apidoc Am|bool|SvRXOK|SV* sv + +Returns a boolean indicating whether the SV contains qr magic +(PERL_MAGIC_qr). + +If you want to do something with the REGEXP* later use SvRX instead +and check for NULL. + +=cut +*/ + +#define SvRX(sv) (Perl_get_re_arg(aTHX_ sv)) +#define SvRXOK(sv) (Perl_get_re_arg(aTHX_ sv) ? 
TRUE : FALSE) + + /* Flags stored in regexp->extflags * These are used by code external to the regexp engine * * Note that flags starting with RXf_PMf_ have exact equivalents * stored in op_pmflags and which are defined in op.h, they are defined * numerically here only for clarity. + * + * NOTE: if you modify any RXf flags you should run regen.pl or regcomp.pl + * so that regnodes.h is updated with the changes. + * */ /* Anchor and GPOS related stuff */ @@ -153,6 +244,7 @@ typedef struct regexp_engine { #define RXf_SKIPWHITE 0x00000100 /* Pattern is for a split / / */ #define RXf_START_ONLY 0x00000200 /* Pattern is /^/ */ #define RXf_WHITE 0x00000400 /* Pattern is /\s+/ */ +#define RXf_NULL 0x40000000 /* Pattern is // */ /* 0x1F800 of extflags is used by (RXf_)PMf_COMPILETIME */ #define RXf_PMf_LOCALE 0x00000800 /* use locale */ @@ -160,7 +252,7 @@ typedef struct regexp_engine { #define RXf_PMf_SINGLELINE 0x00002000 /* /s */ #define RXf_PMf_FOLD 0x00004000 /* /i */ #define RXf_PMf_EXTENDED 0x00008000 /* /x */ -#define RXf_PMf_KEEPCOPY 0x00010000 /* /k */ +#define RXf_PMf_KEEPCOPY 0x00010000 /* /p */ /* these flags are transfered from the PMOP->op_pmflags member during compilation */ #define RXf_PMf_STD_PMMOD (RXf_PMf_MULTILINE|RXf_PMf_SINGLELINE|RXf_PMf_FOLD|RXf_PMf_EXTENDED) #define RXf_PMf_COMPILETIME (RXf_PMf_MULTILINE|RXf_PMf_SINGLELINE|RXf_PMf_LOCALE|RXf_PMf_FOLD|RXf_PMf_EXTENDED|RXf_PMf_KEEPCOPY) @@ -202,6 +294,11 @@ typedef struct regexp_engine { #define M_PAT_MODS QR_PAT_MODS LOOP_PAT_MODS #define S_PAT_MODS M_PAT_MODS EXEC_PAT_MODS +/* + * NOTE: if you modify any RXf flags you should run regen.pl or regcomp.pl + * so that regnodes.h is updated with the changes. 
+ * + */ /* What we have seen */ #define RXf_LOOKBEHIND_SEEN 0x00020000 @@ -220,7 +317,14 @@ typedef struct regexp_engine { #define RXf_USE_INTUIT_NOML 0x01000000 #define RXf_USE_INTUIT_ML 0x02000000 #define RXf_INTUIT_TAIL 0x04000000 -/* one bit here */ + +/* + Set in Perl_pmruntime if op_flags & OPf_SPECIAL, i.e. split. Will + be used by regex engines to check whether they should set + RXf_SKIPWHITE +*/ +#define RXf_SPLIT 0x08000000 + #define RXf_USE_INTUIT (RXf_USE_INTUIT_NOML|RXf_USE_INTUIT_ML) /* Copy and tainted info */ @@ -228,6 +332,11 @@ typedef struct regexp_engine { #define RXf_TAINTED_SEEN 0x20000000 #define RXf_TAINTED 0x80000000 /* this pattern is tainted */ +/* + * NOTE: if you modify any RXf flags you should run regen.pl or regcomp.pl + * so that regnodes.h is updated with the changes. + * + */ #define RX_HAS_CUTGROUP(prog) ((prog)->intflags & PREGf_CUTGROUP_SEEN) #define RX_MATCH_TAINTED(prog) ((prog)->extflags & RXf_TAINTED_SEEN)