#define SV_SAVED_COPY
#endif
+/* offsets within a string of a particular /(.)/ capture */
+
typedef struct regexp_paren_pair {
I32 start; /* offset at which the capture begins */
I32 end; /* offset at which the capture ends */
+ /* 'start_tmp' records a new opening position before the matching end
+ * has been found, so that the old start and end values are still
+ * valid, e.g.
+ * "abc" =~ /(.(?{print "[$1]"}))+/
+ * outputs [][a][b]
+ * This field is not part of the API. */
+ I32 start_tmp;
} regexp_paren_pair;
+#if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_UTF8_C)
+#define _invlist_union(a, b, output) _invlist_union_maybe_complement_2nd(a, b, FALSE, output)
+#define _invlist_intersection(a, b, output) _invlist_intersection_maybe_complement_2nd(a, b, FALSE, output)
+
+/* These convenience wrappers are only defined when PERL_IN_REGCOMP_C or
+ * PERL_IN_UTF8_C is defined.  The union and intersection wrappers pass
+ * FALSE as the third argument, so presumably b is used as given (not
+ * complemented) -- confirm against the *_maybe_complement_2nd functions.
+ *
+ * Subtracting b from a leaves in a everything that was there that isn't in b,
+ * that is the intersection of a with b's complement */
+#define _invlist_subtract(a, b, output) _invlist_intersection_maybe_complement_2nd(a, b, TRUE, output)
+#endif
+
+/* record the position of a (?{...}) within a pattern */
+
+struct reg_code_block {
+ STRLEN start; /* position in the pattern where the (?{...}) starts */
+ STRLEN end; /* position in the pattern where the (?{...}) ends */
+ OP *block; /* presumably the op tree for the code block -- TODO confirm */
+ REGEXP *src_regex; /* presumably the regex this block came from -- TODO confirm */
+};
+
+
/*
The regexp/REGEXP struct, see L<perlreapi> for further documentation
on the individual fields. The struct is ordered so that the most
char *subbeg; \
SV_SAVED_COPY /* If non-NULL, SV which is COW from original */\
I32 sublen; /* Length of string pointed by subbeg */ \
+ I32 suboffset; /* byte offset of subbeg from logical start of str */ \
+ I32 subcoffset; /* suboffset equiv, but in chars (for @-/@+) */ \
/* Information about the match that isn't often used */ \
/* offset from wrapped to the start of precomp */ \
PERL_BITFIELD32 pre_prefix:4; \
- /* number of eval groups in the pattern - for security checks */\
- PERL_BITFIELD32 seen_evals:28
+ CV *qr_anoncv /* the anon sub wrapped round qr/(?{..})/ */
typedef struct regexp {
_XPV_HEAD;
#ifdef USE_ITHREADS
void* (*dupe) (pTHX_ REGEXP * const rx, CLONE_PARAMS *param);
#endif
+ REGEXP* (*op_comp) (pTHX_ SV ** const patternp, int pat_count,
+ OP *expr, const struct regexp_engine* eng,
+ REGEXP *VOL old_re,
+ bool *is_bare_re, U32 orig_rx_flags, U32 pm_flags);
} regexp_engine;
/*
paren name. >= 1 is reserved for actual numbered captures, i.e. $1,
$2 etc.
*/
-#define RX_BUFF_IDX_PREMATCH -2 /* $` / ${^PREMATCH} */
-#define RX_BUFF_IDX_POSTMATCH -1 /* $' / ${^POSTMATCH} */
-#define RX_BUFF_IDX_FULLMATCH 0 /* $& / ${^MATCH} */
+#define RX_BUFF_IDX_CARET_PREMATCH -5 /* ${^PREMATCH} */
+#define RX_BUFF_IDX_CARET_POSTMATCH -4 /* ${^POSTMATCH} */
+#define RX_BUFF_IDX_CARET_FULLMATCH -3 /* ${^MATCH} */
+#define RX_BUFF_IDX_PREMATCH -2 /* $` */
+#define RX_BUFF_IDX_POSTMATCH -1 /* $' */
+#define RX_BUFF_IDX_FULLMATCH 0 /* $& */
/*
Flags that are passed to the named_buff and named_buff_iter
case SINGLE_PAT_MOD: *(pmfl) |= RXf_PMf_SINGLELINE; break; \
case XTENDED_PAT_MOD: *(pmfl) |= RXf_PMf_EXTENDED; break
-/* Note, includes locale, unicode */
+/* Note, includes charset ones, assumes 0 is the default for them */
#define STD_PMMOD_FLAGS_CLEAR(pmfl) \
*(pmfl) &= ~(RXf_PMf_FOLD|RXf_PMf_MULTILINE|RXf_PMf_SINGLELINE|RXf_PMf_EXTENDED|RXf_PMf_CHARSET)
* character is bit +1, etc. */
#define STD_PAT_MODS "msix"
+#define CHARSET_PAT_MODS ASCII_RESTRICT_PAT_MODS DEPENDS_PAT_MODS LOCALE_PAT_MODS UNICODE_PAT_MODS
+
/* This string is expected by XS_re_regexp_pattern() in universal.c to be ordered
* so that the first character is the flag in bit RXf_PMf_STD_PMMOD_SHIFT of
* extflags; the next character is in bit +1, etc. */
#define INT_PAT_MODS STD_PAT_MODS KEEPCOPY_PAT_MODS
#define EXT_PAT_MODS ONCE_PAT_MODS KEEPCOPY_PAT_MODS
-#define QR_PAT_MODS STD_PAT_MODS EXT_PAT_MODS
+#define QR_PAT_MODS STD_PAT_MODS EXT_PAT_MODS CHARSET_PAT_MODS
#define M_PAT_MODS QR_PAT_MODS LOOP_PAT_MODS
#define S_PAT_MODS M_PAT_MODS EXEC_PAT_MODS NONDESTRUCT_PAT_MODS
* unshared area without affecting binary compatibility */
#define RXf_BASE_SHIFT (_RXf_PMf_SHIFT_NEXT+1)
-/* embed.pl doesn't yet know how to handle static inline functions, so
- manually decorate them here with gcc-style attributes.
-*/
+/* Manually decorate this function with gcc-style attributes just to
+ * avoid having to restructure the header files and the order in which
+ * they are included, as proto.h would have to be included before this
+ * file, and isn't */
+
PERL_STATIC_INLINE const char *
get_regex_charset_name(const U32 flags, STRLEN* const lenp)
__attribute__warn_unused_result__;
case REGEX_UNICODE_CHARSET: return UNICODE_PAT_MODS;
case REGEX_ASCII_RESTRICTED_CHARSET: return ASCII_RESTRICT_PAT_MODS;
case REGEX_ASCII_MORE_RESTRICTED_CHARSET:
- return ASCII_MORE_RESTRICT_PAT_MODS;
+ *lenp = 2;
+ return ASCII_MORE_RESTRICT_PAT_MODS;
+ default:
+ return "?"; /* Unknown */
}
-
- return "?"; /* Unknown */
}
/* Anchor and GPOS related stuff */
#define RXf_INTUIT_TAIL (1<<(RXf_BASE_SHIFT+14))
/*
- Set in Perl_pmruntime if op_flags & OPf_SPECIAL, i.e. split. Will
- be used by regex engines to check whether they should set
- RXf_SKIPWHITE
+ This used to be set in Perl_pmruntime if op_flags & OPf_SPECIAL, i.e.
+ split. It was used by the regex engine to check whether it should set
+ RXf_SKIPWHITE. Regexp plugins on CPAN also have done the same thing
+ historically, so we leave this flag defined, even though it is never set.
*/
-#define RXf_SPLIT (1<<(RXf_BASE_SHIFT+15))
+#if !defined(PERL_CORE) || defined(PERL_IN_DUMP_C)
+# define RXf_SPLIT (1<<(RXf_BASE_SHIFT+15))
+#endif
#define RXf_USE_INTUIT (RXf_USE_INTUIT_NOML|RXf_USE_INTUIT_ML)
/* Flags indicating special patterns */
#define RXf_START_ONLY (1<<(RXf_BASE_SHIFT+19)) /* Pattern is /^/ */
-#define RXf_SKIPWHITE (1<<(RXf_BASE_SHIFT+20)) /* Pattern is for a split / / */
+/* No longer used, but CPAN modules still mention it. */
+#if !defined(PERL_CORE) || defined(PERL_IN_DUMP_C)
+# define RXf_SKIPWHITE (1<<(RXf_BASE_SHIFT+20)) /* Pattern is for a split " " */
+#endif
#define RXf_WHITE (1<<(RXf_BASE_SHIFT+21)) /* Pattern is /\s+/ */
-#define RXf_NULL (1<<(RXf_BASE_SHIFT+22)) /* Pattern is // */
+#define RXf_NULL (1U<<(RXf_BASE_SHIFT+22)) /* Pattern is // */
#if RXf_BASE_SHIFT+22 > 31
# error Too many RXf_PMf bits used. See regnodes.h for any spare in middle
#endif
assert(SvTYPE(_rx_subbeg) == SVt_REGEXP); \
&SvANY(_rx_subbeg)->subbeg; \
}))
+# define RX_SUBOFFSET(prog) /* accesses prog's suboffset field */ \
+ (*({ \
+ const REGEXP *const _rx_suboffset = (prog); /* evaluate prog only once */ \
+ assert(SvTYPE(_rx_suboffset) == SVt_REGEXP); /* must really be a regexp SV */ \
+ &SvANY(_rx_suboffset)->suboffset; /* address is dereferenced by the outer '*' */ \
+ }))
+# define RX_SUBCOFFSET(prog) /* accesses prog's subcoffset field */ \
+ (*({ \
+ const REGEXP *const _rx_subcoffset = (prog); /* evaluate prog only once */ \
+ assert(SvTYPE(_rx_subcoffset) == SVt_REGEXP); /* must really be a regexp SV */ \
+ &SvANY(_rx_subcoffset)->subcoffset; /* address is dereferenced by the outer '*' */ \
+ }))
# define RX_OFFS(prog) \
(*({ \
const REGEXP *const _rx_offs = (prog); \
# define RX_EXTFLAGS(prog) RXp_EXTFLAGS((struct regexp *)SvANY(prog))
# define RX_ENGINE(prog) (((struct regexp *)SvANY(prog))->engine)
# define RX_SUBBEG(prog) (((struct regexp *)SvANY(prog))->subbeg)
+# define RX_SUBOFFSET(prog) (((struct regexp *)SvANY(prog))->suboffset)
+# define RX_SUBCOFFSET(prog) (((struct regexp *)SvANY(prog))->subcoffset)
# define RX_OFFS(prog) (((struct regexp *)SvANY(prog))->offs)
# define RX_NPARENS(prog) (((struct regexp *)SvANY(prog))->nparens)
#endif
#define RX_GOFS(prog) (((struct regexp *)SvANY(prog))->gofs)
#define RX_LASTPAREN(prog) (((struct regexp *)SvANY(prog))->lastparen)
#define RX_LASTCLOSEPAREN(prog) (((struct regexp *)SvANY(prog))->lastcloseparen)
-#define RX_SEEN_EVALS(prog) (((struct regexp *)SvANY(prog))->seen_evals)
#define RX_SAVED_COPY(prog) (((struct regexp *)SvANY(prog))->saved_copy)
#endif /* PLUGGABLE_RE_EXTENSION */
#define REXEC_SCREAM 0x04 /* use scream table. */
#define REXEC_IGNOREPOS 0x08 /* \G matches at start. */
#define REXEC_NOT_FIRST 0x10 /* This is another iteration of //g. */
+ /* under REXEC_COPY_STR, it's ok for the
+ * engine (modulo PL_sawamperand etc)
+ * to skip copying ... */
+#define REXEC_COPY_SKIP_PRE 0x20 /* ...the $` part of the string, or */
+#define REXEC_COPY_SKIP_POST 0x40 /* ...the $' part of the string */
#if defined(__GNUC__) && !defined(PERL_GCC_BRACE_GROUPS_FORBIDDEN)
# define ReREFCNT_inc(re) \
/* this first element must match u.yes */
struct regmatch_state *prev_yes_state;
U32 lastparen;
+ U32 lastcloseparen;
CHECKPOINT cp;
} branchlike;
/* the first elements must match u.branchlike */
struct regmatch_state *prev_yes_state;
U32 lastparen;
+ U32 lastcloseparen;
CHECKPOINT cp;
regnode *next_branch; /* next branch node */
/* the first elements must match u.branchlike */
struct regmatch_state *prev_yes_state;
U32 lastparen;
+ U32 lastcloseparen;
CHECKPOINT cp;
U32 accepted; /* how many accepting states left */
+ bool longfold;/* saw a fold with a 1->n char mapping */
U16 *jump; /* positive offsets from me */
- regnode *B; /* node following the trie */
regnode *me; /* Which node am I - needed for jump tries*/
U8 *firstpos;/* pos in string of first trie match */
U32 firstchars;/* len in chars of firstpos from start */
U16 nextword;/* next word to try */
U16 topword; /* longest accepted word */
- bool longfold;/* saw a fold with a 1->n char mapping */
} trie;
/* special types - these members are used to store state for special
struct regmatch_state *prev_yes_state;
I32 c1, c2; /* case fold search */
CHECKPOINT cp;
+ U32 lastparen;
+ U32 lastcloseparen;
I32 alen; /* length of first-matched A string */
I32 count;
bool minmod;
struct {
U32 paren;
CHECKPOINT cp;
+ U32 lastparen;
+ U32 lastcloseparen;
I32 c1, c2; /* case fold search */
char *maxpos; /* highest possible point in string to match */
char *oldloc; /* the previous locinput */
#define PL_reg_flags PL_reg_state.re_state_reg_flags
#define PL_bostr PL_reg_state.re_state_bostr
-#define PL_reginput PL_reg_state.re_state_reginput
#define PL_regeol PL_reg_state.re_state_regeol
-#define PL_regoffs PL_reg_state.re_state_regoffs
-#define PL_reglastparen PL_reg_state.re_state_reglastparen
-#define PL_reglastcloseparen PL_reg_state.re_state_reglastcloseparen
-#define PL_reg_start_tmp PL_reg_state.re_state_reg_start_tmp
-#define PL_reg_start_tmpl PL_reg_state.re_state_reg_start_tmpl
-#define PL_reg_eval_set PL_reg_state.re_state_reg_eval_set
#define PL_reg_match_utf8 PL_reg_state.re_state_reg_match_utf8
#define PL_reg_magic PL_reg_state.re_state_reg_magic
#define PL_reg_oldpos PL_reg_state.re_state_reg_oldpos
#define PL_reg_curpm PL_reg_state.re_state_reg_curpm
#define PL_reg_oldsaved PL_reg_state.re_state_reg_oldsaved
#define PL_reg_oldsavedlen PL_reg_state.re_state_reg_oldsavedlen
+#define PL_reg_oldsavedoffset PL_reg_state.re_state_reg_oldsavedoffset
+#define PL_reg_oldsavedcoffset PL_reg_state.re_state_reg_oldsavedcoffset
#define PL_reg_maxiter PL_reg_state.re_state_reg_maxiter
#define PL_reg_leftiter PL_reg_state.re_state_reg_leftiter
#define PL_reg_poscache PL_reg_state.re_state_reg_poscache
struct re_save_state {
U32 re_state_reg_flags; /* from regexec.c */
- U32 re_state_reg_start_tmpl; /* from regexec.c */
- I32 re_state_reg_eval_set; /* from regexec.c */
+ bool re_state_eval_setup_done; /* from regexec.c */
bool re_state_reg_match_utf8; /* from regexec.c */
+ bool re_reparsing; /* runtime (?{}) fed back into parser */
char *re_state_bostr;
- char *re_state_reginput; /* String-input pointer. */
char *re_state_regeol; /* End of input, for $ check. */
- regexp_paren_pair *re_state_regoffs; /* Pointer to start/end pairs */
- U32 *re_state_reglastparen; /* Similarly for lastparen. */
- U32 *re_state_reglastcloseparen; /* Similarly for lastcloseparen. */
- char **re_state_reg_start_tmp; /* from regexec.c */
MAGIC *re_state_reg_magic; /* from regexec.c */
PMOP *re_state_reg_oldcurpm; /* from regexec.c */
PMOP *re_state_reg_curpm; /* from regexec.c */
char *re_state_reg_oldsaved; /* old saved substr during match */
STRLEN re_state_reg_oldsavedlen; /* old length of saved substr during match */
+ STRLEN re_state_reg_oldsavedoffset; /* old offset of saved substr during match */
+ STRLEN re_state_reg_oldsavedcoffset;/* old coffset of saved substr during match */
STRLEN re_state_reg_poscache_size; /* size of pos cache of WHILEM */
I32 re_state_reg_oldpos; /* from regexec.c */
I32 re_state_reg_maxiter; /* max wait until caching pos */
* Local variables:
* c-indentation-style: bsd
* c-basic-offset: 4
- * indent-tabs-mode: t
+ * indent-tabs-mode: nil
* End:
*
- * ex: set ts=8 sts=4 sw=4 noet:
+ * ex: set ts=8 sts=4 sw=4 et:
*/