struct regexp;
struct reg_substr_datum {
- I32 min_offset;
- I32 max_offset;
+ SSize_t min_offset;
+ SSize_t max_offset;
SV *substr; /* non-utf8 variant */
SV *utf8_substr; /* utf8 variant */
- I32 end_shift;
+ SSize_t end_shift;
};
struct reg_substr_data {
struct reg_substr_datum data[3]; /* Actual array */
/* offsets within a string of a particular /(.)/ capture */
typedef struct regexp_paren_pair {
- I32 start;
- I32 end;
+ SSize_t start;
+ SSize_t end;
/* 'start_tmp' records a new opening position before the matching end
* has been found, so that the old start and end values are still
* valid, e.g.
* "abc" =~ /(.(?{print "[$1]"}))+/
*outputs [][a][b]
* This field is not part of the API. */
- I32 start_tmp;
+ SSize_t start_tmp;
} regexp_paren_pair;
#if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_UTF8_C)
/* Information about the match that the perl core uses to */ \
/* manage things */ \
U32 extflags; /* Flags used both externally and internally */ \
- I32 minlen; /* mininum possible number of chars in string to match */\
- I32 minlenret; /* mininum possible number of chars in $& */ \
- U32 gofs; /* chars left of pos that we search from */ \
+ SSize_t minlen; /* minimum possible number of chars in string to match */\
+ SSize_t minlenret; /* minimum possible number of chars in $& */ \
+ STRLEN gofs; /* chars left of pos that we search from */ \
/* substring data about strings that must appear in the */ \
/* final match, used for optimisations */ \
struct reg_substr_data *substrs; \
/* saved or original string so \digit works forever. */ \
char *subbeg; \
SV_SAVED_COPY /* If non-NULL, SV which is COW from original */\
- I32 sublen; /* Length of string pointed by subbeg */ \
- I32 suboffset; /* byte offset of subbeg from logical start of str */ \
- I32 subcoffset; /* suboffset equiv, but in chars (for @-/@+) */ \
+ SSize_t sublen; /* Length of string pointed by subbeg */ \
+ SSize_t suboffset; /* byte offset of subbeg from logical start of str */ \
+ SSize_t subcoffset; /* suboffset equiv, but in chars (for @-/@+) */ \
/* Information about the match that isn't often used */ \
+ SSize_t maxlen; /* maximum possible number of chars in string to match */\
/* offset from wrapped to the start of precomp */ \
PERL_BITFIELD32 pre_prefix:4; \
/* original flags used to compile the pattern, may differ */ \
typedef struct re_scream_pos_data_s
{
char **scream_olds; /* match pos */
- I32 *scream_pos; /* Internal iterator of scream. */
+ SSize_t *scream_pos; /* Internal iterator of scream. */
} re_scream_pos_data;
/* regexp_engine structure. This is the dispatch table for regexes.
typedef struct regexp_engine {
REGEXP* (*comp) (pTHX_ SV * const pattern, U32 flags);
I32 (*exec) (pTHX_ REGEXP * const rx, char* stringarg, char* strend,
- char* strbeg, I32 minend, SV* screamer,
+ char* strbeg, SSize_t minend, SV* sv,
void* data, U32 flags);
char* (*intuit) (pTHX_
REGEXP * const rx,
=for apidoc Am|REGEXP *|SvRX|SV *sv
-Convenience macro to get the REGEXP from a SV. This is approximately
+Convenience macro to get the REGEXP from a SV. This is approximately
equivalent to the following snippet:
if (SvMAGICAL(sv))
}
}
-/* Anchor and GPOS related stuff */
-#define RXf_ANCH_BOL (1<<(RXf_BASE_SHIFT+0))
-#define RXf_ANCH_MBOL (1<<(RXf_BASE_SHIFT+1))
-#define RXf_ANCH_SBOL (1<<(RXf_BASE_SHIFT+2))
-#define RXf_ANCH_GPOS (1<<(RXf_BASE_SHIFT+3))
-#define RXf_GPOS_SEEN (1<<(RXf_BASE_SHIFT+4))
-#define RXf_GPOS_FLOAT (1<<(RXf_BASE_SHIFT+5))
-/* two bits here */
-#define RXf_ANCH (RXf_ANCH_BOL|RXf_ANCH_MBOL|RXf_ANCH_GPOS|RXf_ANCH_SBOL)
-#define RXf_GPOS_CHECK (RXf_GPOS_SEEN|RXf_ANCH_GPOS)
-#define RXf_ANCH_SINGLE (RXf_ANCH_SBOL|RXf_ANCH_GPOS)
+/* Do we have some sort of anchor? */
+#define RXf_IS_ANCHORED (1<<(RXf_BASE_SHIFT+0))
+#define RXf_UNUSED1 (1<<(RXf_BASE_SHIFT+1))
+#define RXf_UNUSED2 (1<<(RXf_BASE_SHIFT+2))
+#define RXf_UNUSED3 (1<<(RXf_BASE_SHIFT+3))
+#define RXf_UNUSED4 (1<<(RXf_BASE_SHIFT+4))
+#define RXf_UNUSED5 (1<<(RXf_BASE_SHIFT+5))
/* What we have seen */
#define RXf_NO_INPLACE_SUBST (1<<(RXf_BASE_SHIFT+6))
#define RXf_EVAL_SEEN (1<<(RXf_BASE_SHIFT+7))
-#define RXf_CANY_SEEN (1<<(RXf_BASE_SHIFT+8))
+#define RXf_UNUSED8 (1<<(RXf_BASE_SHIFT+8))
/* Special */
-#define RXf_NOSCAN (1<<(RXf_BASE_SHIFT+9))
+#define RXf_UNBOUNDED_QUANTIFIER_SEEN (1<<(RXf_BASE_SHIFT+9))
#define RXf_CHECK_ALL (1<<(RXf_BASE_SHIFT+10))
/* UTF8 related */
#define RX_SAVED_COPY(prog) (ReANY(prog)->saved_copy)
/* last match was zero-length */
#define RX_ZERO_LEN(prog) \
- (RX_OFFS(prog)[0].start + RX_GOFS(prog) == (UV)RX_OFFS(prog)[0].end)
+ (RX_OFFS(prog)[0].start + (SSize_t)RX_GOFS(prog) \
+ == RX_OFFS(prog)[0].end)
#endif /* PLUGGABLE_RE_EXTENSION */
/* Whether the pattern stored at RX_WRAPPED is in UTF-8 */
#define RX_UTF8(prog) SvUTF8(prog)
-#define REXEC_COPY_STR 0x01 /* Need to copy the string. */
-#define REXEC_CHECKED 0x02 /* check_substr already checked. */
-#define REXEC_SCREAM 0x04 /* use scream table. */
-#define REXEC_IGNOREPOS 0x08 /* \G matches at start. */
-#define REXEC_NOT_FIRST 0x10 /* This is another iteration of //g. */
- /* under REXEC_COPY_STR, it's ok for the
- * engine (modulo PL_sawamperand etc)
- * to skip copying ... */
-#define REXEC_COPY_SKIP_PRE 0x20 /* ...the $` part of the string, or */
-#define REXEC_COPY_SKIP_POST 0x40 /* ...the $' part of the string */
+
+/* bits in flags arg of Perl_regexec_flags() */
+
+#define REXEC_COPY_STR 0x01 /* Need to copy the string for captures. */
+#define REXEC_CHECKED 0x02 /* re_intuit_start() already called. */
+#define REXEC_SCREAM 0x04 /* currently unused. */
+#define REXEC_IGNOREPOS 0x08 /* use stringarg, not pos(), for \G match */
+#define REXEC_NOT_FIRST 0x10 /* This is another iteration of //g:
+ no need to copy string again */
+
+ /* under REXEC_COPY_STR, it's ok for the
+ engine (modulo PL_sawamperand etc)
+ to skip copying: ... */
+#define REXEC_COPY_SKIP_PRE 0x20 /* ...the $` part of the string, or */
+#define REXEC_COPY_SKIP_POST 0x40 /* ...the $' part of the string */
+#define REXEC_FAIL_ON_UNDERFLOW 0x80 /* fail the match if $& would start before
+ the start pos (so s/.\G// would fail
+ on second iteration) */
#if defined(__GNUC__) && !defined(PERL_GCC_BRACE_GROUPS_FORBIDDEN)
# define ReREFCNT_inc(re) \
STRLEN suboffset; /* saved suboffset field from rex */
STRLEN subcoffset; /* saved subcoffset field from rex */
MAGIC *pos_magic; /* pos() magic attached to $_ */
- I32 pos; /* the original value of pos() in pos_magic */
+ SSize_t pos; /* the original value of pos() in pos_magic */
+ U8 pos_flags; /* flags to be restored; currently only MGf_BYTES */
} regmatch_info_aux_eval;