|NULLOK bool *is_bare_re \
|U32 rx_flags|U32 pm_flags
Ap |REGEXP*|re_compile |NN SV * const pattern|U32 orig_rx_flags
-Ap |char* |re_intuit_start|NN REGEXP * const rx|NULLOK SV* sv|NN char* strpos \
- |NN char* strend|const U32 flags \
+Ap |char* |re_intuit_start|NN REGEXP * const rx \
+ |NULLOK SV* sv \
+ |NN const char* const strbeg \
+ |NN char* strpos \
+ |NN char* strend \
+ |const U32 flags \
|NULLOK re_scream_pos_data *data
Ap |SV* |re_intuit_string|NN REGEXP *const r
#if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_TOKE_C)
#define pv_pretty(a,b,c,d,e,f,g) Perl_pv_pretty(aTHX_ a,b,c,d,e,f,g)
#define pv_uni_display(a,b,c,d,e) Perl_pv_uni_display(aTHX_ a,b,c,d,e)
#define re_compile(a,b) Perl_re_compile(aTHX_ a,b)
-#define re_intuit_start(a,b,c,d,e,f) Perl_re_intuit_start(aTHX_ a,b,c,d,e,f)
+#define re_intuit_start(a,b,c,d,e,f,g) Perl_re_intuit_start(aTHX_ a,b,c,d,e,f,g)
#define re_intuit_string(a) Perl_re_intuit_string(aTHX_ a)
#define reentrant_free() Perl_reentrant_free(aTHX)
#define reentrant_init() Perl_reentrant_init(aTHX)
use strict;
use warnings;
-our $VERSION = "0.24";
+our $VERSION = "0.25";
our @ISA = qw(Exporter);
our @EXPORT_OK = ('regmust',
qw(is_regexp regexp_pattern
char* strbeg, I32 minend, SV* screamer,
void* data, U32 flags);
-extern char* my_re_intuit_start (pTHX_ REGEXP * const prog, SV *sv, char *strpos,
- char *strend, const U32 flags,
- struct re_scream_pos_data_s *data);
+extern char* my_re_intuit_start(pTHX_
+ REGEXP * const rx,
+ SV *sv,
+ const char * const strbeg,
+ char *strpos,
+ char *strend,
+ const U32 flags,
+ re_scream_pos_data *data);
+
extern SV* my_re_intuit_string (pTHX_ REGEXP * const prog);
extern void my_regfree (pTHX_ REGEXP * const r);
#define CALLREGEXEC(prog,stringarg,strend,strbeg,minend,screamer,data,flags) \
RX_ENGINE(prog)->exec(aTHX_ (prog),(stringarg),(strend), \
(strbeg),(minend),(screamer),(data),(flags))
-#define CALLREG_INTUIT_START(prog,sv,strpos,strend,flags,data) \
- RX_ENGINE(prog)->intuit(aTHX_ (prog), (sv), (strpos), \
+#define CALLREG_INTUIT_START(prog,sv,strbeg,strpos,strend,flags,data) \
+ RX_ENGINE(prog)->intuit(aTHX_ (prog), (sv), (strbeg), (strpos), \
(strend),(flags),(data))
#define CALLREG_INTUIT_STRING(prog) \
RX_ENGINE(prog)->checkstr(aTHX_ (prog))
void* data, U32 flags);
char* (*intuit) (pTHX_
REGEXP * const rx, SV *sv,
+ const char * const strbeg,
char *strpos, char *strend, U32 flags,
struct re_scream_pos_data_s *data);
SV* (*checkstr) (pTHX_ REGEXP * const rx);
=head2 intuit
- char* intuit(pTHX_ REGEXP * const rx,
- SV *sv, char *strpos, char *strend,
- const U32 flags, struct re_scream_pos_data_s *data);
+ char* intuit(pTHX_
+ REGEXP * const rx,
+ SV *sv,
+ const char * const strbeg,
+ char *strpos,
+ char *strend,
+ const U32 flags,
+ struct re_scream_pos_data_s *data);
Find the start position where a regex match should be attempted,
or possibly if the regex engine should not be run because the
depending on the values of the C<extflags> member of the C<regexp>
structure.
+Arguments:
+
+ rx: the regex to match against
+ sv: the SV being matched: only used for utf8 flag; the string
+ itself is accessed via the pointers below. Note that on
+ something like an overloaded SV, SvPOK(sv) may be false
+ and the string pointers may point to something unrelated to
+ the SV itself.
+ strbeg: real beginning of string
+ strpos: the point in the string at which to begin matching
+ strend: pointer to the byte following the last char of the string
+ flags: currently unused; set to 0
+ data: currently unused; set to NULL
+
+
=head2 checkstr
SV* checkstr(pTHX_ REGEXP * const rx);
}
if (RX_EXTFLAGS(rx) & RXf_USE_INTUIT &&
DO_UTF8(TARG) == (RX_UTF8(rx) != 0)) {
- /* FIXME - can PL_bostr be made const char *? */
- PL_bostr = (char *)truebase;
- s = CALLREG_INTUIT_START(rx, TARG, (char *)s, (char *)strend, r_flags, NULL);
+ s = CALLREG_INTUIT_START(rx, TARG, truebase,
+ (char *)s, (char *)strend, r_flags, NULL);
if (!s)
goto nope;
orig = m = s;
if (RX_EXTFLAGS(rx) & RXf_USE_INTUIT) {
- PL_bostr = orig;
- s = CALLREG_INTUIT_START(rx, TARG, s, strend, r_flags, NULL);
+ s = CALLREG_INTUIT_START(rx, TARG, orig, s, strend, r_flags, NULL);
if (!s)
goto ret_no;
#define PERL_ARGS_ASSERT_RE_COMPILE \
assert(pattern)
-PERL_CALLCONV char* Perl_re_intuit_start(pTHX_ REGEXP * const rx, SV* sv, char* strpos, char* strend, const U32 flags, re_scream_pos_data *data)
+PERL_CALLCONV char* Perl_re_intuit_start(pTHX_ REGEXP * const rx, SV* sv, const char* const strbeg, char* strpos, char* strend, const U32 flags, re_scream_pos_data *data)
__attribute__nonnull__(pTHX_1)
__attribute__nonnull__(pTHX_3)
- __attribute__nonnull__(pTHX_4);
+ __attribute__nonnull__(pTHX_4)
+ __attribute__nonnull__(pTHX_5);
#define PERL_ARGS_ASSERT_RE_INTUIT_START \
- assert(rx); assert(strpos); assert(strend)
+ assert(rx); assert(strbeg); assert(strpos); assert(strend)
PERL_CALLCONV SV* Perl_re_intuit_string(pTHX_ REGEXP *const r)
__attribute__nonnull__(pTHX_1);
The nodes of the REx which we used for the search should have been
deleted from the finite automaton. */
+/* args:
+ * rx: the regex to match against
+ * sv: the SV being matched: only used for utf8 flag; the string
+ * itself is accessed via the pointers below. Note that on
+ * something like an overloaded SV, SvPOK(sv) may be false
+ * and the string pointers may point to something unrelated to
+ * the SV itself.
+ * strbeg: real beginning of string
+ * strpos: the point in the string at which to begin matching
+ * strend: pointer to the byte following the last char of the string
+ * flags: currently unused; set to 0
+ * data: currently unused; set to NULL
+ */
+
char *
-Perl_re_intuit_start(pTHX_ REGEXP * const rx, SV *sv, char *strpos,
- char *strend, const U32 flags, re_scream_pos_data *data)
+Perl_re_intuit_start(pTHX_
+ REGEXP * const rx,
+ SV *sv,
+ const char * const strbeg,
+ char *strpos,
+ char *strend,
+ const U32 flags,
+ re_scream_pos_data *data)
{
dVAR;
struct regexp *const prog = ReANY(rx);
I32 end_shift = 0;
char *s;
SV *check;
- char *strbeg;
char *t;
const bool utf8_target = (sv && SvUTF8(sv)) ? 1 : 0; /* if no sv we have to assume bytes */
I32 ml_anch;
is_utf8_pat = cBOOL(RX_UTF8(rx));
- DEBUG_EXECUTE_r(
- debug_start_match(rx, utf8_target, strpos, strend,
- sv ? "Guessing start of match in sv for"
- : "Guessing start of match in string for");
- );
-
/* CHR_DIST() would be more correct here but it makes things slow. */
if (prog->minlen > strend - strpos) {
DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
goto fail;
}
- /* XXX we need to pass strbeg as a separate arg: the following is
- * guesswork and can be wrong... */
- if (sv && SvPOK(sv)) {
- char * p = SvPVX(sv);
- STRLEN cur = SvCUR(sv);
- if (p <= strpos && strpos < p + cur) {
- strbeg = p;
- assert(p <= strend && strend <= p + cur);
- }
- else
- strbeg = strend - cur;
- }
- else
- strbeg = strpos;
+ PL_bostr = (char *)strbeg;
reginfo->strend = strend;
reginfo->is_utf8_pat = is_utf8_pat;
if (!ml_anch) {
if ( !(prog->extflags & RXf_ANCH_GPOS) /* Checked by the caller */
&& !(prog->intflags & PREGf_IMPLICIT) /* not a real BOL */
- /* SvCUR is not set on references: SvRV and SvPVX_const overlap */
- && sv && !SvROK(sv)
&& (strpos != strbeg)) {
DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
goto fail;
try_at_start:
/* Even in this situation we may use MBOL flag if strpos is offset
wrt the start of the string. */
- if (ml_anch && sv && !SvROK(sv) /* See prev comment on SvROK */
- && (strpos != strbeg) && strpos[-1] != '\n'
+ if (ml_anch && (strpos != strbeg) && strpos[-1] != '\n'
/* May be due to an implicit anchor of m{.*foo} */
&& !(prog->intflags & PREGf_IMPLICIT))
{
d.scream_olds = &scream_olds;
d.scream_pos = &scream_pos;
- s = re_intuit_start(rx, sv, s, strend, flags, &d);
+ s = re_intuit_start(rx, sv, strbeg, s, strend, flags, &d);
if (!s) {
DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Not present...\n"));
goto phooey; /* not present */
goto phooey;
}
if (prog->extflags & RXf_USE_INTUIT) {
- s = re_intuit_start(rx, sv, s + UTF8SKIP(s), strend, flags, NULL);
+ s = re_intuit_start(rx, sv, strbeg,
+ s + UTF8SKIP(s), strend, flags, NULL);
if (!s) {
goto phooey;
}
goto phooey;
}
if (prog->extflags & RXf_USE_INTUIT) {
- s = re_intuit_start(rx, sv, s + 1, strend, flags, NULL);
+ s = re_intuit_start(rx, sv, strbeg,
+ s + 1, strend, flags, NULL);
if (!s) {
goto phooey;
}
I32 (*exec) (pTHX_ REGEXP * const rx, char* stringarg, char* strend,
char* strbeg, I32 minend, SV* screamer,
void* data, U32 flags);
- char* (*intuit) (pTHX_ REGEXP * const rx, SV *sv, char *strpos,
- char *strend, const U32 flags,
+ char* (*intuit) (pTHX_
+ REGEXP * const rx,
+ SV *sv,
+ const char * const strbeg,
+ char *strpos,
+ char *strend,
+ const U32 flags,
re_scream_pos_data *data);
SV* (*checkstr) (pTHX_ REGEXP * const rx);
void (*free) (pTHX_ REGEXP * const rx);