static void
S_debug_studydata(pTHX_ const char *where, scan_data_t *data,
- U32 depth, int is_inf)
+ U32 depth, int is_inf,
+ SSize_t min, SSize_t stopmin, SSize_t delta)
{
DECLARE_AND_GET_RE_DEBUG_FLAGS;
DEBUG_OPTIMISE_MORE_r({
if (!data)
return;
- Perl_re_indentf(aTHX_ "%s: Pos:%" IVdf "/%" IVdf " Flags: 0x%" UVXf,
+ Perl_re_indentf(aTHX_ "%s: M/S/D: %" IVdf "/%" IVdf "/%" IVdf " Pos:%" IVdf "/%" IVdf " Flags: 0x%" UVXf,
depth,
where,
+ min, stopmin, delta,
(IV)data->pos_min,
(IV)data->pos_delta,
(UV)data->flags
}
-# define DEBUG_STUDYDATA(where, data, depth, is_inf) \
- S_debug_studydata(aTHX_ where, data, depth, is_inf)
+# define DEBUG_STUDYDATA(where, data, depth, is_inf, min, stopmin, delta) \
+ S_debug_studydata(aTHX_ where, data, depth, is_inf, min, stopmin, delta)
# define DEBUG_PEEP(str, scan, depth, flags) \
S_debug_peep(aTHX_ str, pRExC_state, scan, depth, flags)
#else
-# define DEBUG_STUDYDATA(where, data, depth, is_inf) NOOP
+# define DEBUG_STUDYDATA(where, data, depth, is_inf, min, stopmin, delta) NOOP
# define DEBUG_PEEP(str, scan, depth, flags) NOOP
#endif
}
data->last_end = -1;
data->flags &= ~SF_BEFORE_EOL;
- DEBUG_STUDYDATA("commit", data, 0, is_inf);
+ DEBUG_STUDYDATA("commit", data, 0, is_inf, -1, -1, -1);
}
/* An SSC is just a regnode_charclass_posix with an extra field: the inversion
S_study_chunk(pTHX_
RExC_state_t *pRExC_state,
regnode **scanp, /* Start here (read-write). */
- SSize_t *minlenp,
+ SSize_t *minlenp, /* used for the minlen of substrings? */
SSize_t *deltap, /* Write maxlen-minlen here. */
regnode *last, /* Stop before this one. */
scan_data_t *data, /* string data about the pattern */
a higher caller is holding a ptr to them. */
)
{
- SSize_t final_minlen;
- /* There must be at least this number of characters to match */
- SSize_t min = 0;
- I32 pars = 0, code;
- regnode *scan = *scanp, *next;
- SSize_t delta = 0;
+ /* vars about the regnodes we are working with */
+ regnode *scan = *scanp; /* the current opcode we are inspecting */
+ regnode *next = NULL; /* the next opcode beyond scan, tmp var */
+ regnode *first_non_open = scan; /* FIXME: should this init to NULL?
+ the first non open regop, if the init
+ val IS an OPEN then we will skip past
+ it just after the var decls section */
+ I32 code = 0; /* temp var used to hold the optype of a regop */
+
+ /* vars about the min and max length of the pattern */
+ SSize_t min = 0; /* min length of this part of the pattern */
+ SSize_t stopmin = OPTIMIZE_INFTY; /* min length accounting for ACCEPT
+ this is adjusted down if we find
+ an ACCEPT */
+ SSize_t delta = 0; /* difference between min and max length
+ (not accounting for stopmin) */
+
+ /* vars about capture buffers in the pattern */
+ I32 pars = 0; /* count of OPEN opcodes */
+ I32 is_par = OP(scan) == OPEN ? ARG(scan) : 0; /* is this op an OPEN? */
+
+ /* vars about whether this pattern contains something that can match
+ * infinitely long strings, e.g., X* or X+ */
int is_inf = (flags & SCF_DO_SUBSTR) && (data->flags & SF_IS_INF);
int is_inf_internal = 0; /* The studied chunk is infinite */
- I32 is_par = OP(scan) == OPEN ? ARG(scan) : 0;
- scan_data_t data_fake;
- SV *re_trie_maxbuff = NULL;
- regnode *first_non_open = scan;
- SSize_t stopmin = OPTIMIZE_INFTY;
- scan_frame *frame = NULL;
+
+ /* scan_data_t (struct) is used to hold information about the substrings
+ * and start class we have extracted from the pattern */
+ scan_data_t data_fake; /* temp var used for recursing in some cases */
+
+ SV *re_trie_maxbuff = NULL; /* temp var used to hold whether we can do
+ trie optimizations */
+
+ scan_frame *frame = NULL; /* used as part of fake recursion */
+
DECLARE_AND_GET_RE_DEBUG_FLAGS;
PERL_ARGS_ASSERT_STUDY_CHUNK;
first_non_open=regnext(first_non_open);
}
-
fake_study_recurse:
DEBUG_r(
RExC_study_chunk_recursed_count++;
*/
bool mutate_ok = was_mutate_ok && !(frame && frame->in_gosub);
/* Peephole optimizer: */
- DEBUG_STUDYDATA("Peep", data, depth, is_inf);
+ DEBUG_STUDYDATA("Peep", data, depth, is_inf, min, stopmin, delta);
DEBUG_PEEP("Peep", scan, depth, flags);
}
if (flags & SCF_DO_STCLASS)
ssc_or(pRExC_state, &accum, (regnode_charclass*)&this_class);
+ DEBUG_STUDYDATA("end BRANCH", data, depth, is_inf, min, stopmin, delta);
}
if (code == IFTHEN && num < 2) /* Empty ELSE branch */
min1 = 0;
flags |= SCF_DO_STCLASS_OR;
}
}
+ DEBUG_STUDYDATA("pre TRIE", data, depth, is_inf, min, stopmin, delta);
if (PERL_ENABLE_TRIE_OPTIMISATION
&& OP(startbranch) == BRANCH
} /* end if ( prev) */
} /* TRIE_MAXBUF is non zero */
} /* do trie */
-
+ DEBUG_STUDYDATA("after TRIE", data, depth, is_inf, min, stopmin, delta);
}
else if ( code == BRANCHJ ) { /* single branch is optimized. */
scan = NEXTOPER(NEXTOPER(scan));
RExC_study_chunk_recursed_bytes, U8);
}
/* we havent recursed into this paren yet, so recurse into it */
- DEBUG_STUDYDATA("gosub-set", data, depth, is_inf);
+ DEBUG_STUDYDATA("gosub-set", data, depth, is_inf, min, stopmin, delta);
PAREN_SET(recursed_depth, paren);
my_recursed_depth= recursed_depth + 1;
} else {
- DEBUG_STUDYDATA("gosub-inf", data, depth, is_inf);
+ DEBUG_STUDYDATA("gosub-inf", data, depth, is_inf, min, stopmin, delta);
/* some form of infinite recursion, assume infinite length
* */
if (flags & SCF_DO_SUBSTR) {
(frame && frame->in_gosub) || OP(scan) == GOSUB
);
- DEBUG_STUDYDATA("frame-new", data, depth, is_inf);
+ DEBUG_STUDYDATA("frame-new", data, depth, is_inf, min, stopmin, delta);
DEBUG_PEEP("fnew", scan, depth, flags);
frame = newframe;
ANYOF_FLAGS(data->start_class) &= ~SSC_MATCHES_EMPTY_STRING;
}
flags &= ~SCF_DO_STCLASS;
+ DEBUG_STUDYDATA("end EXACT", data, depth, is_inf, min, stopmin, delta);
}
else if (PL_regkind[OP(scan)] == EXACT) {
/* But OP != EXACT!, so is EXACTFish */
flags &= ~SCF_DO_STCLASS;
SvREFCNT_dec(EXACTF_invlist);
}
+ DEBUG_STUDYDATA("end EXACTish", data, depth, is_inf, min, stopmin, delta);
}
else if (REGNODE_VARIES(OP(scan))) {
SSize_t mincount, maxcount, minnext, deltanext, pos_before = 0;
}
if (stopmin > min)
stopmin = min;
- DEBUG_STUDYDATA("after-whilem", data, depth, is_inf);
+ DEBUG_STUDYDATA("after-whilem accept", data, depth, is_inf, min, stopmin, delta);
}
/* Try powerful optimization CURLYX => CURLYN. */
if ( OP(oscan) == CURLYX && data
last, &data_fake, stopparen,
recursed_depth, NULL, f, depth+1,
mutate_ok);
+
if (scan->flags) {
if ( deltanext < 0
|| deltanext > (I32) U8_MAX
|= SSC_MATCHES_EMPTY_STRING;
}
}
+ DEBUG_STUDYDATA("end LOOKAROUND", data, depth, is_inf, min, stopmin, delta);
}
#if PERL_ENABLE_POSITIVE_ASSERTION_STUDY
else {
if (OP(scan)==ACCEPT) {
/* m{(*ACCEPT)x} does not have to start with 'x' */
flags &= ~SCF_DO_STCLASS;
- if (data) {
+ if (data)
data->flags |= SCF_SEEN_ACCEPT;
- if (stopmin > min)
- stopmin = min;
- }
+ if (stopmin > min)
+ stopmin = min;
}
}
else if (OP(scan) == COMMIT) {
if (flags & SCF_DO_STCLASS)
ssc_or(pRExC_state, &accum, (regnode_charclass *) &this_class);
}
+ DEBUG_STUDYDATA("after JUMPTRIE", data, depth, is_inf, min, stopmin, delta);
}
if (flags & SCF_DO_SUBSTR) {
data->pos_min += min1;
}
}
scan= tail;
+ DEBUG_STUDYDATA("after TRIE study", data, depth, is_inf, min, stopmin, delta);
continue;
}
#else
/* we need to unwind recursion. */
depth = depth - 1;
- DEBUG_STUDYDATA("frame-end", data, depth, is_inf);
+ DEBUG_STUDYDATA("frame-end", data, depth, is_inf, min, stopmin, delta);
DEBUG_PEEP("fend", scan, depth, flags);
/* restore previous context */
}
assert(!frame);
- DEBUG_STUDYDATA("pre-fin", data, depth, is_inf);
+ DEBUG_STUDYDATA("pre-fin", data, depth, is_inf, min, stopmin, delta);
+
+ if (min > stopmin) {
+ /* stopmin might be shorter than min if we saw an (*ACCEPT). If
+ this is the case then it means this pattern is variable length
+ and we need to ensure that the delta accounts for it. delta
+ represents the difference between min length and max length for
+ this part of the pattern. */
+ delta += min - stopmin;
+ min = stopmin;
+ }
*scanp = scan;
*deltap = is_inf_internal ? OPTIMIZE_INFTY : delta;
if (flags & SCF_TRIE_RESTUDY)
data->flags |= SCF_TRIE_RESTUDY;
- DEBUG_STUDYDATA("post-fin", data, depth, is_inf);
-
- final_minlen = min < stopmin
- ? min : stopmin;
if (!(RExC_seen & REG_UNBOUNDED_QUANTIFIER_SEEN)) {
- if (final_minlen > OPTIMIZE_INFTY - delta)
+ if (min > OPTIMIZE_INFTY - delta)
RExC_maxlen = OPTIMIZE_INFTY;
- else if (RExC_maxlen < final_minlen + delta)
- RExC_maxlen = final_minlen + delta;
+ else if (RExC_maxlen < min + delta)
+ RExC_maxlen = min + delta;
}
- return final_minlen;
+ DEBUG_STUDYDATA("post-fin", data, depth, is_inf, min, stopmin, delta);
+ return min;
}
/* add a data member to the struct reg_data attached to this regex, it should