string can occur infinitely far to the right.
- minlenp
- A pointer to the minimum length of the pattern that the string
- was found inside. This is important as in the case of positive
+ A pointer to the minimum number of characters of the pattern that the
+ string was found inside. This is important as in the case of positive
lookahead or positive lookbehind we can have multiple patterns
involved. Consider
* these get optimized out
*
* If there are problematic code sequences, *min_subtract is set to the delta
- * that the minimum size of the node can be less than its actual size. And,
- * the node type of the result is changed to reflect that it contains these
- * sequences.
+ * number of characters by which the minimum size of the node can be less
+ * than its actual size. And, the node type of the result is changed to
+ * reflect that it contains these sequences.
*
* And *has_exactf_sharp_s is set to indicate whether or not the node is EXACTF
* and contains LATIN SMALL LETTER SHARP S
* U+03C5 U+0308 U+0301 0xCF 0x85 0xCC 0x88 0xCC 0x81
*
* This means that in case-insensitive matching (or "loose
- * matching", as Unicode calls it), an EXACTF of length six (the
- * UTF-8 encoded byte length of the above casefolded versions) can
- * match a target string of length two (the byte length of UTF-8
- * encoded U+0390 or U+03B0). This would rather mess up the
- * minimum length computation. (there are other code points that
- * also fold to these two sequences, but the delta is smaller)
+ * matching", as Unicode calls it), an EXACTF of length 3 chars can
+ * match a target string of length 1 char. This would rather mess
+ * up the minimum length computation.
*
* If these sequences are found, the minimum length is decreased by
- * four (six minus two).
+ * two.
*
* Similarly, 'ss' may match the single char and byte LATIN SMALL
* LETTER SHARP S. We decrease the min length by 1 for each
break;
}
greek_sequence:
- *min_subtract += 4;
+ *min_subtract += 2;
/* This requires special handling by trie's, so change
* the node type to indicate this. If EXACTFA and
/* and_withp: Valid if flags & SCF_DO_STCLASS_OR */
{
dVAR;
- I32 min = 0, pars = 0, code;
+ I32 min = 0; /* There must be at least this number of characters to match */
+ I32 pars = 0, code;
regnode *scan = *scanp, *next;
I32 delta = 0;
int is_inf = (flags & SCF_DO_SUBSTR) && (data->flags & SF_IS_INF);
fake_study_recurse:
while ( scan && OP(scan) != END && scan < last ){
- UV min_subtract = 0; /* How much to subtract from the minimum node
- length to get a real minimum (because the
- folded version may be shorter) */
+ UV min_subtract = 0; /* How many chars to subtract from the minimum
+ node length to get a real minimum (because
+ the folded version may be shorter) */
bool has_exactf_sharp_s = FALSE;
/* Peephole optimizer: */
DEBUG_STUDYDATA("Peep:", data,depth);
* trietype so we can turn them into a trie. If/when we
* allow NOTHING to start a trie sequence this condition will be
* required, and it isn't expensive so we leave it in for now. */
- if ( trietype != NOTHING )
+ if ( trietype && trietype != NOTHING )
make_trie( pRExC_state,
startbranch, first, cur, tail, count,
trietype, depth+1 );
"", SvPV_nolen_const( mysv ),REG_NODE_NUM(cur));
});
- if ( last ) {
+ if ( last && trietype ) {
if ( trietype != NOTHING ) {
/* the last branch of the sequence was part of a trie,
* so we have to construct it here outside of the loop
RExC_seen |= REG_SEEN_EXACTF_SHARP_S;
}
min += l - min_subtract;
- if (min < 0) {
- min = 0;
- }
+ assert (min >= 0);
delta += min_subtract;
if (flags & SCF_DO_SUBSTR) {
data->pos_min += l - min_subtract;