I32 seen_zerolen;
regnode_offset *open_parens; /* offsets to open parens */
regnode_offset *close_parens; /* offsets to close parens */
+ I32 parens_buf_size; /* #slots malloced open/close_parens */
regnode *end_op; /* END node in program */
I32 utf8; /* whether the pattern is utf8 or not */
I32 orig_utf8; /* whether the pattern was originally in utf8 */
#define RExC_maxlen (pRExC_state->maxlen)
#define RExC_npar (pRExC_state->npar)
#define RExC_total_parens (pRExC_state->total_par)
+#define RExC_parens_buf_size (pRExC_state->parens_buf_size)
#define RExC_nestroot (pRExC_state->nestroot)
#define RExC_seen_zerolen (pRExC_state->seen_zerolen)
#define RExC_utf8 (pRExC_state->utf8)
if (DEPENDS_SEMANTICS) { \
set_regex_charset(&RExC_flags, REGEX_UNICODE_CHARSET); \
RExC_uni_semantics = 1; \
- if (RExC_seen_d_op && LIKELY(RExC_total_parens >= 0)) { \
+ if (RExC_seen_d_op && LIKELY(! IN_PARENS_PASS)) { \
/* No need to restart the parse if we haven't seen \
* anything that differs between /u and /d, and no need \
* to restart immediately if we're going to reparse \
} \
} STMT_END
-#define BRANCH_MAX_OFFSET U16_MAX
#define REQUIRE_BRANCHJ(flagp, restart_retval) \
STMT_START { \
RExC_use_BRANCHJ = 1; \
- if (LIKELY(RExC_total_parens >= 0)) { \
+ if (LIKELY(! IN_PARENS_PASS)) { \
/* No need to restart the parse immediately if we're \
* going to reparse anyway to count parens */ \
*flagp |= RESTART_PARSE; \
} \
} STMT_END
+/* Until we have completed the parse, we leave RExC_total_parens at 0 or
+ * less. After that, it must always be positive, because the whole re is
+ * considered to be surrounded by virtual parens. Setting it to negative
+ * indicates there is some construct that needs to know the actual number of
+ * parens to be properly handled. And that means an extra pass will be
+ * required after we've counted them all */
+#define ALL_PARENS_COUNTED (RExC_total_parens > 0)
#define REQUIRE_PARENS_PASS \
- STMT_START { \
- if (RExC_total_parens == 0) RExC_total_parens = -1; \
+ STMT_START { /* No-op if have completed a pass */ \
+ if (! ALL_PARENS_COUNTED) RExC_total_parens = -1; \
} STMT_END
+#define IN_PARENS_PASS (RExC_total_parens < 0)
+
/* This is used to return failure (zero) early from the calling function if
* various flags in 'flags' are set. Two flags always cause a return:
trie_words = newAV();
});
- re_trie_maxbuff = get_sv(RE_TRIE_MAXBUF_NAME, 1);
+ re_trie_maxbuff = get_sv(RE_TRIE_MAXBUF_NAME, GV_ADD);
assert(re_trie_maxbuff);
if (!SvIOK(re_trie_maxbuff)) {
sv_setiv(re_trie_maxbuff, RE_TRIE_MAXBUF_INIT);
last, &data_fake, stopparen,
recursed_depth, NULL, f, depth+1);
if (scan->flags) {
- if (deltanext) {
- FAIL("Variable length lookbehind not implemented");
- }
- else if (minnext > (I32)U8_MAX) {
+ if ( deltanext < 0
+ || deltanext > (I32) U8_MAX
+ || minnext > (I32)U8_MAX
+ || minnext + deltanext > (I32)U8_MAX)
+ {
FAIL2("Lookbehind longer than %" UVuf " not implemented",
(UV)U8_MAX);
}
- scan->flags = (U8)minnext;
+
+ /* The 'next_off' field has been repurposed to count the
+ * additional starting positions to try beyond the initial
+ * one. (This leaves it at 0 for non-variable length
+ * matches to avoid breakage for those not using this
+ * extension) */
+ if (deltanext) {
+ scan->next_off = deltanext;
+ ckWARNexperimental(RExC_parse,
+ WARN_EXPERIMENTAL__VLB,
+ "Variable length lookbehind is experimental");
+ }
+ scan->flags = (U8)minnext + deltanext;
}
if (data) {
if (data_fake.flags & (SF_HAS_PAR|SF_IN_PAR))
stopparen, recursed_depth, NULL,
f, depth+1);
if (scan->flags) {
- if (deltanext) {
- FAIL("Variable length lookbehind not implemented");
- }
- else if (*minnextp > (I32)U8_MAX) {
+ assert(0); /* This code has never been tested since this
+ is normally not compiled */
+ if ( deltanext < 0
+ || deltanext > (I32) U8_MAX
+ || *minnextp > (I32)U8_MAX
+ || *minnextp + deltanext > (I32)U8_MAX)
+ {
FAIL2("Lookbehind longer than %" UVuf " not implemented",
(UV)U8_MAX);
}
- scan->flags = (U8)*minnextp;
+
+ if (deltanext) {
+ scan->next_off = deltanext;
+ }
+ scan->flags = (U8)*minnextp + deltanext;
}
*minnextp += min;
RExC_naughty = 0;
RExC_npar = 1;
+ RExC_parens_buf_size = 0;
RExC_emit_start = RExC_rxi->program;
pRExC_state->code_index = 0;
/* Do the parse */
if (reg(pRExC_state, 0, &flags, 1)) {
- /* Success!, But if RExC_total_parens < 0, we need to redo the parse
- * knowing how many parens there actually are */
- if (RExC_total_parens < 0) {
+ /* Success!, But we may need to redo the parse knowing how many parens
+ * there actually are */
+ if (IN_PARENS_PASS) {
flags |= RESTART_PARSE;
}
DEBUG_PARSE_r(Perl_re_printf( aTHX_ "Need to redo parse\n"));
}
- if (RExC_total_parens > 0) {
+ if (ALL_PARENS_COUNTED) {
/* Make enough room for all the known parens, and zero it */
Renew(RExC_open_parens, RExC_total_parens, regnode_offset);
Zero(RExC_open_parens, RExC_total_parens, regnode_offset);
/* It might be a forward reference; we can't fail until we
* know, by completing the parse to get all the groups, and
* then reparsing */
- if (RExC_total_parens > 0) {
+ if (ALL_PARENS_COUNTED) {
vFAIL("Reference to nonexistent named group");
}
else {
I32 freeze_paren = 0;
I32 after_freeze = 0;
I32 num; /* numeric backreferences */
+ SV * max_open; /* Max number of unclosed parens */
char * parse_start = RExC_parse; /* MJD */
char * const oregcomp_parse = RExC_parse;
PERL_ARGS_ASSERT_REG;
DEBUG_PARSE("reg ");
+
+ max_open = get_sv(RE_COMPILE_RECURSION_LIMIT, GV_ADD);
+ assert(max_open);
+ if (!SvIOK(max_open)) {
+ sv_setiv(max_open, RE_COMPILE_RECURSION_INIT);
+ }
+ if (depth > 4 * SvIV(max_open)) { /* We increase depth by 4 for each open
+ paren */
+ vFAIL("Too many nested open parens");
+ }
+
*flagp = 0; /* Tentatively. */
/* Having this true makes it feasible to have a lot fewer tests for the
/* It might be a forward reference; we can't fail until
* we know, by completing the parse to get all the
* groups, and then reparsing */
- if (RExC_total_parens > 0) {
+ if (ALL_PARENS_COUNTED) {
RExC_parse++;
vFAIL("Reference to nonexistent group");
}
/* It might be a forward reference; we can't fail until we
* know, by completing the parse to get all the groups, and
* then reparsing */
- if (RExC_total_parens > 0) {
+ if (ALL_PARENS_COUNTED) {
if (num >= RExC_total_parens) {
RExC_parse++;
vFAIL("Reference to nonexistent group");
capturing_parens:
parno = RExC_npar;
RExC_npar++;
- if (RExC_total_parens <= 0) {
+ if (! ALL_PARENS_COUNTED) {
/* If we are in our first pass through (and maybe only pass),
* we need to allocate memory for the capturing parentheses
- * data structures. Since we start at npar=1, when it reaches
- * 2, for the first time it has something to put in it. Above
- * 2 means we extend what we already have */
- if (RExC_npar == 2) {
+ * data structures.
+ */
+
+ if (!RExC_parens_buf_size) {
+ /* first guess at number of parens we might encounter */
+ RExC_parens_buf_size = 10;
+
/* setup RExC_open_parens, which holds the address of each
* OPEN tag, and to make things simpler for the 0 index the
* start of the program - this is used later for offsets */
- Newxz(RExC_open_parens, RExC_npar, regnode_offset);
+ Newxz(RExC_open_parens, RExC_parens_buf_size,
+ regnode_offset);
RExC_open_parens[0] = 1; /* +1 for REG_MAGIC */
/* setup RExC_close_parens, which holds the address of each
* CLOSE tag, and to make things simpler for the 0 index
* the end of the program - this is used later for offsets
* */
- Newxz(RExC_close_parens, RExC_npar, regnode_offset);
+ Newxz(RExC_close_parens, RExC_parens_buf_size,
+ regnode_offset);
/* we dont know where end op starts yet, so we dont need to
* set RExC_close_parens[0] like we do RExC_open_parens[0]
* above */
}
- else {
- Renew(RExC_open_parens, RExC_npar, regnode_offset);
- Zero(RExC_open_parens + RExC_npar - 1, 1, regnode_offset);
+ else if (RExC_npar > RExC_parens_buf_size) {
+ I32 old_size = RExC_parens_buf_size;
+
+ RExC_parens_buf_size *= 2;
+
+ Renew(RExC_open_parens, RExC_parens_buf_size,
+ regnode_offset);
+ Zero(RExC_open_parens + old_size,
+ RExC_parens_buf_size - old_size, regnode_offset);
- Renew(RExC_close_parens, RExC_npar, regnode_offset);
- Zero(RExC_close_parens + RExC_npar - 1, 1, regnode_offset);
+ Renew(RExC_close_parens, RExC_parens_buf_size,
+ regnode_offset);
+ Zero(RExC_close_parens + old_size,
+ RExC_parens_buf_size - old_size, regnode_offset);
}
}
RETURN_FAIL_ON_RESTART(flags, flagp);
FAIL2("panic: regbranch returned failure, flags=%#" UVxf, (UV) flags);
}
- REGTAIL(pRExC_state, lastbr, br); /* BRANCH -> BRANCH. */
+ if (! REGTAIL(pRExC_state, lastbr, br)) { /* BRANCH -> BRANCH. */
+ REQUIRE_BRANCHJ(flagp, 0);
+ }
lastbr = br;
*flagp |= flags & (SPSTART | HASWIDTH | POSTPONED);
}
(IV)(ender - lastbr)
);
);
- REGTAIL(pRExC_state, lastbr, ender);
+ if (! REGTAIL(pRExC_state, lastbr, ender)) {
+ REQUIRE_BRANCHJ(flagp, 0);
+ }
if (have_branch) {
char is_nothing= 1;
for (br = REGNODE_p(ret); br; br = regnext(br)) {
const U8 op = PL_regkind[OP(br)];
if (op == BRANCH) {
- REGTAIL_STUDY(pRExC_state,
- REGNODE_OFFSET(NEXTOPER(br)),
- ender);
+ if (! REGTAIL_STUDY(pRExC_state,
+ REGNODE_OFFSET(NEXTOPER(br)),
+ ender))
+ {
+ REQUIRE_BRANCHJ(flagp, 0);
+ }
if ( OP(NEXTOPER(br)) != NOTHING
|| regnext(NEXTOPER(br)) != REGNODE_p(ender))
is_nothing= 0;
Set_Node_Cur_Length(REGNODE_p(ret), parse_start);
Set_Node_Offset(REGNODE_p(ret), parse_start + 1);
FLAGS(REGNODE_p(ret)) = flag;
- REGTAIL_STUDY(pRExC_state, ret, reg_node(pRExC_state, TAIL));
+ if (! REGTAIL_STUDY(pRExC_state, ret, reg_node(pRExC_state, TAIL)))
+ {
+ REQUIRE_BRANCHJ(flagp, 0);
+ }
}
}
/* FIXME adding one for every branch after the first is probably
* excessive now we have TRIE support. (hv) */
MARK_NAUGHTY(1);
- if ( chain > (SSize_t) BRANCH_MAX_OFFSET
- && ! RExC_use_BRANCHJ)
- {
+ if (! REGTAIL(pRExC_state, chain, latest)) {
/* XXX We could just redo this branch, but figuring out what
- * bookkeeping needs to be reset is a pain */
+ * bookkeeping needs to be reset is a pain, and it's likely
+ * that other branches that goto END will also be too large */
REQUIRE_BRANCHJ(flagp, 0);
}
- REGTAIL(pRExC_state, chain, latest);
}
chain = latest;
c++;
/* It might be a forward reference; we can't fail until we
* know, by completing the parse to get all the groups, and
* then reparsing */
- if (RExC_total_parens > 0) {
+ if (ALL_PARENS_COUNTED) {
if (num >= RExC_total_parens) {
vFAIL("Reference to nonexistent group");
}
src = REGNODE_p(RExC_emit);
RExC_emit += size;
dst = REGNODE_p(RExC_emit);
- if (RExC_open_parens) {
+
+ /* If we are in a "count the parentheses" pass, the numbers are unreliable,
+ * and [perl #133871] shows this can lead to problems, so skip this
+ * realignment of parens until a later pass when they are reliable */
+ if (! IN_PARENS_PASS && RExC_open_parens) {
int paren;
/*DEBUG_PARSE_FMT("inst"," - %" IVdf, (IV)RExC_npar);*/
/* remember that RExC_npar is rex->nparens + 1,
}
/*
-- regtail - set the next-pointer at the end of a node chain of p to val.
+- regtail - set the next-pointer at the end of a node chain of p to val. If
+ that value won't fit in the space available, instead returns FALSE.
+ (Except asserts if we can't fit in the largest space the regex
+ engine is designed for.)
- SEE ALSO: regtail_study
*/
-STATIC void
+STATIC bool
S_regtail(pTHX_ RExC_state_t * pRExC_state,
const regnode_offset p,
const regnode_offset val,
}
if (reg_off_by_arg[OP(REGNODE_p(scan))]) {
+ assert(val - scan <= U32_MAX);
ARG_SET(REGNODE_p(scan), val - scan);
}
else {
+ if (val - scan > U16_MAX) {
+ /* Since not all callers check the return value, populate this with
+ * something that won't loop and will likely lead to a crash if
+ * execution continues */
+ NEXT_OFF(REGNODE_p(scan)) = U16_MAX;
+ return FALSE;
+ }
NEXT_OFF(REGNODE_p(scan)) = val - scan;
}
+
+ return TRUE;
}
#ifdef DEBUGGING
Currently only used when in DEBUG mode. The macro REGTAIL_STUDY() is used
to control which is which.
+This used to return a value that was ignored. It was a problem that it is
+#ifdef'd to be another function that didn't return a value. khw has changed it
+so both currently return a pass/fail return.
+
*/
/* TODO: All four parms should be const */
-STATIC U8
+STATIC bool
S_regtail_study(pTHX_ RExC_state_t *pRExC_state, regnode_offset p,
const regnode_offset val, U32 depth)
{
bool unfolded_multi_char; /* Unexamined in this routine */
if (join_exact(pRExC_state, scan, &min,
&unfolded_multi_char, 1, REGNODE_p(val), depth+1))
- return EXACT;
+ return TRUE; /* Was return EXACT */
}
#endif
if ( exact ) {
);
});
if (reg_off_by_arg[OP(REGNODE_p(scan))]) {
+ assert(val - scan <= U32_MAX);
ARG_SET(REGNODE_p(scan), val - scan);
}
else {
+ if (val - scan > U16_MAX) {
+ NEXT_OFF(REGNODE_p(scan)) = U16_MAX;
+ return FALSE;
+ }
NEXT_OFF(REGNODE_p(scan)) = val - scan;
}
- return exact;
+ return TRUE; /* Was 'return exact' */
}
#endif
assert(FLAGS(o) < C_ARRAY_LENGTH(bounds));
sv_catpv(sv, bounds[FLAGS(o)]);
}
- else if (k == BRANCHJ && (OP(o) == UNLESSM || OP(o) == IFMATCH))
- Perl_sv_catpvf(aTHX_ sv, "[%d]", -(o->flags));
+ else if (k == BRANCHJ && (OP(o) == UNLESSM || OP(o) == IFMATCH)) {
+ Perl_sv_catpvf(aTHX_ sv, "[%d", -(o->flags));
+ if (o->next_off) {
+ Perl_sv_catpvf(aTHX_ sv, "..-%d", o->flags - o->next_off);
+ }
+ Perl_sv_catpvf(aTHX_ sv, "]");
+ }
else if (OP(o) == SBOL)
Perl_sv_catpvf(aTHX_ sv, " /%s/", o->flags ? "\\A" : "^");