RExC_study_chunk_recursed_bytes, U8);
}
/* we havent recursed into this paren yet, so recurse into it */
- DEBUG_STUDYDATA("set:", data,depth);
+ DEBUG_STUDYDATA("gosub-set:", data,depth);
PAREN_SET(RExC_study_chunk_recursed + (recursed_depth * RExC_study_chunk_recursed_bytes), paren);
my_recursed_depth= recursed_depth + 1;
} else {
- DEBUG_STUDYDATA("inf:", data,depth);
+ DEBUG_STUDYDATA("gosub-inf:", data,depth);
/* some form of infinite recursion, assume infinite length
* */
if (flags & SCF_DO_SUBSTR) {
if (r->minlen < minlen)
r->minlen = minlen;
- if (RExC_seen & REG_RECURSE_SEEN )
+ if (RExC_seen & REG_RECURSE_SEEN ) {
r->intflags |= PREGf_RECURSE_SEEN;
+ Newxz(r->recurse_locinput, r->nparens + 1, char *);
+ }
if (RExC_seen & REG_GPOS_SEEN)
r->intflags |= PREGf_GPOS_SEEN;
if (RExC_seen & REG_LOOKBEHIND_SEEN)
#endif
Safefree(r->offs);
SvREFCNT_dec(r->qr_anoncv);
+ if (r->recurse_locinput)
+ Safefree(r->recurse_locinput);
rx->sv_u.svu_rx = 0;
}
#endif
ret->mother_re = ReREFCNT_inc(r->mother_re ? r->mother_re : rx);
SvREFCNT_inc_void(ret->qr_anoncv);
+ if (r->recurse_locinput)
+ Newxz(ret->recurse_locinput,r->nparens + 1,char *);
return ret_x;
}
#define SAVEPVN(p,n) ((p) ? savepvn(p,n) : NULL)
/*
- re_dup - duplicate a regexp.
+ re_dup_guts - duplicate a regexp.
This routine is expected to clone a given regexp structure. It is only
compiled under USE_ITHREADS.
RXp_PAREN_NAMES(ret) = hv_dup_inc(RXp_PAREN_NAMES(ret), param);
ret->qr_anoncv = MUTABLE_CV(sv_dup_inc((const SV *)ret->qr_anoncv, param));
+ if (r->recurse_locinput)
+ Newxz(ret->recurse_locinput,r->nparens + 1,char *);
if (ret->pprivate)
RXi_SET(ret,CALLREGDUPE_PVT(dstr,param));
char, regexp_internal);
Copy(ri->program, reti->program, len+1, regnode);
+
reti->num_code_blocks = ri->num_code_blocks;
if (ri->code_blocks) {
int n;
d->data[i] = ri->data->data[i];
break;
default:
- Perl_croak(aTHX_ "panic: re_dup unknown data code '%c'",
+ Perl_croak(aTHX_ "panic: re_dup_guts unknown data code '%c'",
ri->data->what[i]);
}
}
/* This is the stuff that used to live in regexp.h that was truly
private to the engine itself. It now lives here. */
-
-
typedef struct regexp_internal {
int name_list_idx; /* Optional data index of an array of paren names */
union {
));
}
+ if (prog->recurse_locinput)
+ Zero(prog->recurse_locinput,prog->nparens + 1, char *);
+
/* Simplest case: anchored match need be tried only once, or with
* MBOL, only at the beginning of each line.
*
bool is_utf8_pat = reginfo->is_utf8_pat;
bool match = FALSE;
-
#ifdef DEBUGGING
GET_RE_DEBUG_FLAGS_DECL;
#endif
regexp *re;
regexp_internal *rei;
regnode *startpoint;
+ U32 arg;
case GOSUB: /* /(...(?1))/ /(...(?&foo))/ */
- if (cur_eval && cur_eval->locinput==locinput) {
- if ( EVAL_CLOSE_PAREN_IS( cur_eval, (U32)ARG(scan) ) )
- Perl_croak(aTHX_ "Infinite recursion in regex");
+ arg= (U32)ARG(scan);
+ if (cur_eval && cur_eval->locinput == locinput) {
if ( ++nochange_depth > max_nochange_depth )
Perl_croak(aTHX_
"Pattern subroutine nesting without pos change"
re = rex;
rei = rexi;
startpoint = scan + ARG2L(scan);
- EVAL_CLOSE_PAREN_SET( st, ARG(scan) ); /* ST.close_paren = 1 + ARG(scan) */
+ EVAL_CLOSE_PAREN_SET( st, arg ); /* ST.close_paren = 1 + ARG(scan) */
+ /* Detect infinite recursion
+ *
+ * A pattern like /(?R)foo/ or /(?<x>(?&y)foo)(?<y>(?&x)bar)/
+ * or "a"=~/(.(?2))((?<=(?=(?1)).))/ could recurse forever.
+ * So we track the position in the string we are at each time
+ * we recurse and if we try to enter the same routine twice from
+ * the same position we fail. This means that a pattern like
+ * "aaabbb"=~/a(?R)?b/ works as expected and does not throw an
+ * error.
+ */
+ if ( rex->recurse_locinput[arg] == locinput ) {
+ DEBUG_r({
+ GET_RE_DEBUG_FLAGS_DECL;
+ DEBUG_EXECUTE_r({
+ PerlIO_printf(Perl_debug_log,
+ "%*s pattern left-recursion without consuming input always fails...\n",
+ REPORT_CODE_OFF + depth*2, "");
+ });
+ });
+ /* this would be infinite recursion, so we fail */
+ sayNO;
+ } else {
+ ST.prev_recurse_locinput= rex->recurse_locinput[arg];
+ rex->recurse_locinput[arg]= locinput;
+ }
/* Save all the positions seen so far. */
ST.cp = regcppush(rex, 0, maxopenparen);
n = ARG(scan);
if (rexi->data->what[n] == 'r') { /* code from an external qr */
- newcv = (ReANY(
- (REGEXP*)(rexi->data->data[n])
- ))->qr_anoncv
- ;
+ newcv = (ReANY(
+ (REGEXP*)(rexi->data->data[n])
+ ))->qr_anoncv;
nop = (OP*)rexi->data->data[n+1];
}
else if (rexi->data->what[n] == 'l') { /* literal code */
startpoint = rei->program + 1;
EVAL_CLOSE_PAREN_CLEAR(st); /* ST.close_paren = 0;
* close_paren only for GOSUB */
+ ST.prev_recurse_locinput= NULL; /* only used for GOSUB */
/* Save all the seen positions so far. */
ST.cp = regcppush(rex, 0, maxopenparen);
REGCP_SET(ST.lastcp);
case EVAL_AB: /* cleanup after a successful (??{A})B */
/* note: this is called twice; first after popping B, then A */
+ if ( cur_eval && cur_eval->u.eval.close_paren )
+ rex->recurse_locinput[cur_eval->u.eval.close_paren - 1] = cur_eval->u.eval.prev_recurse_locinput;
+
rex_sv = ST.prev_rex;
is_utf8_pat = reginfo->is_utf8_pat = cBOOL(RX_UTF8(rex_sv));
SET_reg_curpm(rex_sv);
case EVAL_AB_fail: /* unsuccessfully ran A or B in (??{A})B */
/* note: this is called twice; first after popping B, then A */
+ if ( cur_eval && cur_eval->u.eval.close_paren )
+ rex->recurse_locinput[cur_eval->u.eval.close_paren - 1] = cur_eval->u.eval.prev_recurse_locinput;
+
rex_sv = ST.prev_rex;
is_utf8_pat = reginfo->is_utf8_pat = cBOOL(RX_UTF8(rex_sv));
SET_reg_curpm(rex_sv);
fake_end:
if (cur_eval) {
/* we've just finished A in /(??{A})B/; now continue with B */
+ if ( cur_eval->u.eval.close_paren )
+ rex->recurse_locinput[cur_eval->u.eval.close_paren - 1] = cur_eval->u.eval.prev_recurse_locinput;
st->u.eval.prev_rex = rex_sv; /* inner */
const struct regexp_engine* engine; \
REGEXP *mother_re; /* what re is this a lightweight copy of? */ \
HV *paren_names; /* Optional hash of paren names */ \
+ /*--------------------------------------------------------*/ \
/* Information about the match that the perl core uses to */ \
/* manage things */ \
U32 extflags; /* Flags used both externally and internally */ \
U32 intflags; /* Engine Specific Internal flags */ \
void *pprivate; /* Data private to the regex engine which */ \
/* created this object. */ \
+ /*--------------------------------------------------------*/ \
/* Data about the last/current match. These are modified */ \
/* during matching */ \
U32 lastparen; /* last open paren matched */ \
U32 lastcloseparen; /* last close paren matched */ \
/* Array of offsets for (@-) and (@+) */ \
regexp_paren_pair *offs; \
+ char **recurse_locinput; /* used to detect infinite recursion, XXX: move to internal */ \
+ /*--------------------------------------------------------*/ \
/* saved or original string so \digit works forever. */ \
char *subbeg; \
SV_SAVED_COPY /* If non-NULL, SV which is COW from original */\
SSize_t subcoffset; /* suboffset equiv, but in chars (for @-/@+) */ \
/* Information about the match that isn't often used */ \
SSize_t maxlen; /* mininum possible number of chars in string to match */\
+ /*--------------------------------------------------------*/ \
/* offset from wrapped to the start of precomp */ \
PERL_BITFIELD32 pre_prefix:4; \
/* original flags used to compile the pattern, may differ */ \
/* from extflags in various ways */ \
PERL_BITFIELD32 compflags:9; \
+ /*--------------------------------------------------------*/ \
CV *qr_anoncv /* the anon sub wrapped round qr/(?{..})/ */
typedef struct regexp {
/* structures for holding and saving the state maintained by regmatch() */
#ifndef MAX_RECURSE_EVAL_NOCHANGE_DEPTH
-#define MAX_RECURSE_EVAL_NOCHANGE_DEPTH 1000
+#define MAX_RECURSE_EVAL_NOCHANGE_DEPTH 10
#endif
typedef I32 CHECKPOINT;
CHECKPOINT lastcp;
U32 close_paren; /* which close bracket is our end (+1) */
regnode *B; /* the node following us */
+ char *prev_recurse_locinput;
} eval;
struct {
skip_all_without_unicode_tables();
}
-plan tests => 776; # Update this when adding/deleting tests.
+plan tests => 781; # Update this when adding/deleting tests.
run_tests() unless caller;
fresh_perl_is($code, "", {},
"perl [#126406] panic");
}
+ {
+ # [perl #126182] test for infinite pattern recursion
+ ok("aaabbb"=~/a(?R)?b/, "optional self recursion works");
+ ok("aaabbb"=~/a(?R)?b/, "optional self recursion works");
+ ok(not("aa"=~/(?R)a/), "left-recursion fails fast");
+ ok("bbaa"=~/(?&x)(?(DEFINE)(?<x>(?&y)*a)(?<y>(?&x)*b))/,"inter-cyclic optional left recursion works");
+ ok(not("a"=~/(.(?2))((?<=(?=(?1)).))/),"look ahead left-recursion fails fast");
+ }
} # End of sub run_tests
1;