I32 seen_zerolen;
regnode **open_parens; /* pointers to open parens */
regnode **close_parens; /* pointers to close parens */
- regnode *opend; /* END node in program */
+ regnode *end_op; /* END node in program */
I32 utf8; /* whether the pattern is utf8 or not */
I32 orig_utf8; /* whether the pattern was originally in utf8 */
/* XXX use this for future optimisation of case
HV *paren_names; /* Paren names */
regnode **recurse; /* Recurse regops */
- I32 recurse_count; /* Number of recurse regops */
+ I32 recurse_count; /* Number of recurse regops we have generated */
U8 *study_chunk_recursed; /* bitmap of which subs we have moved
through */
U32 study_chunk_recursed_bytes; /* bytes in bitmap */
#define RExC_orig_utf8 (pRExC_state->orig_utf8)
#define RExC_open_parens (pRExC_state->open_parens)
#define RExC_close_parens (pRExC_state->close_parens)
-#define RExC_opend (pRExC_state->opend)
+#define RExC_end_op (pRExC_state->end_op)
#define RExC_paren_names (pRExC_state->paren_names)
#define RExC_recurse (pRExC_state->recurse)
#define RExC_recurse_count (pRExC_state->recurse_count)
if (RExC_seen & REG_UNFOLDED_MULTI_SEEN) \
PerlIO_printf(Perl_debug_log,"REG_UNFOLDED_MULTI_SEEN "); \
\
- if (RExC_seen & REG_GOSTART_SEEN) \
- PerlIO_printf(Perl_debug_log,"REG_GOSTART_SEEN "); \
- \
if (RExC_seen & REG_UNBOUNDED_QUANTIFIER_SEEN) \
PerlIO_printf(Perl_debug_log,"REG_UNBOUNDED_QUANTIFIER_SEEN "); \
\
DEBUG_TRIE_COMPILE_r({
regprop(RExC_rx, RExC_mysv, tail, NULL, pRExC_state);
- PerlIO_printf( Perl_debug_log, "%*s%s%s\n",
+ PerlIO_printf( Perl_debug_log, "%*s%s %d:%s\n",
(int)depth * 2 + 2, "",
- "Looking for TRIE'able sequences. Tail node is: ",
+ "Looking for TRIE'able sequences. Tail node is ",
+ tail - RExC_emit_start,
SvPV_nolen_const( RExC_mysv )
);
});
U8 noper_trietype = TRIE_TYPE( noper_type );
#if defined(DEBUGGING) || defined(NOJUMPTRIE)
regnode * const noper_next = regnext( noper );
- U8 noper_next_type = (noper_next && noper_next != tail) ? OP(noper_next) : 0;
- U8 noper_next_trietype = (noper_next && noper_next != tail) ? TRIE_TYPE( noper_next_type ) :0;
+ U8 noper_next_type = (noper_next && noper_next < tail) ? OP(noper_next) : 0;
+ U8 noper_next_trietype = (noper_next && noper_next < tail) ? TRIE_TYPE( noper_next_type ) :0;
#endif
DEBUG_TRIE_COMPILE_r({
regprop(RExC_rx, RExC_mysv, cur, NULL, pRExC_state);
- PerlIO_printf( Perl_debug_log, "%*s- %s (%d)",
- (int)depth * 2 + 2,"", SvPV_nolen_const( RExC_mysv ), REG_NODE_NUM(cur) );
+ PerlIO_printf( Perl_debug_log, "%*s- %d:%s (%d)",
+ (int)depth * 2 + 2,"",
+ REG_NODE_NUM(cur), SvPV_nolen_const( RExC_mysv ), REG_NODE_NUM(cur) );
regprop(RExC_rx, RExC_mysv, noper, NULL, pRExC_state);
- PerlIO_printf( Perl_debug_log, " -> %s",
- SvPV_nolen_const(RExC_mysv));
+ PerlIO_printf( Perl_debug_log, " -> %d:%s",
+ REG_NODE_NUM(noper), SvPV_nolen_const(RExC_mysv));
if ( noper_next ) {
regprop(RExC_rx, RExC_mysv, noper_next, NULL, pRExC_state);
- PerlIO_printf( Perl_debug_log,"\t=> %s\t",
- SvPV_nolen_const(RExC_mysv));
+ PerlIO_printf( Perl_debug_log,"\t=> %d:%s\t",
+ REG_NODE_NUM(noper_next), SvPV_nolen_const(RExC_mysv));
}
- PerlIO_printf( Perl_debug_log, "(First==%d,Last==%d,Cur==%d,tt==%s,nt==%s,nnt==%s)\n",
+ PerlIO_printf( Perl_debug_log, "(First==%d,Last==%d,Cur==%d,tt==%s,ntt==%s,nntt==%s)\n",
REG_NODE_NUM(first), REG_NODE_NUM(last), REG_NODE_NUM(cur),
PL_reg_name[trietype], PL_reg_name[noper_trietype], PL_reg_name[noper_next_trietype]
);
if ( noper_trietype
&&
(
- ( noper_trietype == NOTHING)
+ ( noper_trietype == NOTHING )
|| ( trietype == NOTHING )
|| ( trietype == noper_trietype )
)
if ( noper_trietype == NOTHING ) {
#if !defined(DEBUGGING) && !defined(NOJUMPTRIE)
regnode * const noper_next = regnext( noper );
- U8 noper_next_type = (noper_next && noper_next!=tail) ? OP(noper_next) : 0;
+ U8 noper_next_type = (noper_next && noper_next < tail) ? OP(noper_next) : 0;
U8 noper_next_trietype = noper_next_type ? TRIE_TYPE( noper_next_type ) :0;
#endif
DEBUG_TRIE_COMPILE_r({
regprop(RExC_rx, RExC_mysv, cur, NULL, pRExC_state);
PerlIO_printf( Perl_debug_log,
- "%*s- %s (%d) <SCAN FINISHED>\n",
+ "%*s- %s (%d) <SCAN FINISHED> ",
(int)depth * 2 + 2,
"", SvPV_nolen_const( RExC_mysv ),REG_NODE_NUM(cur));
+ PerlIO_printf( Perl_debug_log, "(First==%d, Last==%d, Cur==%d, tt==%s)\n",
+ REG_NODE_NUM(first), REG_NODE_NUM(last), REG_NODE_NUM(cur),
+ PL_reg_name[trietype]
+ );
});
if ( last && trietype ) {
} else /* single branch is optimized. */
scan = NEXTOPER(scan);
continue;
- } else if (OP(scan) == SUSPEND || OP(scan) == GOSUB || OP(scan) == GOSTART) {
+ } else if (OP(scan) == SUSPEND || OP(scan) == GOSUB) {
I32 paren = 0;
regnode *start = NULL;
regnode *end = NULL;
U32 my_recursed_depth= recursed_depth;
-
- if (OP(scan) != SUSPEND) { /* GOSUB/GOSTART */
+ if (OP(scan) != SUSPEND) { /* GOSUB */
/* Do setup, note this code has side effects beyond
* the rest of this block. Specifically setting
* RExC_recurse[] must happen at least once during
* study_chunk(). */
- if (OP(scan) == GOSUB) {
- paren = ARG(scan);
- RExC_recurse[ARG2L(scan)] = scan;
- start = RExC_open_parens[paren-1];
- end = RExC_close_parens[paren-1];
- } else {
- start = RExC_rxi->program + 1;
- end = RExC_opend;
- }
+ paren = ARG(scan);
+ RExC_recurse[ARG2L(scan)] = scan;
+ start = RExC_open_parens[paren];
+ end = RExC_close_parens[paren];
+
/* NOTE we MUST always execute the above code, even
- * if we do nothing with a GOSUB/GOSTART */
+ * if we do nothing with a GOSUB */
if (
( flags & SCF_IN_DEFINE )
||
RExC_study_chunk_recursed_bytes, U8);
}
/* we havent recursed into this paren yet, so recurse into it */
- DEBUG_STUDYDATA("set:", data,depth);
+ DEBUG_STUDYDATA("gosub-set:", data,depth);
PAREN_SET(RExC_study_chunk_recursed + (recursed_depth * RExC_study_chunk_recursed_bytes), paren);
my_recursed_depth= recursed_depth + 1;
} else {
- DEBUG_STUDYDATA("inf:", data,depth);
+ DEBUG_STUDYDATA("gosub-inf:", data,depth);
/* some form of infinite recursion, assume infinite length
* */
if (flags & SCF_DO_SUBSTR) {
if (OP(nxt) != CLOSE)
goto nogo;
if (RExC_open_parens) {
- RExC_open_parens[ARG(nxt1)-1]=oscan; /*open->CURLYM*/
- RExC_close_parens[ARG(nxt1)-1]=nxt+2; /*close->while*/
+ RExC_open_parens[ARG(nxt1)]=oscan; /*open->CURLYM*/
+ RExC_close_parens[ARG(nxt1)]=nxt+2; /*close->while*/
}
/* Now we know that nxt2 is the only contents: */
oscan->flags = (U8)ARG(nxt);
oscan->flags = (U8)ARG(nxt);
if (RExC_open_parens) {
- RExC_open_parens[ARG(nxt1)-1]=oscan; /*open->CURLYM*/
- RExC_close_parens[ARG(nxt1)-1]=nxt2+1; /*close->NOTHING*/
+ RExC_open_parens[ARG(nxt1)]=oscan; /*open->CURLYM*/
+ RExC_close_parens[ARG(nxt1)]=nxt2+1; /*close->NOTHING*/
}
OP(nxt1) = OPTIMIZED; /* was OPEN. */
OP(nxt) = OPTIMIZED; /* was CLOSE. */
RExC_whilem_seen = 0;
RExC_open_parens = NULL;
RExC_close_parens = NULL;
- RExC_opend = NULL;
+ RExC_end_op = NULL;
RExC_paren_names = NULL;
#ifdef DEBUGGING
RExC_paren_name_list = NULL;
r->intflags = 0;
r->nparens = RExC_npar - 1; /* set early to validate backrefs */
- /* setup various meta data about recursion, this all requires
- * RExC_npar to be correctly set, and a bit later on we clear it */
- if (RExC_seen & REG_RECURSE_SEEN) {
- Newxz(RExC_open_parens, RExC_npar,regnode *);
- SAVEFREEPV(RExC_open_parens);
- Newxz(RExC_close_parens,RExC_npar,regnode *);
- SAVEFREEPV(RExC_close_parens);
- }
- if (RExC_seen & (REG_RECURSE_SEEN | REG_GOSTART_SEEN)) {
- /* Note, RExC_npar is 1 + the number of parens in a pattern.
- * So its 1 if there are no parens. */
- RExC_study_chunk_recursed_bytes= (RExC_npar >> 3) +
- ((RExC_npar & 0x07) != 0);
- Newx(RExC_study_chunk_recursed,
- RExC_study_chunk_recursed_bytes * RExC_npar, U8);
- SAVEFREEPV(RExC_study_chunk_recursed);
- }
-
/* Useful during FAIL. */
#ifdef RE_TRACK_PATTERN_OFFSETS
Newxz(ri->u.offsets, 2*RExC_size+1, U32); /* MJD 20001228 */
RExC_parse = exp;
RExC_end = exp + plen;
RExC_naughty = 0;
- RExC_npar = 1;
RExC_emit_start = ri->program;
RExC_emit = ri->program;
RExC_emit_bound = ri->program + RExC_size + 1;
pRExC_state->code_index = 0;
*((char*) RExC_emit++) = (char) REG_MAGIC;
+ /* setup various meta data about recursion, this all requires
+ * RExC_npar to be correctly set, and a bit later on we clear it */
+ if (RExC_seen & REG_RECURSE_SEEN) {
+ DEBUG_OPTIMISE_MORE_r(PerlIO_printf(Perl_debug_log,
+ "%*s%*s Setting up open/close parens\n",
+ 22, "| |", (int)(0 * 2 + 1), ""));
+
+ /* setup RExC_open_parens, which holds the address of each
+ * OPEN tag, and to make things simpler for the 0 index
+ * the start of the program - this is used later for offsets */
+ Newxz(RExC_open_parens, RExC_npar,regnode *);
+ SAVEFREEPV(RExC_open_parens);
+ RExC_open_parens[0] = RExC_emit;
+
+ /* setup RExC_close_parens, which holds the address of each
+ * CLOSE tag, and to make things simpler for the 0 index
+ * the end of the program - this is used later for offsets */
+ Newxz(RExC_close_parens, RExC_npar,regnode *);
+ SAVEFREEPV(RExC_close_parens);
+ /* we dont know where end op starts yet, so we dont
+ * need to set RExC_close_parens[0] like we do RExC_open_parens[0] above */
+
+ /* Note, RExC_npar is 1 + the number of parens in a pattern.
+ * So its 1 if there are no parens. */
+ RExC_study_chunk_recursed_bytes= (RExC_npar >> 3) +
+ ((RExC_npar & 0x07) != 0);
+ Newx(RExC_study_chunk_recursed,
+ RExC_study_chunk_recursed_bytes * RExC_npar, U8);
+ SAVEFREEPV(RExC_study_chunk_recursed);
+ }
+ RExC_npar = 1;
if (reg(pRExC_state, 0, &flags,1) == NULL) {
ReREFCNT_dec(rx);
Perl_croak(aTHX_ "panic: reg returned NULL to re_op_compile for generation pass, flags=%#"UVxf"", (UV) flags);
}
+ DEBUG_OPTIMISE_r(
+ PerlIO_printf(Perl_debug_log, "Starting post parse optimization\n");
+ );
+
/* XXXX To minimize changes to RE engine we always allocate
3-units-long substrs field. */
Newx(r->substrs, 1, struct reg_substr_data);
if (r->minlen < minlen)
r->minlen = minlen;
+ if (RExC_seen & REG_RECURSE_SEEN ) {
+ r->intflags |= PREGf_RECURSE_SEEN;
+ Newxz(r->recurse_locinput, r->nparens + 1, char *);
+ }
if (RExC_seen & REG_GPOS_SEEN)
r->intflags |= PREGf_GPOS_SEEN;
if (RExC_seen & REG_LOOKBEHIND_SEEN)
= (void*)SvREFCNT_inc(RExC_paren_name_list);
} else
#endif
- ri->name_list_idx = 0;
+ ri->name_list_idx = 0;
- if (RExC_recurse_count) {
- for ( ; RExC_recurse_count ; RExC_recurse_count-- ) {
- const regnode *scan = RExC_recurse[RExC_recurse_count-1];
- ARG2L_SET( scan, RExC_open_parens[ARG(scan)-1] - scan );
- }
+ while ( RExC_recurse_count > 0 ) {
+ const regnode *scan = RExC_recurse[ --RExC_recurse_count ];
+ ARG2L_SET( scan, RExC_open_parens[ARG(scan)] - scan );
}
+
Newxz(r->offs, RExC_npar, regexp_paren_pair);
/* assume we don't need to swap parens around before we match */
DEBUG_TEST_r({
break;
case '0' : /* (?0) */
case 'R' : /* (?R) */
- if (*RExC_parse != ')')
+ if (RExC_parse == RExC_end || *RExC_parse != ')')
FAIL("Sequence (?R) not terminated");
- ret = reg_node(pRExC_state, GOSTART);
- RExC_seen |= REG_GOSTART_SEEN;
+ num = 0;
+ RExC_seen |= REG_RECURSE_SEEN;
*flagp |= POSTPONED;
- nextchar(pRExC_state);
- return ret;
+ goto gen_recurse_regop;
/*notreached*/
/* named and numeric backreferences */
case '&': /* (?&NAME) */
} else if ( paren == '+' ) {
num = RExC_npar + num - 1;
}
+ /* We keep track how many GOSUB items we have produced.
+ To start off the ARG2L() of the GOSUB holds its "id",
+ which is used later in conjunction with RExC_recurse
+ to calculate the offset we need to jump for the GOSUB,
+ which it will store in the final representation.
+ We have to defer the actual calculation until much later
+ as the regop may move.
+ */
ret = reg2Lanode(pRExC_state, GOSUB, num, RExC_recurse_count);
if (!SIZE_ONLY) {
(UV)ARG(ret), (IV)ARG2L(ret)));
}
RExC_seen |= REG_RECURSE_SEEN;
+
Set_Node_Length(ret, 1 + regarglen[OP(ret)]); /* MJD */
Set_Node_Offset(ret, parse_start); /* MJD */
*flagp |= POSTPONED;
+ assert(*RExC_parse == ')');
nextchar(pRExC_state);
return ret;
}
else if (RExC_parse[0] == 'R') {
RExC_parse++;
+ /* parno == 0 => /(?(R)YES|NO)/ "in any form of recursion OR eval"
+ * parno == 1 => /(?(R0)YES|NO)/ "in GOSUB (?0) / (?R)"
+ * parno == 2 => /(?(R1)YES|NO)/ "in GOSUB (?1) (parno-1)"
+ */
parno = 0;
- if (RExC_parse[0] >= '1' && RExC_parse[0] <= '9' ) {
+ if (RExC_parse[0] == '0') {
+ parno = 1;
+ RExC_parse++;
+ }
+ else if (RExC_parse[0] >= '1' && RExC_parse[0] <= '9' ) {
UV uv;
if (grok_atoUV(RExC_parse, &uv, &endptr)
&& uv <= I32_MAX
) {
- parno = (I32)uv;
+ parno = (I32)uv + 1;
RExC_parse = (char*)endptr;
}
/* else "Switch condition not recognized" below */
SIZE_ONLY
? REG_RSN_RETURN_NULL
: REG_RSN_RETURN_DATA);
- parno = sv_dat ? *((I32 *)SvPVX(sv_dat)) : 0;
+
+ /* we should only have a false sv_dat when
+ * SIZE_ONLY is true, and we always have false
+ * sv_dat when SIZE_ONLY is true.
+ * reg_scan_name() will VFAIL() if the name is
+ * unknown when SIZE_ONLY is false, and otherwise
+ * will return something, and when SIZE_ONLY is
+ * true, reg_scan_name() just parses the string,
+ * and doesnt return anything. (in theory) */
+ assert(SIZE_ONLY ? !sv_dat : !!sv_dat);
+
+ if (sv_dat)
+ parno = 1 + *((I32 *)SvPVX(sv_dat));
}
ret = reganode(pRExC_state,INSUBP,parno);
goto insert_if_check_paren;
if (!SIZE_ONLY ){
if (!RExC_nestroot)
RExC_nestroot = parno;
- if (RExC_seen & REG_RECURSE_SEEN
- && !RExC_open_parens[parno-1])
+ if (RExC_open_parens && !RExC_open_parens[parno])
{
DEBUG_OPTIMISE_MORE_r(PerlIO_printf(Perl_debug_log,
"%*s%*s Setting open paren #%"IVdf" to %d\n",
22, "| |", (int)(depth * 2 + 1), "",
(IV)parno, REG_NODE_NUM(ret)));
- RExC_open_parens[parno-1]= ret;
+ RExC_open_parens[parno]= ret;
}
}
Set_Node_Length(ret, 1); /* MJD */
break;
case 1: case 2:
ender = reganode(pRExC_state, CLOSE, parno);
- if (!SIZE_ONLY && RExC_seen & REG_RECURSE_SEEN) {
+ if ( RExC_close_parens ) {
DEBUG_OPTIMISE_MORE_r(PerlIO_printf(Perl_debug_log,
"%*s%*s Setting close paren #%"IVdf" to %d\n",
22, "| |", (int)(depth * 2 + 1), "", (IV)parno, REG_NODE_NUM(ender)));
- RExC_close_parens[parno-1]= ender;
+ RExC_close_parens[parno]= ender;
if (RExC_nestroot == parno)
RExC_nestroot = 0;
}
case 0:
ender = reg_node(pRExC_state, END);
if (!SIZE_ONLY) {
- assert(!RExC_opend); /* there can only be one! */
- RExC_opend = ender;
+ assert(!RExC_end_op); /* there can only be one! */
+ RExC_end_op = ender;
+ if (RExC_close_parens) {
+ DEBUG_OPTIMISE_MORE_r(PerlIO_printf(Perl_debug_log,
+ "%*s%*s Setting close paren #0 (END) to %d\n",
+ 22, "| |", (int)(depth * 2 + 1), "", REG_NODE_NUM(ender)));
+
+ RExC_close_parens[0]= ender;
+ }
}
break;
}
}
else if (value == '\\') {
/* Is a backslash; get the code point of the char after it */
+
+ if (RExC_parse >= RExC_end) {
+ vFAIL("Unmatched [");
+ }
+
if (UTF && ! UTF8_IS_INVARIANT(UCHARAT(RExC_parse))) {
value = utf8n_to_uvchr((U8*)RExC_parse,
RExC_end - RExC_parse,
* discussed in commit
* 2f833f5208e26b208886e51e09e2c072b5eabb46 */
name = savepv(Perl_form(aTHX_ "%.*s", (int)n, RExC_parse));
+ SAVEFREEPV(name);
if (FOLD) {
lookup_name = savepv(Perl_form(aTHX_ "__%s_i", name));
}
? "Illegal user-defined property name"
: "Can't find Unicode property definition";
RExC_parse = e + 1;
- SAVEFREEPV(name);
/* diag_listed_as: Can't find Unicode property definition "%s" */
vFAIL3utf8f("%s \"%"UTF8f"\"",
pkgname,
name);
n = strlen(full_name);
- Safefree(name);
name = savepvn(full_name, n);
+ SAVEFREEPV(name);
}
}
Perl_sv_catpvf(aTHX_ listsv, "%cutf8::%s%"UTF8f"%s\n",
_invlist_union(properties, invlist, &properties);
}
}
- Safefree(name);
}
RExC_parse = e + 1;
namedclass = ANYOF_UNIPROP; /* no official name, but it's
}
}
}
+ if (RExC_end_op)
+ RExC_end_op += size;
while (src > opnd) {
StructCopy(--src, --dst, regnode);
}
/* Paren and offset */
- Perl_sv_catpvf(aTHX_ sv, "%d[%+d]", (int)ARG(o),(int)ARG2L(o));
+ Perl_sv_catpvf(aTHX_ sv, "%d[%+d:%d]", (int)ARG(o),(int)ARG2L(o),
+ (int)((o + (int)ARG2L(o)) - progi->program) );
if (name_list) {
SV **name= av_fetch(name_list, ARG(o), 0 );
if (name)
#endif
Safefree(r->offs);
SvREFCNT_dec(r->qr_anoncv);
+ if (r->recurse_locinput)
+ Safefree(r->recurse_locinput);
rx->sv_u.svu_rx = 0;
}
#endif
ret->mother_re = ReREFCNT_inc(r->mother_re ? r->mother_re : rx);
SvREFCNT_inc_void(ret->qr_anoncv);
+ if (r->recurse_locinput)
+ Newxz(ret->recurse_locinput,r->nparens + 1,char *);
return ret_x;
}
#define SAVEPVN(p,n) ((p) ? savepvn(p,n) : NULL)
/*
- re_dup - duplicate a regexp.
+ re_dup_guts - duplicate a regexp.
This routine is expected to clone a given regexp structure. It is only
compiled under USE_ITHREADS.
RXp_PAREN_NAMES(ret) = hv_dup_inc(RXp_PAREN_NAMES(ret), param);
ret->qr_anoncv = MUTABLE_CV(sv_dup_inc((const SV *)ret->qr_anoncv, param));
+ if (r->recurse_locinput)
+ Newxz(ret->recurse_locinput,r->nparens + 1,char *);
if (ret->pprivate)
RXi_SET(ret,CALLREGDUPE_PVT(dstr,param));
char, regexp_internal);
Copy(ri->program, reti->program, len+1, regnode);
+
reti->num_code_blocks = ri->num_code_blocks;
if (ri->code_blocks) {
int n;
d->data[i] = ri->data->data[i];
break;
default:
- Perl_croak(aTHX_ "panic: re_dup unknown data code '%c'",
+ Perl_croak(aTHX_ "panic: re_dup_guts unknown data code '%c'",
ri->data->what[i]);
}
}