I32 nestroot; /* root parens we are in - used by accept */
I32 extralen;
I32 seen_zerolen;
- I32 seen_evals;
regnode **open_parens; /* pointers to open parens */
regnode **close_parens; /* pointers to close parens */
regnode *opend; /* END node in program */
#define RExC_nestroot (pRExC_state->nestroot)
#define RExC_extralen (pRExC_state->extralen)
#define RExC_seen_zerolen (pRExC_state->seen_zerolen)
-#define RExC_seen_evals (pRExC_state->seen_evals)
#define RExC_utf8 (pRExC_state->utf8)
#define RExC_uni_semantics (pRExC_state->uni_semantics)
#define RExC_orig_utf8 (pRExC_state->orig_utf8)
int n = 0;
STRLEN s;
char *p, *newpat;
- int newlen = plen + 5; /* allow for "qr''x" extra chars */
+ int newlen = plen + 6; /* allow for "qr''x\0" extra chars */
SV *sv, *qr_ref;
dSP;
REGEXP *
Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
OP *expr, const regexp_engine* eng, REGEXP *VOL old_re,
- int *is_bare_re, U32 orig_rx_flags, U32 pm_flags)
+ bool *is_bare_re, U32 orig_rx_flags, U32 pm_flags)
{
dVAR;
REGEXP *rx;
pRExC_state->num_code_blocks = 0;
if (is_bare_re)
- *is_bare_re = 0;
+ *is_bare_re = FALSE;
if (expr && (expr->op_type == OP_LIST ||
(expr->op_type == OP_NULL && expr->op_targ == OP_LIST))) {
OP *o = NULL;
int n = 0;
bool utf8 = 0;
+ STRLEN orig_patlen = 0;
if (pRExC_state->num_code_blocks) {
o = cLISTOPx(expr)->op_first;
o = o->op_sibling;;
}
+ if ((SvAMAGIC(pat) || SvAMAGIC(msv)) &&
+ (sv = amagic_call(pat, msv, concat_amg, AMGf_assign)))
+ {
+ sv_setsv(pat, sv);
+ /* overloading involved: all bets are off over literal
+ * code. Pretend we haven't seen it */
+ pRExC_state->num_code_blocks -= n;
+ n = 0;
+ rx = NULL;
+
+ }
+ else {
+ while (SvAMAGIC(msv)
+ && (sv = AMG_CALLunary(msv, string_amg))
+ && sv != msv)
+ {
+ msv = sv;
+ SvGETMAGIC(msv);
+ }
+ if (SvROK(msv) && SvTYPE(SvRV(msv)) == SVt_REGEXP)
+ msv = SvRV(msv);
+ orig_patlen = SvCUR(pat);
+ sv_catsv_nomg(pat, msv);
+ rx = msv;
+ if (code)
+ pRExC_state->code_blocks[n-1].end = SvCUR(pat)-1;
+ }
+
/* extract any code blocks within any embedded qr//'s */
- rx = msv;
- if (SvROK(rx))
- rx = SvRV(rx);
- if (SvTYPE(rx) == SVt_REGEXP
+ if (rx && SvTYPE(rx) == SVt_REGEXP
&& RX_ENGINE((REGEXP*)rx)->op_comp)
{
pRExC_state->num_code_blocks += ri->num_code_blocks;
for (i=0; i < ri->num_code_blocks; i++) {
struct reg_code_block *src, *dst;
- STRLEN offset = SvCUR(pat)
+ STRLEN offset = orig_patlen
+ ((struct regexp *)SvANY(rx))->pre_prefix;
assert(n < pRExC_state->num_code_blocks);
src = &ri->code_blocks[i];
}
}
}
-
- if ((SvAMAGIC(pat) || SvAMAGIC(msv)) &&
- (sv = amagic_call(pat, msv, concat_amg, AMGf_assign)))
- {
- sv_setsv(pat, sv);
- /* overloading involved: all bets are off over literal
- * code. Pretend we haven't seen it */
- pRExC_state->num_code_blocks -= n;
- n = 0;
-
- }
- else {
- if (SvROK(msv) && SvTYPE(SvRV(msv)) == SVt_REGEXP) {
- msv = SvRV(msv);
- PL_reginterp_cnt +=
- RX_SEEN_EVALS((REGEXP *)MUTABLE_PTR(msv));
- }
- sv_catsv_nomg(pat, msv);
- if (code)
- pRExC_state->code_blocks[n-1].end = SvCUR(pat)-1;
- }
}
SvSETMAGIC(pat);
}
- else
+ else {
+ SV *sv;
pat = *patternp;
+ while (SvAMAGIC(pat)
+ && (sv = AMG_CALLunary(pat, string_amg))
+ && sv != pat)
+ {
+ pat = sv;
+ SvGETMAGIC(pat);
+ }
+ }
/* handle bare regex: foo =~ $re */
{
re = SvRV(re);
if (SvTYPE(re) == SVt_REGEXP) {
if (is_bare_re)
- *is_bare_re = 1;
+ *is_bare_re = TRUE;
SvREFCNT_inc(re);
Safefree(pRExC_state->code_blocks);
return (REGEXP*)re;
RExC_seen = 0;
RExC_in_lookbehind = 0;
RExC_seen_zerolen = *exp == '^' ? -1 : 0;
- RExC_seen_evals = 0;
RExC_extralen = 0;
RExC_override_recoding = 0;
RExC_emit_bound = ri->program + RExC_size + 1;
pRExC_state->code_index = 0;
- /* Store the count of eval-groups for security checks: */
- RExC_rx->seen_evals = RExC_seen_evals;
REGC((U8)REG_MAGIC, (char*) RExC_emit++);
if (reg(pRExC_state, 0, &flags,1) == NULL) {
ReREFCNT_dec(rx);
else if ((!sawopen || !RExC_sawback) &&
(OP(first) == STAR &&
PL_regkind[OP(NEXTOPER(first))] == REG_ANY) &&
- !(r->extflags & RXf_ANCH) && !(RExC_seen & REG_SEEN_EVAL))
+ !(r->extflags & RXf_ANCH) && !pRExC_state->num_code_blocks)
{
/* turn .* into ^.* with an implied $*=1 */
const int type =
goto again;
}
if (sawplus && !sawlookahead && (!sawopen || !RExC_sawback)
- && !(RExC_seen & REG_SEEN_EVAL)) /* May examine pos and $& */
+ && !pRExC_state->num_code_blocks) /* May examine pos and $& */
/* x+ must match at the 1st pos of run of x's */
r->intflags |= PREGf_SKIP;
r->extflags |= RXf_GPOS_SEEN;
if (RExC_seen & REG_SEEN_LOOKBEHIND)
r->extflags |= RXf_LOOKBEHIND_SEEN;
- if (RExC_seen & REG_SEEN_EVAL)
+ if (pRExC_state->num_code_blocks)
r->extflags |= RXf_EVAL_SEEN;
if (RExC_seen & REG_SEEN_CANY)
r->extflags |= RXf_CANY_SEEN;
r->intflags |= PREGf_VERBARG_SEEN;
if (RExC_seen & REG_SEEN_CUTGROUP)
r->intflags |= PREGf_CUTGROUP_SEEN;
+ if (pm_flags & PMf_USE_RE_EVAL)
+ r->intflags |= PREGf_USE_RE_EVAL;
if (RExC_paren_names)
RXp_PAREN_NAMES(r) = MUTABLE_HV(SvREFCNT_inc(RExC_paren_names));
else
Perl_croak(aTHX_ "panic: bad flag %lx in reg_scan_name",
(unsigned long) flags);
}
- /* NOT REACHED */
+ assert(0); /* NOT REACHED */
}
return NULL;
}
num = sv_dat ? *((I32 *)SvPVX(sv_dat)) : 0;
}
goto gen_recurse_regop;
- /* NOT REACHED */
+ assert(0); /* NOT REACHED */
case '+':
if (!(RExC_parse[0] >= '1' && RExC_parse[0] <= '9')) {
RExC_parse++;
nextchar(pRExC_state);
return ret;
} /* named and numeric backreferences */
- /* NOT REACHED */
+ assert(0); /* NOT REACHED */
case '?': /* (??...) */
is_logical = 1;
struct reg_code_block *cb;
RExC_seen_zerolen++;
- RExC_seen |= REG_SEEN_EVAL;
if ( !pRExC_state->num_code_blocks
|| pRExC_state->code_index >= pRExC_state->num_code_blocks
/* this is a pre-compiled code block (?{...}) */
cb = &pRExC_state->code_blocks[pRExC_state->code_index];
RExC_parse = RExC_start + cb->end;
- if (SIZE_ONLY)
- RExC_seen_evals++;
- else {
+ if (!SIZE_ONLY) {
OP *o = cb->block;
if (cb->src_regex) {
n = add_data(pRExC_state, 2, "rl");
nextchar(pRExC_state);
if (is_logical) {
+ regnode *eval;
ret = reg_node(pRExC_state, LOGICAL);
- if (!SIZE_ONLY)
+ eval = reganode(pRExC_state, EVAL, n);
+ if (!SIZE_ONLY) {
ret->flags = 2;
- REGTAIL(pRExC_state, ret, reganode(pRExC_state, EVAL, n));
+ /* for later propagation into (??{}) return value */
+ eval->flags = (U8) (RExC_flags & RXf_PMf_COMPILETIME);
+ }
+ REGTAIL(pRExC_state, ret, eval);
/* deal with the length of this later - MJD */
return ret;
}
}
else
FAIL("Junk on end of regexp"); /* "Can't happen". */
- /* NOTREACHED */
+ assert(0); /* NOTREACHED */
}
if (RExC_in_lookbehind) {
sequence, we return.
Note: we have to be careful with escapes, as they can be both literal
- and special, and in the case of \10 and friends can either, depending
- on context. Specifically there are two separate switches for handling
+ and special, and in the case of \10 and friends, context determines which.
+
+ A summary of the code structure is:
+
+ switch (first_byte) {
+ cases for each special:
+ handle this special;
+ break;
+ case '\\':
+ switch (2nd byte) {
+ cases for each unambiguous special:
+ handle this special;
+ break;
+ cases for each ambigous special/literal:
+ disambiguate;
+ if (special) handle here
+ else goto defchar;
+ default: // unambiguously literal:
+ goto defchar;
+ }
+ default: // is a literal char
+ // FALL THROUGH
+ defchar:
+ create EXACTish node for literal;
+ while (more input and node isn't full) {
+ switch (input_byte) {
+ cases for each special;
+ make sure parse pointer is set so that the next call to
+ regatom will see this special first
+ goto loopdone; // EXACTish node terminated by prev. char
+ default:
+ append char to EXACTISH node;
+ }
+ get next input byte;
+ }
+ loopdone:
+ }
+ return the generated node;
+
+ Specifically there are two separate switches for handling
escape sequences, with the one for handling literal escapes requiring
a dummy entry for all of the special escapes that are actually handled
by the other.
vFAIL("Reference to nonexistent or unclosed group");
}
if (!isg && num > 9 && num >= RExC_npar)
+ /* Probably a character specified in octal, e.g. \35 */
goto defchar;
else {
char * const parse_start = RExC_parse - 1; /* MJD */
break;
}
case 'x':
- if (*++p == '{') {
- char* const e = strchr(p, '}');
+ {
+ STRLEN brace_len = len;
+ UV result;
+ const char* error_msg;
- if (!e) {
- RExC_parse = p + 1;
- vFAIL("Missing right brace on \\x{}");
+ bool valid = grok_bslash_x(p,
+ &result,
+ &brace_len,
+ &error_msg,
+ 1);
+ p += brace_len;
+ if (! valid) {
+ RExC_parse = p; /* going to die anyway; point
+ to exact spot of failure */
+ vFAIL(error_msg);
}
else {
- I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
- | PERL_SCAN_DISALLOW_PREFIX;
- STRLEN numlen = e - p - 1;
- ender = grok_hex(p + 1, &numlen, &flags, NULL);
- if (ender > 0xff)
- REQUIRE_UTF8;
- p = e + 1;
+ ender = result;
}
+ if (PL_encoding && ender < 0x100) {
+ goto recode_encoding;
+ }
+ if (ender > 0xff) {
+ REQUIRE_UTF8;
+ }
+ break;
}
- else {
- I32 flags = PERL_SCAN_DISALLOW_PREFIX;
- STRLEN numlen = 2;
- ender = grok_hex(p, &numlen, &flags, NULL);
- p += numlen;
- }
- if (PL_encoding && ender < 0x100)
- goto recode_encoding;
- break;
case 'c':
p++;
ender = grok_bslash_c(*p++, UTF, SIZE_ONLY);
break;
case '0': case '1': case '2': case '3':case '4':
- case '5': case '6': case '7': case '8':case '9':
+ case '5': case '6': case '7':
if (*p == '0' ||
(isDIGIT(p[1]) && atoi(p) >= RExC_npar))
{
FAIL("Trailing \\");
/* FALL THROUGH */
default:
- if (!SIZE_ONLY&& isALPHA(*p)) {
+ if (!SIZE_ONLY&& isALNUMC(*p)) {
ckWARN2reg(p + 1, "Unrecognized escape \\%.1s passed through", p);
}
goto normal_default;
}
break;
case 'x':
- if (*RExC_parse == '{') {
- I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
- | PERL_SCAN_DISALLOW_PREFIX;
- char * const e = strchr(RExC_parse++, '}');
- if (!e)
- vFAIL("Missing right brace on \\x{}");
-
- numlen = e - RExC_parse;
- value = grok_hex(RExC_parse, &numlen, &flags, NULL);
- RExC_parse = e + 1;
- }
- else {
- I32 flags = PERL_SCAN_DISALLOW_PREFIX;
- numlen = 2;
- value = grok_hex(RExC_parse, &numlen, &flags, NULL);
+ RExC_parse--; /* function expects to be pointed at the 'x' */
+ {
+ const char* error_msg;
+ bool valid = grok_bslash_x(RExC_parse,
+ &value,
+ &numlen,
+ &error_msg,
+ 1);
RExC_parse += numlen;
+ if (! valid) {
+ vFAIL(error_msg);
+ }
}
if (PL_encoding && value < 0x100)
goto recode_encoding;
Copy(&PL_reg_state, state, 1, struct re_save_state);
- PL_reg_start_tmp = 0;
- PL_reg_start_tmpl = 0;
PL_reg_oldsaved = NULL;
PL_reg_oldsavedlen = 0;
PL_reg_maxiter = 0;