/* Certain characters are output as a sequence with the first being a
* backslash. */
-#define isBACKSLASHED_PUNCT(c) \
- ((c) == '-' || (c) == ']' || (c) == '\\' || (c) == '^')
+#define isBACKSLASHED_PUNCT(c) strchr("-[]\\^", c)
struct RExC_state_t {
REPORT_LOCATION_ARGS(loc)); \
} STMT_END
-#define vWARN4dep(loc, m, a1, a2, a3) STMT_START { \
- __ASSERT_(PASS2) Perl_warner(aTHX_ packWARN2(WARN_REGEXP,WARN_DEPRECATED), \
- m REPORT_LOCATION, \
- a1, a2, a3, \
- REPORT_LOCATION_ARGS(loc)); \
-} STMT_END
-
#define ckWARN4reg(loc, m, a1, a2, a3) STMT_START { \
__ASSERT_(PASS2) Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), \
m REPORT_LOCATION, \
#define TRIE_LIST_PUSH(state,fid,ns) STMT_START { \
if ( TRIE_LIST_CUR( state ) >=TRIE_LIST_LEN( state ) ) { \
- U32 ging = TRIE_LIST_LEN( state ) *= 2; \
+ U32 ging = TRIE_LIST_LEN( state ) * 2; \
Renew( trie->states[ state ].trans.list, ging, reg_trie_trans_le ); \
+ TRIE_LIST_LEN( state ) = ging; \
} \
TRIE_LIST_ITEM( state, TRIE_LIST_CUR( state ) ).forid = fid; \
TRIE_LIST_ITEM( state, TRIE_LIST_CUR( state ) ).newstate = ns; \
However, this time it's not a subexpression
we care about, but the expression itself. */
&& (maxcount == REG_INFTY)
- && data && ++data->whilem_c < 16) {
+ && data) {
/* This stays as CURLYX, we can put the count/of pair. */
/* Find WHILEM (as in regexec.c) */
regnode *nxt = oscan + NEXT_OFF(oscan);
if (OP(PREVOPER(nxt)) == NOTHING) /* LONGJMP */
nxt += ARG(nxt);
- PREVOPER(nxt)->flags = (U8)(data->whilem_c
- | (RExC_whilem_seen << 4)); /* On WHILEM */
+ nxt = PREVOPER(nxt);
+ if (nxt->flags & 0xf) {
+ /* we've already set whilem count on this node */
+ } else if (++data->whilem_c < 16) {
+ assert(data->whilem_c <= RExC_whilem_seen);
+ nxt->flags = (U8)(data->whilem_c
+ | (RExC_whilem_seen << 4)); /* On WHILEM */
+ }
}
if (data && fl & (SF_HAS_PAR|SF_IN_PAR))
pars++;
{
int n;
- if (cbs->attached)
+ if (--cbs->refcnt > 0)
return;
- for (n = 0; n < cbs->count; n++)
- SvREFCNT_dec(cbs->cb[n].src_regex);
+ for (n = 0; n < cbs->count; n++) {
+ REGEXP *rx = cbs->cb[n].src_regex;
+ cbs->cb[n].src_regex = NULL;
+ SvREFCNT_dec(rx);
+ }
Safefree(cbs->cb);
Safefree(cbs);
}
struct reg_code_blocks *cbs;
Newx(cbs, 1, struct reg_code_blocks);
cbs->count = ncode;
- cbs->attached = FALSE;
+ cbs->refcnt = 1;
SAVEDESTRUCTOR_X(S_free_codeblocks, cbs);
if (ncode)
Newx(cbs->cb, ncode, struct reg_code_block);
while (s < *plen_p) {
append_utf8_from_native_byte(src[s], &d);
+
if (n < num_code_blocks) {
+ assert(pRExC_state->code_blocks);
if (!do_end && pRExC_state->code_blocks->cb[n].start == s) {
pRExC_state->code_blocks->cb[n].start = d - dst - 1;
assert(*(d - 1) == '(');
* different closure than last time */
*recompile_p = 1;
if (pRExC_state->code_blocks) {
- pRExC_state->code_blocks->count += ri->code_blocks->count;
+ int new_count = pRExC_state->code_blocks->count
+ + ri->code_blocks->count;
Renew(pRExC_state->code_blocks->cb,
- pRExC_state->code_blocks->count,
- struct reg_code_block);
+ new_count, struct reg_code_block);
+ pRExC_state->code_blocks->count = new_count;
}
else
pRExC_state->code_blocks = S_alloc_code_blocks(aTHX_
int n = 0;
STRLEN s;
char *p, *newpat;
- int newlen = plen + 6; /* allow for "qr''x\0" extra chars */
+ int newlen = plen + 7; /* allow for "qr''xx\0" extra chars */
SV *sv, *qr_ref;
dSP;
SV * const errsv = ERRSV;
if (SvTRUE_NN(errsv))
/* use croak_sv ? */
- Perl_croak_nocontext("%"SVf, SVfARG(errsv));
+ Perl_croak_nocontext("%" SVf, SVfARG(errsv));
}
assert(SvROK(qr_ref));
qr = SvRV(qr_ref);
if (pm_flags & PMf_IS_QR) {
ri->code_blocks = pRExC_state->code_blocks;
if (ri->code_blocks)
- /* disarm earlier SAVEDESTRUCTOR_X */
- ri->code_blocks->attached = TRUE;
+ ri->code_blocks->refcnt++;
}
{
while ( RExC_recurse_count > 0 ) {
const regnode *scan = RExC_recurse[ --RExC_recurse_count ];
+ /*
+ * This data structure is set up in study_chunk() and is used
+ * to calculate the distance between a GOSUB regopcode and
+ * the OPEN/CURLYM (CURLYM's are special and can act like OPEN's)
+ * it refers to.
+ *
+ * If for some reason someone writes code that optimises
+ * away a GOSUB opcode then the assert should be changed to
+ * an if(scan) to guard the ARG2L_SET() - Yves
+ *
+ */
+ assert(scan && OP(scan) == GOSUB);
ARG2L_SET( scan, RExC_open_parens[ARG(scan)] - scan );
}
Perl_reg_named_buff_fetch(pTHX_ REGEXP * const r, SV * const namesv,
const U32 flags)
{
- AV *retarray = NULL;
SV *ret;
struct regexp *const rx = ReANY(r);
PERL_ARGS_ASSERT_REG_NAMED_BUFF_FETCH;
- if (flags & RXapif_ALL)
- retarray=newAV();
-
if (rx && RXp_PAREN_NAMES(rx)) {
HE *he_str = hv_fetch_ent( RXp_PAREN_NAMES(rx), namesv, 0, 0 );
if (he_str) {
IV i;
SV* sv_dat=HeVAL(he_str);
I32 *nums=(I32*)SvPVX(sv_dat);
+ AV * const retarray = (flags & RXapif_ALL) ? newAV() : NULL;
for ( i=0; i<SvIVX(sv_dat); i++ ) {
if ((I32)(rx->nparens) >= nums[i]
&& rx->offs[nums[i]].start != -1
{
AV* list = (AV*) *listp;
IV k;
- for (k = 0; k <= av_tindex_nomg(list); k++) {
+ for (k = 0; k <= av_tindex_skip_len_mg(list); k++) {
SV** c_p = av_fetch(list, k, FALSE);
UV c;
assert(c_p);
nextchar(pRExC_state);
if (max < min) { /* If can't match, warn and optimize to fail
unconditionally */
- if (SIZE_ONLY) {
-
- /* We can't back off the size because we have to reserve
- * enough space for all the things we are about to throw
- * away, but we can shrink it by the amount we are about
- * to re-use here */
- RExC_size += PREVOPER(RExC_size) - regarglen[(U8)OPFAIL];
- }
- else {
+ reginsert(pRExC_state, OPFAIL, orig_emit, depth+1);
+ if (PASS2) {
ckWARNreg(RExC_parse, "Quantifier {n,m} with n > m can't match");
- RExC_emit = orig_emit;
+ NEXT_OFF(orig_emit)= regarglen[OPFAIL] + NODE_STEP_REGNODE;
}
- ret = reganode(pRExC_state, OPFAIL, 0);
return ret;
}
else if (min == max && *RExC_parse == '?')
RExC_parse++; /* Skip past the '{' */
- if (! (endbrace = strchr(RExC_parse, '}'))) { /* no trailing brace */
+ endbrace = strchr(RExC_parse, '}');
+ if (! endbrace) { /* no trailing brace */
vFAIL2("Missing right brace on \\%c{}", 'N');
}
else if(!(endbrace == RExC_parse /* nothing between the {} */
}
}
+STATIC bool
+S_new_regcurly(const char *s, const char *e)
+{
+ /* This is a temporary function designed to match the most lenient form of
+ * a {m,n} quantifier we ever envision, with either number omitted, and
+ * spaces anywhere between/before/after them.
+ *
+ * If this function fails, then the string it matches is very unlikely to
+ * ever be considered a valid quantifier, so we can allow the '{' that
+ * begins it to be considered as a literal */
+
+ bool has_min = FALSE;
+ bool has_max = FALSE;
+
+ PERL_ARGS_ASSERT_NEW_REGCURLY;
+
+ if (s >= e || *s++ != '{')
+ return FALSE;
+
+ while (s < e && isSPACE(*s)) {
+ s++;
+ }
+ while (s < e && isDIGIT(*s)) {
+ has_min = TRUE;
+ s++;
+ }
+ while (s < e && isSPACE(*s)) {
+ s++;
+ }
+
+ if (*s == ',') {
+ s++;
+ while (s < e && isSPACE(*s)) {
+ s++;
+ }
+ while (s < e && isDIGIT(*s)) {
+ has_max = TRUE;
+ s++;
+ }
+ while (s < e && isSPACE(*s)) {
+ s++;
+ }
+ }
+
+ return s < e && *s == '}' && (has_min || has_max);
+}
/* Parse backref decimal value, unless it's too big to sensibly be a backref,
* in which case return I32_MAX (rather than possibly 32-bit wrapping) */
/* FALLTHROUGH */
finish_meta_pat:
+ if ( UCHARAT(RExC_parse + 1) == '{'
+ && UNLIKELY(! new_regcurly(RExC_parse + 1, RExC_end)))
+ {
+ RExC_parse += 2;
+ vFAIL("Unescaped left brace in regex is illegal here");
+ }
nextchar(pRExC_state);
Set_Node_Length(ret, 2); /* MJD */
break;
} /* End of switch on '\' */
break;
case '{':
- /* Currently we don't care if the lbrace is at the start
- * of a construct. This catches it in the middle of a
- * literal string, or when it's the first thing after
- * something like "\b" */
- if (len || (p > RExC_start && isALPHA_A(*(p -1)))) {
- RExC_parse = p + 1;
- vFAIL("Unescaped left brace in regex is illegal here");
+ /* Currently we allow an lbrace at the start of a construct
+ * without raising a warning. This is because we think we
+ * will never want such a brace to be meant to be other
+ * than taken literally. */
+ if (len || (p > RExC_start && isALPHA_A(*(p - 1)))) {
+
+ /* But, we raise a fatal warning otherwise, as the
+ * deprecation cycle has come and gone. Except that it
+ * turns out that some heavily-relied on upstream
+ * software, notably GNU Autoconf, have failed to fix
+ * their uses. For these, don't make it fatal unless
+ * we anticipate using the '{' for something else.
+ * This happens after any alpha, and for a looser {m,n}
+ * quantifier specification */
+ if ( RExC_strict
+ || ( p > parse_start + 1
+ && isALPHA_A(*(p - 1))
+ && *(p - 2) == '\\')
+ || new_regcurly(p, RExC_end))
+ {
+ RExC_parse = p + 1;
+ vFAIL("Unescaped left brace in regex is "
+ "illegal here");
+ }
+ if (PASS2) {
+ ckWARNregdep(p + 1,
+ "Unescaped left brace in regex is "
+ "deprecated here (and will be fatal "
+ "in Perl 5.30), passed through");
+ }
}
goto normal_default;
case '}':
* this character again next time through, when it will be the
* only thing in its new node */
- if ((next_is_quantifier = ( LIKELY(p < RExC_end)
- && UNLIKELY(ISMULT2(p))))
- && LIKELY(len))
- {
+ next_is_quantifier = LIKELY(p < RExC_end)
+ && UNLIKELY(ISMULT2(p));
+
+ if (next_is_quantifier && LIKELY(len)) {
p = oldp;
goto loopdone;
}
no_close:
/* We output the messages even if warnings are off, because we'll fail
* the very next thing, and these give a likely diagnosis for that */
- if (posix_warnings && av_tindex_nomg(posix_warnings) >= 0) {
+ if (posix_warnings && av_tindex_skip_len_mg(posix_warnings) >= 0) {
output_or_return_posix_warnings(pRExC_state, posix_warnings, NULL);
}
stack, fence, fence_stack));
#endif
- top_index = av_tindex_nomg(stack);
+ top_index = av_tindex_skip_len_mg(stack);
switch (curchar) {
SV** stacked_ptr; /* Ptr to something already on 'stack' */
goto done;
case ')':
- if (av_tindex_nomg(fence_stack) < 0) {
+ if (av_tindex_skip_len_mg(fence_stack) < 0) {
RExC_parse++;
vFAIL("Unexpected ')'");
}
* may have altered the stack in the time since we earlier set
* 'top_index'. */
- top_index = av_tindex_nomg(stack);
+ top_index = av_tindex_skip_len_mg(stack);
if (top_index - fence >= 0) {
/* If the top entry on the stack is an operator, it had better
* be a '!', otherwise the entry below the top operand should
} /* End of loop parsing through the construct */
done:
- if (av_tindex_nomg(fence_stack) >= 0) {
+ if (av_tindex_skip_len_mg(fence_stack) >= 0) {
vFAIL("Unmatched (");
}
- if (av_tindex_nomg(stack) < 0 /* Was empty */
+ if (av_tindex_skip_len_mg(stack) < 0 /* Was empty */
|| ((final = av_pop(stack)) == NULL)
|| ! IS_OPERAND(final)
|| SvTYPE(final) != SVt_INVLIST
- || av_tindex_nomg(stack) >= 0) /* More left on stack */
+ || av_tindex_skip_len_mg(stack) >= 0) /* More left on stack */
{
bad_syntax:
SvREFCNT_dec(final);
AV * stack, const IV fence, AV * fence_stack)
{ /* Dumps the stacks in handle_regex_sets() */
- const SSize_t stack_top = av_tindex_nomg(stack);
- const SSize_t fence_stack_top = av_tindex_nomg(fence_stack);
+ const SSize_t stack_top = av_tindex_skip_len_mg(stack);
+ const SSize_t fence_stack_top = av_tindex_skip_len_mg(fence_stack);
SSize_t i;
PERL_ARGS_ASSERT_DUMP_REGEX_SETS_STRUCTURES;
while (1) {
if ( posix_warnings
- && av_tindex_nomg(posix_warnings) >= 0
+ && av_tindex_skip_len_mg(posix_warnings) >= 0
&& RExC_parse > not_posix_region_end)
{
/* Warnings about posix class issues are considered tentative until
* posix class, and it failed, it was a false alarm, as this
* successful one proves */
if ( posix_warnings
- && av_tindex_nomg(posix_warnings) >= 0
+ && av_tindex_skip_len_mg(posix_warnings) >= 0
&& not_posix_region_end >= RExC_parse
&& not_posix_region_end <= posix_class_end)
{
literal[d++] = (char) value;
literal[d++] = '\0';
- vWARN4dep(RExC_parse,
- "\"%.*s\" is more clearly written simply as \"%s\". "
- "This will be a fatal error in Perl 5.28",
+ vWARN4(RExC_parse,
+ "\"%.*s\" is more clearly written simply as \"%s\"",
(int) (RExC_parse - rangebegin),
rangebegin,
literal
- );
+ );
}
else if isMNEMONIC_CNTRL(value) {
- vWARN4dep(RExC_parse,
- "\"%.*s\" is more clearly written simply as \"%s\". "
- "This will be a fatal error in Perl 5.28",
+ vWARN4(RExC_parse,
+ "\"%.*s\" is more clearly written simply as \"%s\"",
(int) (RExC_parse - rangebegin),
rangebegin,
cntrl_to_mnemonic((U8) value)
- );
+ );
}
}
}
} /* End of loop through all the text within the brackets */
- if ( posix_warnings && av_tindex_nomg(posix_warnings) >= 0) {
+ if ( posix_warnings && av_tindex_skip_len_mg(posix_warnings) >= 0) {
output_or_return_posix_warnings(pRExC_state, posix_warnings,
return_posix_warnings);
}
#endif
/* Look at the longest folds first */
- for (cp_count = av_tindex_nomg(multi_char_matches);
+ for (cp_count = av_tindex_skip_len_mg(multi_char_matches);
cp_count > 0;
cp_count--)
{
{
AV* list = (AV*) *listp;
IV k;
- for (k = 0; k <= av_tindex_nomg(list); k++) {
+ for (k = 0; k <= av_tindex_skip_len_mg(list); k++) {
SV** c_p = av_fetch(list, k, FALSE);
UV c;
assert(c_p);
si = *ary; /* ary[0] = the string to initialize the swash with */
- if (av_tindex_nomg(av) >= 2) {
+ if (av_tindex_skip_len_mg(av) >= 2) {
if (only_utf8_locale_ptr
&& ary[2]
&& ary[2] != &PL_sv_undef)
* is any inversion list generated at compile time; [4]
* indicates if that inversion list has any user-defined
* properties in it. */
- if (av_tindex_nomg(av) >= 3) {
+ if (av_tindex_skip_len_mg(av) >= 3) {
invlist = ary[3];
if (SvUV(ary[4])) {
swash_init_flags |= _CORE_SWASH_INIT_USER_DEFINED_PROPERTY;
- reginsert - insert an operator in front of already-emitted operand
*
* Means relocating the operand.
+*
+* IMPORTANT NOTE - it is the *callers* responsibility to correctly
+* set up NEXT_OFF() of the inserted node if needed. Something like this:
+*
+* reginsert(pRExC, OPFAIL, orig_emit, depth+1);
+* if (PASS2)
+* NEXT_OFF(orig_emit) = regarglen[OPFAIL] + NODE_STEP_REGNODE;
+*
*/
STATIC void
-S_reginsert(pTHX_ RExC_state_t *pRExC_state, U8 op, regnode *opnd, U32 depth)
+S_reginsert(pTHX_ RExC_state_t *pRExC_state, U8 op, regnode *operand, U32 depth)
{
regnode *src;
regnode *dst;
/* note, RExC_open_parens[0] is the start of the
* regex, it can't move. RExC_close_parens[0] is the end
* of the regex, it *can* move. */
- if ( paren && RExC_open_parens[paren] >= opnd ) {
+ if ( paren && RExC_open_parens[paren] >= operand ) {
/*DEBUG_PARSE_FMT("open"," - %d",size);*/
RExC_open_parens[paren] += size;
} else {
/*DEBUG_PARSE_FMT("open"," - %s","ok");*/
}
- if ( RExC_close_parens[paren] >= opnd ) {
+ if ( RExC_close_parens[paren] >= operand ) {
/*DEBUG_PARSE_FMT("close"," - %d",size);*/
RExC_close_parens[paren] += size;
} else {
if (RExC_end_op)
RExC_end_op += size;
- while (src > opnd) {
+ while (src > operand) {
StructCopy(--src, --dst, regnode);
#ifdef RE_TRACK_PATTERN_OFFSETS
if (RExC_offsets) { /* MJD 20010112 */
}
- place = opnd; /* Op node, where operand used to be. */
+ place = operand; /* Op node, where operand used to be. */
#ifdef RE_TRACK_PATTERN_OFFSETS
if (RExC_offsets) { /* MJD */
MJD_OFFSET_DEBUG(
if (ri->u.offsets)
Safefree(ri->u.offsets); /* 20010421 MJD */
#endif
- if (ri->code_blocks) {
- ri->code_blocks->attached = FALSE;
+ if (ri->code_blocks)
S_free_codeblocks(aTHX_ ri->code_blocks);
- }
if (ri->data) {
int n = ri->data->count;
reti->code_blocks->cb[n].src_regex = (REGEXP*)
sv_dup_inc((SV*)(ri->code_blocks->cb[n].src_regex), param);
reti->code_blocks->count = ri->code_blocks->count;
- reti->code_blocks->attached = TRUE;
+ reti->code_blocks->refcnt = 1;
}
else
reti->code_blocks = NULL;