*
* All of these macros depend on the above RExC_ accessor macros, which
* in turn depend on a variable pRExC_state being in scope where they
- * are used. This is the based regexp parser context variable which is
+ * are used. This is the standard regexp parser context variable which is
* passed into every non-trivial parse function in this file.
*
* Note that the UTF macro is itself a wrapper around RExC_utf8, so all
static void
S_debug_studydata(pTHX_ const char *where, scan_data_t *data,
- U32 depth, int is_inf)
+ U32 depth, int is_inf,
+ SSize_t min, SSize_t stopmin, SSize_t delta)
{
DECLARE_AND_GET_RE_DEBUG_FLAGS;
DEBUG_OPTIMISE_MORE_r({
if (!data)
return;
- Perl_re_indentf(aTHX_ "%s: Pos:%" IVdf "/%" IVdf " Flags: 0x%" UVXf,
+ Perl_re_indentf(aTHX_ "%s: M/S/D: %" IVdf "/%" IVdf "/%" IVdf " Pos:%" IVdf "/%" IVdf " Flags: 0x%" UVXf,
depth,
where,
+ min, stopmin, delta,
(IV)data->pos_min,
(IV)data->pos_delta,
(UV)data->flags
}
-# define DEBUG_STUDYDATA(where, data, depth, is_inf) \
- S_debug_studydata(aTHX_ where, data, depth, is_inf)
+# define DEBUG_STUDYDATA(where, data, depth, is_inf, min, stopmin, delta) \
+ S_debug_studydata(aTHX_ where, data, depth, is_inf, min, stopmin, delta)
# define DEBUG_PEEP(str, scan, depth, flags) \
S_debug_peep(aTHX_ str, pRExC_state, scan, depth, flags)
#else
-# define DEBUG_STUDYDATA(where, data, depth, is_inf) NOOP
+# define DEBUG_STUDYDATA(where, data, depth, is_inf, min, stopmin, delta) NOOP
# define DEBUG_PEEP(str, scan, depth, flags) NOOP
#endif
}
data->last_end = -1;
data->flags &= ~SF_BEFORE_EOL;
- DEBUG_STUDYDATA("commit", data, 0, is_inf);
+ DEBUG_STUDYDATA("commit", data, 0, is_inf, -1, -1, -1);
}
/* An SSC is just a regnode_charclass_posix with an extra field: the inversion
S_study_chunk(pTHX_
RExC_state_t *pRExC_state,
regnode **scanp, /* Start here (read-write). */
- SSize_t *minlenp,
+ SSize_t *minlenp, /* used for the minlen of substrings? */
SSize_t *deltap, /* Write maxlen-minlen here. */
regnode *last, /* Stop before this one. */
scan_data_t *data, /* string data about the pattern */
a higher caller is holding a ptr to them. */
)
{
- SSize_t final_minlen;
- /* There must be at least this number of characters to match */
- SSize_t min = 0;
- I32 pars = 0, code;
- regnode *scan = *scanp, *next;
- SSize_t delta = 0;
+ /* vars about the regnodes we are working with */
+ regnode *scan = *scanp; /* the current opcode we are inspecting */
+ regnode *next = NULL; /* the next opcode beyond scan, tmp var */
+ regnode *first_non_open = scan; /* FIXME: should this init to NULL?
+ the first non open regop, if the init
+ val IS an OPEN then we will skip past
+ it just after the var decls section */
+ I32 code = 0; /* temp var used to hold the optype of a regop */
+
+ /* vars about the min and max length of the pattern */
+ SSize_t min = 0; /* min length of this part of the pattern */
+ SSize_t stopmin = OPTIMIZE_INFTY; /* min length accounting for ACCEPT
+ this is adjusted down if we find
+ an ACCEPT */
+ SSize_t delta = 0; /* difference between min and max length
+ (not accounting for stopmin) */
+
+ /* vars about capture buffers in the pattern */
+ I32 pars = 0; /* count of OPEN opcodes */
+ I32 is_par = OP(scan) == OPEN ? ARG(scan) : 0; /* is this op an OPEN? */
+
+ /* vars about whether this pattern contains something that can match
+ * infinitely long strings, eg, X* or X+ */
int is_inf = (flags & SCF_DO_SUBSTR) && (data->flags & SF_IS_INF);
int is_inf_internal = 0; /* The studied chunk is infinite */
- I32 is_par = OP(scan) == OPEN ? ARG(scan) : 0;
- scan_data_t data_fake;
- SV *re_trie_maxbuff = NULL;
- regnode *first_non_open = scan;
- SSize_t stopmin = OPTIMIZE_INFTY;
- scan_frame *frame = NULL;
+
+ /* scan_data_t (struct) is used to hold information about the substrings
+ * and start class we have extracted from the string */
+ scan_data_t data_fake; /* temp var used for recursing in some cases */
+
+ SV *re_trie_maxbuff = NULL; /* temp var used to hold whether we can do
+ trie optimizations */
+
+ scan_frame *frame = NULL; /* used as part of fake recursion */
+
DECLARE_AND_GET_RE_DEBUG_FLAGS;
PERL_ARGS_ASSERT_STUDY_CHUNK;
first_non_open=regnext(first_non_open);
}
-
fake_study_recurse:
DEBUG_r(
RExC_study_chunk_recursed_count++;
*/
bool mutate_ok = was_mutate_ok && !(frame && frame->in_gosub);
/* Peephole optimizer: */
- DEBUG_STUDYDATA("Peep", data, depth, is_inf);
+ DEBUG_STUDYDATA("Peep", data, depth, is_inf, min, stopmin, delta);
DEBUG_PEEP("Peep", scan, depth, flags);
}
if (flags & SCF_DO_STCLASS)
ssc_or(pRExC_state, &accum, (regnode_charclass*)&this_class);
+ DEBUG_STUDYDATA("end BRANCH", data, depth, is_inf, min, stopmin, delta);
}
if (code == IFTHEN && num < 2) /* Empty ELSE branch */
min1 = 0;
flags |= SCF_DO_STCLASS_OR;
}
}
+ DEBUG_STUDYDATA("pre TRIE", data, depth, is_inf, min, stopmin, delta);
if (PERL_ENABLE_TRIE_OPTIMISATION
&& OP(startbranch) == BRANCH
} /* end if ( prev) */
} /* TRIE_MAXBUF is non zero */
} /* do trie */
-
+ DEBUG_STUDYDATA("after TRIE", data, depth, is_inf, min, stopmin, delta);
}
else if ( code == BRANCHJ ) { /* single branch is optimized. */
scan = NEXTOPER(NEXTOPER(scan));
RExC_study_chunk_recursed_bytes, U8);
}
/* we haven't recursed into this paren yet, so recurse into it */
- DEBUG_STUDYDATA("gosub-set", data, depth, is_inf);
+ DEBUG_STUDYDATA("gosub-set", data, depth, is_inf, min, stopmin, delta);
PAREN_SET(recursed_depth, paren);
my_recursed_depth= recursed_depth + 1;
} else {
- DEBUG_STUDYDATA("gosub-inf", data, depth, is_inf);
+ DEBUG_STUDYDATA("gosub-inf", data, depth, is_inf, min, stopmin, delta);
/* some form of infinite recursion, assume infinite length
* */
if (flags & SCF_DO_SUBSTR) {
(frame && frame->in_gosub) || OP(scan) == GOSUB
);
- DEBUG_STUDYDATA("frame-new", data, depth, is_inf);
+ DEBUG_STUDYDATA("frame-new", data, depth, is_inf, min, stopmin, delta);
DEBUG_PEEP("fnew", scan, depth, flags);
frame = newframe;
ANYOF_FLAGS(data->start_class) &= ~SSC_MATCHES_EMPTY_STRING;
}
flags &= ~SCF_DO_STCLASS;
+ DEBUG_STUDYDATA("end EXACT", data, depth, is_inf, min, stopmin, delta);
}
else if (PL_regkind[OP(scan)] == EXACT) {
/* But OP != EXACT!, so is EXACTFish */
flags &= ~SCF_DO_STCLASS;
SvREFCNT_dec(EXACTF_invlist);
}
+ DEBUG_STUDYDATA("end EXACTish", data, depth, is_inf, min, stopmin, delta);
}
else if (REGNODE_VARIES(OP(scan))) {
SSize_t mincount, maxcount, minnext, deltanext, pos_before = 0;
delta += (minnext + deltanext) * maxcount
- minnext * mincount;
}
+
+ if (data && data->flags & SCF_SEEN_ACCEPT) {
+ if (flags & SCF_DO_SUBSTR) {
+ scan_commit(pRExC_state, data, minlenp, is_inf);
+ flags &= ~SCF_DO_SUBSTR;
+ }
+ if (stopmin > min)
+ stopmin = min;
+ DEBUG_STUDYDATA("after-whilem accept", data, depth, is_inf, min, stopmin, delta);
+ }
/* Try powerful optimization CURLYX => CURLYN. */
if ( OP(oscan) == CURLYX && data
&& data->flags & SF_IN_PAR
last, &data_fake, stopparen,
recursed_depth, NULL, f, depth+1,
mutate_ok);
+
if (scan->flags) {
if ( deltanext < 0
|| deltanext > (I32) U8_MAX
|= SSC_MATCHES_EMPTY_STRING;
}
}
+ DEBUG_STUDYDATA("end LOOKAROUND", data, depth, is_inf, min, stopmin, delta);
}
#if PERL_ENABLE_POSITIVE_ASSERTION_STUDY
else {
if (OP(scan)==ACCEPT) {
/* m{(*ACCEPT)x} does not have to start with 'x' */
flags &= ~SCF_DO_STCLASS;
- if (data) {
+ if (data)
data->flags |= SCF_SEEN_ACCEPT;
- if (stopmin > min)
- stopmin = min;
- }
+ if (stopmin > min)
+ stopmin = min;
}
}
else if (OP(scan) == COMMIT) {
if (flags & SCF_DO_STCLASS)
ssc_or(pRExC_state, &accum, (regnode_charclass *) &this_class);
}
+ DEBUG_STUDYDATA("after JUMPTRIE", data, depth, is_inf, min, stopmin, delta);
}
if (flags & SCF_DO_SUBSTR) {
data->pos_min += min1;
}
}
scan= tail;
+ DEBUG_STUDYDATA("after TRIE study", data, depth, is_inf, min, stopmin, delta);
continue;
}
#else
/* we need to unwind recursion. */
depth = depth - 1;
- DEBUG_STUDYDATA("frame-end", data, depth, is_inf);
+ DEBUG_STUDYDATA("frame-end", data, depth, is_inf, min, stopmin, delta);
DEBUG_PEEP("fend", scan, depth, flags);
/* restore previous context */
}
assert(!frame);
- DEBUG_STUDYDATA("pre-fin", data, depth, is_inf);
+ DEBUG_STUDYDATA("pre-fin", data, depth, is_inf, min, stopmin, delta);
+
+ if (min > stopmin) {
+ /* stopmin might be shorter than min if we saw an (*ACCEPT). If
+ this is the case then it means this pattern is variable length
+ and we need to ensure that the delta accounts for it. delta
+ represents the difference between min length and max length for
+ this part of the pattern. */
+ delta += min - stopmin;
+ min = stopmin;
+ }
*scanp = scan;
*deltap = is_inf_internal ? OPTIMIZE_INFTY : delta;
if (flags & SCF_TRIE_RESTUDY)
data->flags |= SCF_TRIE_RESTUDY;
- DEBUG_STUDYDATA("post-fin", data, depth, is_inf);
-
- final_minlen = min < stopmin
- ? min : stopmin;
if (!(RExC_seen & REG_UNBOUNDED_QUANTIFIER_SEEN)) {
- if (final_minlen > OPTIMIZE_INFTY - delta)
+ if (min > OPTIMIZE_INFTY - delta)
RExC_maxlen = OPTIMIZE_INFTY;
- else if (RExC_maxlen < final_minlen + delta)
- RExC_maxlen = final_minlen + delta;
+ else if (RExC_maxlen < min + delta)
+ RExC_maxlen = min + delta;
}
- return final_minlen;
+ DEBUG_STUDYDATA("post-fin", data, depth, is_inf, min, stopmin, delta);
+ return min;
}
/* add a data member to the struct reg_data attached to this regex, it should
RExC_rx->intflags = 0;
RExC_flags = rx_flags; /* don't let top level (?i) bleed */
- RExC_parse = exp;
+ RExC_parse_set(exp);
/* This NUL is guaranteed because the pattern comes from an SV*, and the sv
* code makes sure the final byte is an uncounted NUL. But should this
* ever not be the case, lots of things could read beyond the end of the
* buffer: loops like
- * while(isFOO(*RExC_parse)) RExC_parse++;
+ * while(isFOO(*RExC_parse)) RExC_parse_inc_by(1);
* strchr(RExC_parse, "foo");
* etc. So it is worth noting. */
assert(*RExC_end == '\0');
return ret;
} else {
if (retarray)
- ret = newSVsv(&PL_sv_undef);
+ ret = newSV_type(SVt_NULL);
}
if (retarray)
av_push(retarray, ret);
* using do...while */
if (UTF)
do {
- RExC_parse += UTF8SKIP(RExC_parse);
+ RExC_parse_inc_utf8();
} while ( RExC_parse < RExC_end
&& isWORDCHAR_utf8_safe((U8*)RExC_parse, (U8*) RExC_end));
else
do {
- RExC_parse++;
+ RExC_parse_inc_by(1);
} while (RExC_parse < RExC_end && isWORDCHAR(*RExC_parse));
} else {
- RExC_parse++; /* so the <- from the vFAIL is after the offending
+ RExC_parse_inc_by(1); /* so the <- from the vFAIL is after the offending
character */
vFAIL("Group name must start with a non-digit word character");
}
/* '^' as an initial flag sets certain defaults */
if (UCHARAT(RExC_parse) == '^') {
- RExC_parse++;
+ RExC_parse_inc_by(1);
has_use_defaults = TRUE;
STD_PMMOD_FLAGS_CLEAR(&RExC_flags);
cs = (toUSE_UNI_CHARSET_NOT_DEPENDS)
if ((RExC_pm_flags & PMf_WILDCARD)) {
if (flagsp == & negflags) {
if (*RExC_parse == 'm') {
- RExC_parse++;
+ RExC_parse_inc_by(1);
/* diag_listed_as: Use of %s is not allowed in Unicode
property wildcard subpatterns in regex; marked by <--
HERE in m/%s/ */
has_charset_modifier = DEPENDS_PAT_MOD;
break;
excess_modifier:
- RExC_parse++;
+ RExC_parse_inc_by(1);
if (has_charset_modifier == ASCII_RESTRICT_PAT_MOD) {
vFAIL2("Regexp modifier \"%c\" may appear a maximum of twice", ASCII_RESTRICT_PAT_MOD);
}
}
NOT_REACHED; /*NOTREACHED*/
neg_modifier:
- RExC_parse++;
+ RExC_parse_inc_by(1);
vFAIL2("Regexp modifier \"%c\" may not appear after the \"-\"",
*(RExC_parse - 1));
NOT_REACHED; /*NOTREACHED*/
if ( (RExC_pm_flags & PMf_WILDCARD)
&& cs != REGEX_ASCII_MORE_RESTRICTED_CHARSET)
{
- RExC_parse++;
+ RExC_parse_inc_by(1);
/* diag_listed_as: Use of %s is not allowed in Unicode
property wildcard subpatterns in regex; marked by <--
HERE in m/%s/ */
return;
default:
fail_modifiers:
- RExC_parse += SKIP_IF_CHAR(RExC_parse, RExC_end);
+ RExC_parse_inc_if_char();
/* diag_listed_as: Sequence (?%s...) not recognized in regex; marked by <-- HERE in m/%s/ */
vFAIL2utf8f("Sequence (%" UTF8f "...) not recognized",
UTF8fARG(UTF, RExC_parse-seqstart, seqstart));
NOT_REACHED; /*NOTREACHED*/
}
- RExC_parse += UTF ? UTF8SKIP(RExC_parse) : 1;
+ RExC_parse_inc();
}
vFAIL("Sequence (?... not terminated");
modifier_illegal_in_wildcard:
- RExC_parse++;
+ RExC_parse_inc_by(1);
/* diag_listed_as: Use of %s is not allowed in Unicode property wildcard
subpatterns in regex; marked by <-- HERE in m/%s/ */
vFAIL2("Use of modifier '%c' is not allowed in Unicode property wildcard"
if (RExC_parse != name_start && ch == '}') {
while (isBLANK(*RExC_parse)) {
- RExC_parse++;
+ RExC_parse_inc_by(1);
}
}
if (RExC_parse == name_start || *RExC_parse != ch) {
/* Having this true makes it feasible to have a lot fewer tests for the
* parse pointer being in scope. For example, we can write
- * while(isFOO(*RExC_parse)) RExC_parse++;
+ * while(isFOO(*RExC_parse)) RExC_parse_inc_by(1);
* instead of
- * while(RExC_parse < RExC_end && isFOO(*RExC_parse)) RExC_parse++;
+ * while(RExC_parse < RExC_end && isFOO(*RExC_parse)) RExC_parse_inc_by(1);
*/
assert(*RExC_end == '\0');
U32 seen_flag_set = 0; /* RExC_seen flags we must set */
if (has_intervening_patws) {
- RExC_parse++; /* past the '*' */
+ RExC_parse_inc_by(1); /* past the '*' */
/* For strict backwards compatibility, don't change the message
* now that we also have lowercase operands */
if (isUPPER(*RExC_parse)) {
has_upper = TRUE;
}
- RExC_parse++;
+ RExC_parse_inc_by(1);
}
else {
- RExC_parse += UTF8SKIP(RExC_parse);
+ RExC_parse_inc_utf8();
}
}
verb_len = RExC_parse - start_verb;
goto unterminated_verb_pattern;
}
- RExC_parse += UTF ? UTF8SKIP(RExC_parse) : 1;
+ RExC_parse_inc();
while ( RExC_parse < RExC_end && *RExC_parse != ')' ) {
- RExC_parse += UTF ? UTF8SKIP(RExC_parse) : 1;
+ RExC_parse_inc();
}
if ( RExC_parse >= RExC_end || *RExC_parse != ')' ) {
unterminated_verb_pattern:
goto no_colon;
}
- RExC_parse = start_arg;
+ RExC_parse_set(start_arg);
if (RExC_in_script_run) {
RExC_in_lookaround++;
RExC_seen |= seen_flag_set;
- RExC_parse = start_arg;
+ RExC_parse_set(start_arg);
goto parse_rest;
no_colon:
} /* End of switch */
if ( ! op ) {
- RExC_parse += UTF ? UTF8_SAFE_SKIP(RExC_parse, RExC_end) : 1;
+ RExC_parse_inc_safe();
if (has_upper || verb_len == 0) {
vFAIL2utf8f( "Unknown verb pattern '%" UTF8f "'",
UTF8fARG(UTF, verb_len, start_verb));
const char impossible_group[] = "Invalid reference to group";
if (has_intervening_patws) {
- RExC_parse++;
+ RExC_parse_inc_by(1);
vFAIL("In '(?...)', the '(' and '?' must be adjacent");
}
- RExC_parse++; /* past the '?' */
+ RExC_parse_inc_by(1); /* past the '?' */
paren = *RExC_parse; /* might be a trailing NUL, if not
well-formed */
- RExC_parse += UTF ? UTF8SKIP(RExC_parse) : 1;
+ RExC_parse_inc();
if (RExC_parse > RExC_end) {
paren = '\0';
}
case 'P': /* (?P...) variants for those used to PCRE/Python */
paren = *RExC_parse;
if ( paren == '<') { /* (?P<...>) named capture */
- RExC_parse++;
+ RExC_parse_inc_by(1);
if (RExC_parse >= RExC_end) {
vFAIL("Sequence (?P<... not terminated");
}
goto named_capture;
}
else if (paren == '>') { /* (?P>name) named recursion */
- RExC_parse++;
+ RExC_parse_inc_by(1);
if (RExC_parse >= RExC_end) {
vFAIL("Sequence (?P>... not terminated");
}
goto named_recursion;
}
else if (paren == '=') { /* (?P=...) named backref */
- RExC_parse++;
+ RExC_parse_inc_by(1);
return handle_named_backref(pRExC_state, flagp,
segment_parse_start, ')');
}
- RExC_parse += SKIP_IF_CHAR(RExC_parse, RExC_end);
+ RExC_parse_inc_if_char();
/* diag_listed_as: Sequence (?%s...) not recognized in regex; marked by <-- HERE in m/%s/ */
vFAIL3("Sequence (%.*s...) not recognized",
(int) (RExC_parse - seqstart), seqstart);
/* If you want to support (?<*...), first reconcile with GH #17363 */
if (*RExC_parse == '!') {
paren = ','; /* negative lookbehind (?<! ... ) */
- RExC_parse++;
+ RExC_parse_inc_by(1);
if ((ret= reg_la_OPFAIL(pRExC_state,REG_LB_SEEN,"?<!")))
return ret;
break;
else
if (*RExC_parse == '=') {
/* paren = '<' - positive lookbehind (?<= ... ) */
- RExC_parse++;
+ RExC_parse_inc_by(1);
if ((ret= reg_la_NOTHING(pRExC_state,REG_LB_SEEN,"?<=")))
return ret;
break;
/* NOTREACHED */
case '+':
if (! inRANGE(RExC_parse[0], '1', '9')) {
- RExC_parse++;
+ RExC_parse_inc_by(1);
vFAIL("Illegal pattern");
}
goto parse_recursion;
/* FALLTHROUGH */
case '1': case '2': case '3': case '4': /* (?1) */
case '5': case '6': case '7': case '8': case '9':
- RExC_parse = (char *) seqstart + 1; /* Point to the digit */
+ RExC_parse_set((char *) seqstart + 1); /* Point to the digit */
parse_recursion:
{
bool is_neg = FALSE;
UV unum;
segment_parse_start = RExC_parse - 1;
if (*RExC_parse == '-') {
- RExC_parse++;
+ RExC_parse_inc_by(1);
is_neg = TRUE;
}
endptr = RExC_end;
&& unum <= I32_MAX
) {
num = (I32)unum;
- RExC_parse = (char*)endptr;
+ RExC_parse_set((char*)endptr);
}
else { /* Overflow, or something like that. Position
beyond all digits for the message */
while (RExC_parse < RExC_end && isDIGIT(*RExC_parse)) {
- RExC_parse++;
+ RExC_parse_inc_by(1);
}
vFAIL(impossible_group);
}
/* Don't overflow */
if (UNLIKELY(I32_MAX - RExC_npar < num)) {
- RExC_parse++;
+ RExC_parse_inc_by(1);
vFAIL(impossible_group);
}
num += RExC_npar;
if (paren == '-' && num < 1) {
- RExC_parse++;
+ RExC_parse_inc_by(1);
vFAIL(non_existent_group_msg);
}
}
* then reparsing */
if (ALL_PARENS_COUNTED) {
if (num >= RExC_total_parens) {
- RExC_parse++;
+ RExC_parse_inc_by(1);
vFAIL(non_existent_group_msg);
}
}
case '?': /* (??...) */
is_logical = 1;
if (*RExC_parse != '{') {
- RExC_parse += SKIP_IF_CHAR(RExC_parse, RExC_end);
+ RExC_parse_inc_if_char();
/* diag_listed_as: Sequence (?%s...) not recognized in regex; marked by <-- HERE in m/%s/ */
vFAIL2utf8f(
"Sequence (%" UTF8f "...) not recognized",
}
*flagp |= POSTPONED;
paren = '{';
- RExC_parse++;
+ RExC_parse_inc_by(1);
/* FALLTHROUGH */
case '{': /* (?{...}) */
{
}
/* this is a pre-compiled code block (?{...}) */
cb = &pRExC_state->code_blocks->cb[pRExC_state->code_index];
- RExC_parse = RExC_start + cb->end;
+ RExC_parse_set(RExC_start + cb->end);
o = cb->block;
if (cb->src_regex) {
n = add_data(pRExC_state, STR_WITH_LEN("rl"));
|| RExC_parse[0] == '\'' ) /* (?('NAME')...) */
{
char ch = RExC_parse[0] == '<' ? '>' : '\'';
- char *name_start= RExC_parse++;
+ char *name_start= RExC_parse;
+ RExC_parse_inc_by(1);
U32 num = 0;
SV *sv_dat=reg_scan_name(pRExC_state, REG_RSN_RETURN_DATA);
if ( RExC_parse == name_start
vFAIL2("Sequence (?(%c... not terminated",
(ch == '>' ? '<' : ch));
}
- RExC_parse++;
+ RExC_parse_inc_by(1);
if (sv_dat) {
num = add_data( pRExC_state, STR_WITH_LEN("S"));
RExC_rxi->data->data[num]=(void*)sv_dat;
"DEFINE"))
{
ret = reganode(pRExC_state, DEFINEP, 0);
- RExC_parse += DEFINE_len;
+ RExC_parse_inc_by(DEFINE_len);
is_define = 1;
goto insert_if_check_paren;
}
else if (RExC_parse[0] == 'R') {
- RExC_parse++;
+ RExC_parse_inc_by(1);
/* parno == 0 => /(?(R)YES|NO)/ "in any form of recursion OR eval"
* parno == 1 => /(?(R0)YES|NO)/ "in GOSUB (?0) / (?R)"
* parno == 2 => /(?(R1)YES|NO)/ "in GOSUB (?1) (parno-1)"
parno = 0;
if (RExC_parse[0] == '0') {
parno = 1;
- RExC_parse++;
+ RExC_parse_inc_by(1);
}
else if (inRANGE(RExC_parse[0], '1', '9')) {
UV uv;
&& uv <= I32_MAX
) {
parno = (I32)uv + 1;
- RExC_parse = (char*)endptr;
+ RExC_parse_set((char*)endptr);
}
/* else "Switch condition not recognized" below */
} else if (RExC_parse[0] == '&') {
SV *sv_dat;
- RExC_parse++;
+ RExC_parse_inc_by(1);
sv_dat = reg_scan_name(pRExC_state,
REG_RSN_RETURN_DATA);
if (sv_dat)
&& uv <= I32_MAX
) {
parno = (I32)uv;
- RExC_parse = (char*)endptr;
+ RExC_parse_set((char*)endptr);
}
else {
vFAIL("panic: grok_atoUV returned FALSE");
insert_if_check_paren:
if (UCHARAT(RExC_parse) != ')') {
- RExC_parse += UTF
- ? UTF8_SAFE_SKIP(RExC_parse, RExC_end)
- : 1;
+ RExC_parse_inc_safe();
vFAIL("Switch condition not recognized");
}
nextchar(pRExC_state);
#endif
return ret;
}
- RExC_parse += UTF
- ? UTF8_SAFE_SKIP(RExC_parse, RExC_end)
- : 1;
+ RExC_parse_inc_safe();
vFAIL("Unknown switch condition (?(...))");
}
case '[': /* (?[ ... ]) */
case '*': /* If you want to support (?*...), first reconcile with GH #17363 */
/* FALLTHROUGH */
default: /* e.g., (?i) */
- RExC_parse = (char *) seqstart + 1;
+ RExC_parse_set((char *) seqstart + 1);
parse_flags:
parse_lparen_question_flags(pRExC_state);
if (UCHARAT(RExC_parse) != ':') {
set_regex_charset(&RExC_flags, REGEX_UNICODE_CHARSET);
}
if (RExC_parse >= RExC_end || UCHARAT(RExC_parse) != ')') {
- RExC_parse = reg_parse_start;
+ RExC_parse_set(reg_parse_start);
vFAIL("Unmatched (");
}
nextchar(pRExC_state);
}
else if (!paren && RExC_parse < RExC_end) {
if (*RExC_parse == ')') {
- RExC_parse++;
+ RExC_parse_inc_by(1);
vFAIL("Unmatched )");
}
else
}
else if (*start == '0') { /* grok_atoUV() fails for only two reasons:
leading zeros or overflow */
- RExC_parse = (char * ) end;
+ RExC_parse_set((char * ) end);
/* Perhaps too generic a msg for what is only failure from having
* leading zeros, but this is how it's always behaved. */
/* Here, found a quantifier, but was too large; either it overflowed or was
* too big a legal number */
- RExC_parse = (char * ) end;
+ RExC_parse_set((char * ) end);
vFAIL2("Quantifier in {,} bigger than %d", REG_INFTY - 1);
NOT_REACHED; /*NOTREACHED*/
max = get_quantifier_value(pRExC_state, max_start, max_end);
}
- RExC_parse = (char *) regcurly_return[RBRACE];
+ RExC_parse_set((char *) regcurly_return[RBRACE]);
nextchar(pRExC_state);
if (max < min) { /* If can't match, warn and optimize to fail
/* Forbid extra quantifiers */
if (isQUANTIFIER(RExC_parse, RExC_end)) {
- RExC_parse++;
+ RExC_parse_inc_by(1);
vFAIL("Nested quantifiers");
}
* reason is to make it harder to write patterns that take a long long time
* to halt, and because the use of this construct isn't necessary in
* matching Unicode property values */
- RExC_parse++;
+ RExC_parse_inc_by(1);
/* diag_listed_as: Use of %s is not allowed in Unicode property wildcard
subpatterns in regex; marked by <-- HERE in m/%s/
*/
* [^\n]. The latter is assumed when the {...} following the \N is a legal
* quantifier, or if there is no '{' at all */
if (*p != '{' || regcurly(p, RExC_end, NULL)) {
- RExC_parse = p;
+ RExC_parse_set(p);
if (cp_count) {
*cp_count = -1;
}
vFAIL("Missing braces on \\N{}");
}
- RExC_parse++; /* Skip past the '{' */
+ RExC_parse_inc_by(1); /* Skip past the '{' */
endbrace = (char *) memchr(RExC_parse, '}', RExC_end - RExC_parse);
if (! endbrace) { /* no trailing brace */
/* \N{_} is what toke.c returns to us to indicate a name that evaluates to
* nothing at all (not allowed under strict) */
if (endbrace - RExC_parse == 1 && *RExC_parse == '_') {
- RExC_parse = endbrace;
+ RExC_parse_set(endbrace);
if (strict) {
- RExC_parse++; /* Position after the "}" */
+ RExC_parse_inc_by(1); /* Position after the "}" */
vFAIL("Zero length \\N{}");
}
}
while (isBLANK(*RExC_parse)) {
- RExC_parse++;
+ RExC_parse_inc_by(1);
}
e = endbrace;
UTF,
&error_msg);
if (error_msg) {
- RExC_parse = endbrace;
+ RExC_parse_set(endbrace);
vFAIL(error_msg);
}
/* Here, exactly one code point. If that isn't what is wanted,
* fail */
if (! code_point_p) {
- RExC_parse = p;
+ RExC_parse_set(p);
return FALSE;
}
/* Have parsed this entire single code point \N{...}. *cp_count
* has already been set to 1, so don't do it again. */
- RExC_parse = endbrace;
+ RExC_parse_set(endbrace);
nextchar(pRExC_state);
return TRUE;
} /* End of is a single code point */
* case). */
if (! node_p) {
if (! cp_count) {
- RExC_parse = p;
+ RExC_parse_set(p);
}
return FALSE;
}
* converted a name to the \N{U+...} form. This includes changing a
* name that evaluates to multiple code points to \N{U+c1.c2.c3 ...} */
- RExC_parse += 2; /* Skip past the 'U+' */
+ RExC_parse_inc_by(2); /* Skip past the 'U+' */
/* Code points are separated by dots. The '}' terminates the whole
* thing. */
UV cp = grok_hex(RExC_parse, &len, &flags, &overflow_value);
if (len == 0) {
- RExC_parse++;
+ RExC_parse_inc_by(1);
bad_NU:
vFAIL("Invalid hexadecimal number in \\N{U+...}");
}
- RExC_parse += len;
+ RExC_parse_inc_by(len);
if (cp > MAX_LEGAL_CP) {
vFAIL(form_cp_too_large_msg(16, start_digit, len, 0));
/* Here, is a single code point; fail if doesn't want that */
if (! code_point_p) {
- RExC_parse = p;
+ RExC_parse_set(p);
return FALSE;
}
/* A single code point is easy to handle; just return it */
*code_point_p = UNI_TO_NATIVE(cp);
- RExC_parse = endbrace;
+ RExC_parse_set(endbrace);
nextchar(pRExC_state);
return TRUE;
}
* \N{U+100.} )
* */
if (*RExC_parse != '.' || RExC_parse + 1 >= e) {
- RExC_parse += (RExC_orig_utf8) /* point to after 1st invalid */
- ? UTF8SKIP(RExC_parse)
- : 1;
- RExC_parse = MIN(e, RExC_parse);/* Guard against malformed utf8
- */
+ /*point to after 1st invalid */
+ RExC_parse_incf(RExC_orig_utf8);
+ /*Guard against malformed utf8*/
+ RExC_parse_set(MIN(e, RExC_parse));
goto bad_NU;
}
/* Move to after the dot (or ending brace the final time through.)
* */
- RExC_parse++;
+ RExC_parse_inc_by(1);
count++;
} while (RExC_parse < e);
save_start = RExC_start;
orig_end = RExC_end;
- RExC_parse = RExC_start = SvPVX(substitute_parse);
+ RExC_start = SvPVX(substitute_parse);
+ RExC_parse_set(RExC_start);
RExC_end = RExC_parse + SvCUR(substitute_parse);
TURN_OFF_WARNINGS_IN_SUBSTITUTE_PARSE;
/* Restore the saved values */
RESTORE_WARNINGS;
RExC_start = save_start;
- RExC_parse = endbrace;
+ RExC_parse_set(endbrace);
RExC_end = orig_end;
SET_recode_x_to_native(0);
(UV) *flagp);
}
if (*RExC_parse != ']') {
- RExC_parse = cc_parse_start;
+ RExC_parse_set(cc_parse_start);
vFAIL("Unmatched [");
}
nextchar(pRExC_state);
case '?':
case '+':
case '*':
- RExC_parse++;
+ RExC_parse_inc_by(1);
vFAIL("Quantifier follows nothing");
break;
case '\\':
required, as the default for this switch is to jump to the
literal text handling code.
*/
- RExC_parse++;
+ RExC_parse_inc_by(1);
switch ((U8)*RExC_parse) {
/* Special Escapes */
case 'A':
goto finish_meta_pat;
case 'G':
if (RExC_pm_flags & PMf_WILDCARD) {
- RExC_parse++;
+ RExC_parse_inc_by(1);
/* diag_listed_as: Use of %s is not allowed in Unicode property
wildcard subpatterns in regex; marked by <-- HERE in m/%s/
*/
RExC_end - RExC_parse);
char * e = endbrace;
- RExC_parse += 2;
+ RExC_parse_inc_by(2);
if (! endbrace) {
vFAIL2("Missing right brace on \\%c{}", name);
}
while (isBLANK(*RExC_parse)) {
- RExC_parse++;
+ RExC_parse_inc_by(1);
}
while (RExC_parse < e && isBLANK(*(e - 1))) {
}
if (e == RExC_parse) {
- RExC_parse = endbrace + 1; /* After the '}' */
+ RExC_parse_set(endbrace + 1); /* After the '}' */
vFAIL2("Empty \\%c{}", name);
}
break;
default:
bad_bound_type:
- RExC_parse = e;
+ RExC_parse_set(e);
vFAIL2utf8f(
"'%" UTF8f "' is an unknown bound type",
UTF8fARG(UTF, length, e - length));
NOT_REACHED; /*NOTREACHED*/
}
- RExC_parse = endbrace;
+ RExC_parse_set(endbrace);
REQUIRE_UNI_RULES(flagp, 0);
if (op == BOUND) {
&& UCHARAT(RExC_parse + 1) == '{'
&& UNLIKELY(! regcurly(RExC_parse + 1, RExC_end, NULL)))
{
- RExC_parse += 2;
+ RExC_parse_inc_by(2);
vFAIL("Unescaped left brace in regex is illegal here");
}
nextchar(pRExC_state);
RETURN_FAIL_ON_RESTART_FLAGP(flagp);
/* Here, evaluates to a single code point. Go get that */
- RExC_parse = atom_parse_start;
+ RExC_parse_set(atom_parse_start);
goto defchar;
case 'k': /* Handle \k<NAME> and \k'NAME' and \k{NAME} */
&& ch != '\''
&& ch != '{'))
{
- RExC_parse++;
+ RExC_parse_inc_by(1);
/* diag_listed_as: Sequence \%s... not terminated in regex; marked by <-- HERE in m/%s/ */
vFAIL2("Sequence %.2s... not terminated", atom_parse_start);
} else {
- RExC_parse += 2;
+ RExC_parse_inc_by(2);
if (ch == '{') {
while (isBLANK(*RExC_parse)) {
- RExC_parse++;
+ RExC_parse_inc_by(1);
}
}
ret = handle_named_backref(pRExC_state,
s++;
} while isDIGIT(*s);
- RExC_parse = s;
+ RExC_parse_set(s);
vFAIL("Unterminated \\g{...} pattern");
}
goto parse_named_seq;
}
- RExC_parse = s;
+ RExC_parse_set(s);
num = S_backref_value(RExC_parse, RExC_end);
if (num == 0)
vFAIL("Reference to invalid group 0");
* to be an octal character escape, e.g. \35 or \777.
* The above logic should make it obvious why using
* octal escapes in patterns is problematic. - Yves */
- RExC_parse = atom_parse_start;
+ RExC_parse_set(atom_parse_start);
goto defchar;
}
}
* We've already figured out what value the digits represent.
* Now, move the parse to beyond them. */
if (endbrace) {
- RExC_parse = endbrace + 1;
+ RExC_parse_set(endbrace + 1);
}
else while (isDIGIT(*RExC_parse)) {
- RExC_parse++;
+ RExC_parse_inc_by(1);
}
if (num >= (I32)RExC_npar) {
default:
/* Do not generate "unrecognized" warnings here, we fall
back into the quick-grab loop below */
- RExC_parse = atom_parse_start;
+ RExC_parse_set(atom_parse_start);
goto defchar;
} /* end of switch on a \foo sequence */
break;
assert((RExC_flags & RXf_PMf_EXTENDED) == 0);
/*
if (RExC_flags & RXf_PMf_EXTENDED) {
- RExC_parse = reg_skipcomment( pRExC_state, RExC_parse );
+ RExC_parse_set( reg_skipcomment( pRExC_state, RExC_parse ) );
if (RExC_parse < RExC_end)
goto tryagain;
}
p++;
break;
case 'N': /* Handle a single-code point named character. */
- RExC_parse = p + 1;
+ RExC_parse_set( p + 1 );
if (! grok_bslash_N(pRExC_state,
NULL, /* Fail if evaluates to
anything other than a
/* Here, it wasn't a single code point. Go close
* up this EXACTish node. The switch() prior to
* this switch handles the other cases */
- RExC_parse = p = oldp;
+ p = oldp;
+ RExC_parse_set(p);
goto loopdone;
}
p = RExC_parse;
- RExC_parse = atom_parse_start;
+ RExC_parse_set(atom_parse_start);
/* The \N{} means the pattern, if previously /d,
* becomes /u. That means it can't be an EXACTF node,
FALSE, /* No illegal cp's */
UTF))
{
- RExC_parse = p; /* going to die anyway; point to
+ RExC_parse_set(p); /* going to die anyway; point to
exact spot of failure */
vFAIL(message);
}
FALSE, /* No illegal cp's */
UTF))
{
- RExC_parse = p; /* going to die anyway; point
+ RExC_parse_set(p); /* going to die anyway; point
to exact spot of failure */
vFAIL(message);
}
{
/* going to die anyway; point to exact spot of
* failure */
- RExC_parse = p + ((UTF)
+ char *new_p= p + ((UTF)
? UTF8_SAFE_SKIP(p, RExC_end)
: 1);
+ RExC_parse_set(new_p);
vFAIL(message);
}
&& isALPHA_A(*(p - 1))
&& *(p - 2) == '\\'))
{
- RExC_parse = p + 1;
+ RExC_parse_set(p + 1);
vFAIL("Unescaped left brace in regex is "
"illegal here");
}
*flagp |= HASWIDTH | maybe_SIMPLE;
}
- RExC_parse = p;
+ RExC_parse_set(p);
{
/* len is STRLEN which is unsigned, need to copy to signed */
&& OP(REGNODE_p(ret)) != SBOL && ! regcurly(RExC_parse, RExC_end, NULL))
{
if (RExC_strict) {
- RExC_parse++;
+ RExC_parse_inc_by(1);
vFAIL("Unescaped left brace in regex is illegal here");
}
ckWARNreg(RExC_parse + 1, "Unescaped left brace in regex is"
if (*temp_ptr == ']') {
temp_ptr++;
if (! found_problem && ! check_only) {
- RExC_parse = (char *) temp_ptr;
+ RExC_parse_set((char *) temp_ptr);
vFAIL3("POSIX syntax [%c %c] is reserved for future "
"extensions", open_char, open_char);
}
const char * const complement_string = (complement)
? "^"
: "";
- RExC_parse = (char *) p;
+ RExC_parse_set((char *) p);
vFAIL3utf8f("POSIX class [:%s%" UTF8f ":] unknown",
complement_string,
UTF8fARG(UTF, RExC_parse - name_start - 2, name_start));
* compile time values are valid in all runtime cases */
REQUIRE_UNI_RULES(flagp, 0);
- ckWARNexperimental(RExC_parse,
- WARN_EXPERIMENTAL__REGEX_SETS,
- "The regex_sets feature is experimental");
-
/* Everything in this construct is a metacharacter. Operands begin with
* either a '\' (for an escape sequence), or a '[' for a bracketed
* character class. Any other character should be an operator, or
* so that everything gets evaluated down to a single operand, which is the
* result */
- sv_2mortal((SV *)(stack = newAV()));
- sv_2mortal((SV *)(fence_stack = newAV()));
+ stack = (AV*)newSV_type_mortal(SVt_PVAV);
+ fence_stack = (AV*)newSV_type_mortal(SVt_PVAV);
while (RExC_parse < RExC_end) {
I32 top_index; /* Index of top-most element in 'stack' */
* an error: we need to get a single inversion list back
* from the recursion */
- RExC_parse++;
+ RExC_parse_inc_by(1);
RExC_sets_depth++;
node = reg(pRExC_state, 2, flagp, depth+1);
FALSE))
|| ! IS_OPERATOR(*stacked_ptr))))
{
- RExC_parse++;
+ RExC_parse_inc_by(1);
vFAIL("Unexpected '(' with no preceding operator");
}
}
* to fool regclass() into thinking it is part of a
* '[[:posix:]]'. */
if (! is_posix_class) {
- RExC_parse++;
+ RExC_parse_inc_by(1);
}
/* regclass() can only return RESTART_PARSE and NEED_UTF8 if
if (UCHARAT(RExC_parse - 1) == ']') {
break;
}
- RExC_parse++;
+ RExC_parse_inc_by(1);
vFAIL("Unexpected ')'");
}
/* If nothing after the fence, is missing an operand */
if (top_index - fence < 0) {
- RExC_parse++;
+ RExC_parse_inc_by(1);
goto bad_syntax;
}
/* If at least two things on the stack, treat this as an
goto handle_operand;
}
- RExC_parse++;
+ RExC_parse_inc_by(1);
goto bad_syntax;
case '&':
}
unexpected_binary:
- RExC_parse++;
+ RExC_parse_inc_by(1);
vFAIL2("Unexpected binary operator '%c' with no "
"preceding operand", curchar);
}
break;
default:
- RExC_parse += (UTF) ? UTF8SKIP(RExC_parse) : 1;
+ RExC_parse_inc();
if (RExC_parse >= RExC_end) {
break;
}
} /* End of switch on next parse token */
- RExC_parse += (UTF) ? UTF8SKIP(RExC_parse) : 1;
+ RExC_parse_inc();
} /* End of loop parsing through the construct */
vFAIL("Syntax error in (?[...])");
if (RExC_parse >= RExC_end || RExC_parse[1] != ')') {
if (RExC_parse < RExC_end) {
- RExC_parse++;
+ RExC_parse_inc_by(1);
}
vFAIL("Unexpected ']' with no following ')' in (?[...");
if (RExC_sets_depth) { /* If within a recursive call, return in a special
regnode */
- RExC_parse++;
+ RExC_parse_inc_by(1);
node = regpnode(pRExC_state, REGEX_SET, final);
}
else {
/* About to generate an ANYOF (or similar) node from the inversion list
* we have calculated */
save_parse = RExC_parse;
- RExC_parse = SvPV(result_string, len);
+ RExC_parse_set(SvPV(result_string, len));
save_end = RExC_end;
RExC_end = RExC_parse + len;
TURN_OFF_WARNINGS_IN_SUBSTITUTE_PARSE;
);
RESTORE_WARNINGS;
- RExC_parse = save_parse + 1;
+ RExC_parse_set(save_parse + 1);
RExC_end = save_end;
SvREFCNT_dec_NN(final);
SvREFCNT_dec_NN(result_string);
assert(RExC_parse <= RExC_end);
if (UCHARAT(RExC_parse) == '^') { /* Complement the class */
- RExC_parse++;
+ RExC_parse_inc_by(1);
invert = TRUE;
allow_mutiple_chars = FALSE;
MARK_NAUGHTY(1);
value = utf8n_to_uvchr((U8*)RExC_parse,
RExC_end - RExC_parse,
&numlen, UTF8_ALLOW_DEFAULT);
- RExC_parse += numlen;
+ RExC_parse_inc_by(numlen);
+ }
+ else {
+ value = UCHARAT(RExC_parse);
+ RExC_parse_inc_by(1);
}
- else
- value = UCHARAT(RExC_parse++);
if (value == '[') {
char * posix_class_end;
av_undef(posix_warnings);
}
- RExC_parse = posix_class_end;
+ RExC_parse_set(posix_class_end);
}
else if (namedclass == OOB_NAMEDCLASS) {
not_posix_region_end = posix_class_end;
value = utf8n_to_uvchr((U8*)RExC_parse,
RExC_end - RExC_parse,
&numlen, UTF8_ALLOW_DEFAULT);
- RExC_parse += numlen;
+ RExC_parse_inc_by(numlen);
+ }
+ else {
+ value = UCHARAT(RExC_parse);
+ RExC_parse_inc_by(1);
}
- else
- value = UCHARAT(RExC_parse++);
/* Some compilers cannot handle switching on 64-bit integer
* values, therefore value cannot be an UV. Yes, this will
char *e;
if (RExC_pm_flags & PMf_WILDCARD) {
- RExC_parse++;
+ RExC_parse_inc_by(1);
/* diag_listed_as: Use of %s is not allowed in Unicode
property wildcard subpatterns in regex; marked by <--
HERE in m/%s/ */
const U8 c = (U8)value;
e = (char *) memchr(RExC_parse, '}', RExC_end - RExC_parse);
if (!e) {
- RExC_parse++;
+ RExC_parse_inc_by(1);
vFAIL2("Missing right brace on \\%c{}", c);
}
- RExC_parse++;
+ RExC_parse_inc_by(1);
/* White space is allowed adjacent to the braces and after
* any '^', even when not under /x */
while (isSPACE(*RExC_parse)) {
- RExC_parse++;
+ RExC_parse_inc_by(1);
}
if (UCHARAT(RExC_parse) == '^') {
* that bit) */
value ^= 'P' ^ 'p';
- RExC_parse++;
+ RExC_parse_inc_by(1);
while (isSPACE(*RExC_parse)) {
- RExC_parse++;
+ RExC_parse_inc_by(1);
}
}
} /* The \p isn't immediately followed by a '{' */
else if (! isALPHA(*RExC_parse)) {
- RExC_parse += (UTF)
- ? UTF8_SAFE_SKIP(RExC_parse, RExC_end)
- : 1;
+ RExC_parse_inc_safe();
vFAIL2("Character following \\%c must be '{' or a "
"single-character Unicode property name",
(U8) value);
);
if (SvCUR(msg)) { /* Assumes any error causes a msg */
assert(prop_definition == NULL);
- RExC_parse = e + 1;
+ RExC_parse_set(e + 1);
if (SvUTF8(msg)) { /* msg being UTF-8 makes the whole
thing so, or else the display is
mojibake */
if (strings) {
if (ret_invlist) {
if (! prop_definition) {
- RExC_parse = e + 1;
+ RExC_parse_set(e + 1);
vFAIL("Unicode string properties are not implemented in (?[...])");
}
else {
}
else if (! RExC_in_multi_char_class) {
if (invert ^ (value == 'P')) {
- RExC_parse = e + 1;
+ RExC_parse_set(e + 1);
vFAIL("Inverting a character class which contains"
" a multi-character sequence is illegal");
}
}
}
- RExC_parse = e + 1;
+ RExC_parse_set(e + 1);
namedclass = ANYOF_UNIPROP; /* no official name, but it's
named */
}
{
/* going to die anyway; point to exact spot of
* failure */
- RExC_parse += (UTF)
- ? UTF8_SAFE_SKIP(RExC_parse, RExC_end)
- : 1;
+ RExC_parse_inc_safe();
vFAIL(message);
}
value = grok_c_char;
- RExC_parse++;
+ RExC_parse_inc_by(1);
if (message && TO_OUTPUT_WARNINGS(RExC_parse)) {
warn_non_literal_string(RExC_parse, packed_warn, message);
}
| PERL_SCAN_NOTIFY_ILLDIGIT;
numlen = (strict) ? 4 : 3;
value = grok_oct(--RExC_parse, &numlen, &flags, NULL);
- RExC_parse += numlen;
+ RExC_parse_inc_by(numlen);
if (numlen != 3) {
if (strict) {
- RExC_parse += (UTF)
- ? UTF8_SAFE_SKIP(RExC_parse, RExC_end)
- : 1;
+ RExC_parse_inc_safe();
vFAIL("Need exactly 3 octal digits");
}
else if ( (flags & PERL_SCAN_NOTIFY_ILLDIGIT)
/* If the '-' is at the end of the class (just before the ']',
* it is a literal minus; otherwise it is a range */
if (next_char_ptr < RExC_end && *next_char_ptr != ']') {
- RExC_parse = next_char_ptr;
+ RExC_parse_set(next_char_ptr);
/* a bad range like \w-, [:word:]- ? */
if (namedclass > OOB_NAMEDCLASS) {
* reported. See the comments at the definition of
* REPORT_LOCATION_ARGS for details */
RExC_copy_start_in_input = (char *) orig_parse;
- RExC_start = RExC_parse = SvPV(substitute_parse, len);
+ RExC_start = SvPV(substitute_parse, len);
+ RExC_parse_set( RExC_start );
RExC_copy_start_in_constructed = RExC_start + constructed_prefix_len;
RExC_end = RExC_parse + len;
RExC_in_multi_char_class = 1;
*flagp |= reg_flags & (HASWIDTH|SIMPLE|POSTPONED|RESTART_PARSE|NEED_UTF8);
/* And restore so can parse the rest of the pattern */
- RExC_parse = save_parse;
+ RExC_parse_set(save_parse);
RExC_start = RExC_copy_start_in_constructed = RExC_copy_start_in_input = save_start;
RExC_end = save_end;
RExC_in_multi_char_class = 0;
those two cases, the parse position is advanced beyond all such comments and
white space.
- This is the UTF, (?#...), and /x friendly way of saying RExC_parse++.
+ This is the UTF, (?#...), and /x friendly way of saying RExC_parse_inc_by(1).
*/
STATIC void
|| UTF8_IS_INVARIANT(*RExC_parse)
|| UTF8_IS_START(*RExC_parse));
- RExC_parse += (UTF)
- ? UTF8_SAFE_SKIP(RExC_parse, RExC_end)
- : 1;
+ RExC_parse_inc_safe();
skip_to_be_ignored_text(pRExC_state, &RExC_parse,
FALSE /* Don't force /x */ );