{
regnode *ret = NULL;
I32 flags = 0;
- char *parse_start = RExC_parse;
+ char *parse_start;
U8 op;
int invert = 0;
U8 arg;
PERL_ARGS_ASSERT_REGATOM;
tryagain:
+ parse_start = RExC_parse;
switch ((U8)*RExC_parse) {
case '^':
RExC_seen_zerolen++;
if (*flagp & RESTART_PASS1)
return NULL;
- RExC_parse--;
/* Here, evaluates to a single code point. Go get that */
+ RExC_parse = parse_start;
goto defchar;
case 'k': /* Handle \k<NAME> and \k'NAME' */
* octal character escape, e.g. \35 or \777.
* The above logic should make it obvious why using
* octal escapes in patterns is problematic. - Yves */
+ RExC_parse = parse_start;
goto defchar;
}
}
default:
/* Do not generate "unrecognized" warnings here, we fall
back into the quick-grab loop below */
- parse_start--;
+ RExC_parse = parse_start;
goto defchar;
} /* end of switch on a \foo sequence */
break;
/* FALLTHROUGH */
default:
-
- parse_start = RExC_parse - 1;
-
- RExC_parse++;
-
defchar: {
/* Here, we have determined that the next thing is probably a
- * literal character. (It still may be an escape sequence that
- * evaluates to a single character) */
+ * literal character. RExC_parse points to the first byte of its
+ * definition. (It still may be an escape sequence that evaluates
+ * to a single character) */
STRLEN len = 0;
UV ender = 0;
* could back off to end with only a code point that isn't such a
* non-final, but it is possible for there not to be any in the
* entire node. */
- for (p = RExC_parse - 1;
+
+ assert( ! UTF /* Is at the beginning of a character */
+ || UTF8_IS_INVARIANT(UCHARAT(RExC_parse))
+ || UTF8_IS_START(UCHARAT(RExC_parse)));
+
+ for (p = RExC_parse;
len < upper_parse && p < RExC_end;
len++)
{