through */
U32 study_chunk_recursed_bytes; /* bytes in bitmap */
I32 in_lookbehind;
+ I32 in_lookahead;
I32 contains_locale;
I32 override_recoding;
#ifdef EBCDIC
#define RExC_study_chunk_recursed_bytes \
(pRExC_state->study_chunk_recursed_bytes)
#define RExC_in_lookbehind (pRExC_state->in_lookbehind)
+#define RExC_in_lookahead (pRExC_state->in_lookahead)
#define RExC_contains_locale (pRExC_state->contains_locale)
#ifdef EBCDIC
# define RExC_recode_x_to_native (pRExC_state->recode_x_to_native)
if (UTF) { \
SV *zlopp = newSV(UTF8_MAXBYTES); \
unsigned char *flrbbbbb = (unsigned char *) SvPVX(zlopp); \
- unsigned const char *const kapow = uvchr_to_utf8(flrbbbbb, val); \
+ unsigned char *const kapow = uvchr_to_utf8(flrbbbbb, val); \
+ *kapow = '\0'; \
SvCUR_set(zlopp, kapow - flrbbbbb); \
SvPOK_on(zlopp); \
SvUTF8_on(zlopp); \
RExC_seen = 0;
RExC_maxlen = 0;
RExC_in_lookbehind = 0;
+ RExC_in_lookahead = 0;
RExC_seen_zerolen = *exp == '^' ? -1 : 0;
#ifdef EBCDIC
RExC_recode_x_to_native = 0;
*flagp = 0; /* Tentatively. */
+ if (RExC_in_lookbehind) {
+ RExC_in_lookbehind++;
+ }
+ if (RExC_in_lookahead) {
+ RExC_in_lookahead++;
+ }
+
/* Having this true makes it feasible to have a lot fewer tests for the
* parse pointer being in scope. For example, we can write
* while(isFOO(*RExC_parse)) RExC_parse++;
if (RExC_parse >= RExC_end) {
vFAIL("Sequence (?... not terminated");
}
-
- /* FALLTHROUGH */
+ RExC_seen_zerolen++;
+ break;
case '=': /* (?=...) */
RExC_seen_zerolen++;
+ RExC_in_lookahead++;
break;
case '!': /* (?!...) */
RExC_seen_zerolen++;
if (RExC_in_lookbehind) {
RExC_in_lookbehind--;
}
+ if (RExC_in_lookahead) {
+ RExC_in_lookahead--;
+ }
if (after_freeze > RExC_npar)
RExC_npar = after_freeze;
return(ret);
*flagp |= SIMPLE;
goto finish_meta_pat;
case 'K':
- RExC_seen_zerolen++;
- ret = reg_node(pRExC_state, KEEPS);
- *flagp |= SIMPLE;
- /* XXX:dmq : disabling in-place substitution seems to
- * be necessary here to avoid cases of memory corruption, as
- * with: C<$_="x" x 80; s/x\K/y/> -- rgs
- */
- RExC_seen |= REG_LOOKBEHIND_SEEN;
- goto finish_meta_pat;
+ if (!RExC_in_lookbehind && !RExC_in_lookahead) {
+ RExC_seen_zerolen++;
+ ret = reg_node(pRExC_state, KEEPS);
+ *flagp |= SIMPLE;
+ /* XXX:dmq : disabling in-place substitution seems to
+ * be necessary here to avoid cases of memory corruption, as
+ * with: C<$_="x" x 80; s/x\K/y/> -- rgs
+ */
+ RExC_seen |= REG_LOOKBEHIND_SEEN;
+ goto finish_meta_pat;
+ }
+ else {
+ ++RExC_parse; /* advance past the 'K' */
+ vFAIL("\\K not permitted in lookahead/lookbehind");
+ }
case 'Z':
ret = reg_node(pRExC_state, SEOL);
*flagp |= SIMPLE;
&& num >= RExC_npar
/* cannot be an octal escape if it starts with 8 */
&& *RExC_parse != '8'
- /* cannot be an octal escape it it starts with 9 */
+ /* cannot be an octal escape if it starts with 9 */
&& *RExC_parse != '9'
) {
/* Probably not meant to be a backref, instead likely
full_cp_count += this_end - this_start + 1;
}
- invlist_iterfinish(cp_list);
/* At the end of the loop, we count how many bits differ from
* the bits in lowest code point, call the count 'd'. If the
ret = reganode(pRExC_state, op, lowest_cp);
FLAGS(REGNODE_p(ret)) = ANYOFM_mask;
}
+
+ done_anyofm:
+ invlist_iterfinish(cp_list);
}
- done_anyofm:
if (inverted) {
_invlist_invert(cp_list);
/* Certain properties whose values are numeric need special handling.
* They may optionally be prefixed by 'is'. Ignore that prefix for the
* purposes of checking if this is one of those properties */
- if (memBEGINPs(lookup_name, name_len, "is")) {
+ if (memBEGINPs(lookup_name, j, "is")) {
lookup_offset = 2;
}
}
/* Store the first real character in the denominator */
- lookup_name[j++] = name[i];
+ if (i < name_len) {
+ lookup_name[j++] = name[i];
+ }
}
}
/* If the original input began with 'In' or 'Is', it could be a subroutine
* call to a user-defined property instead of a Unicode property name. */
- if ( non_pkg_begin + name_len > 2
+ if ( name_len - non_pkg_begin > 2
&& name[non_pkg_begin+0] == 'I'
&& (name[non_pkg_begin+1] == 'n' || name[non_pkg_begin+1] == 's'))
{