U32 frame_count;
AV *warn_text;
HV *unlexed_names;
-#ifdef ADD_TO_REGEXEC
- char *starttry; /* -Dr: where regtry was called. */
-#define RExC_starttry (pRExC_state->starttry)
-#endif
SV *runtime_code_qr; /* qr with the runtime code blocks */
#ifdef DEBUGGING
const char *lastparse;
*
* pm_flags contains the PMf_* flags, typically based on those from the
* pm_flags field of the related PMOP. Currently we're only interested in
- * PMf_HAS_CV, PMf_IS_QR, PMf_USE_RE_EVAL.
+ * PMf_HAS_CV, PMf_IS_QR, PMf_USE_RE_EVAL, PMf_WILDCARD.
*
* For many years this code had an initial sizing pass that calculated
* (sometimes incorrectly, leading to security holes) the size needed for the
/* && memCHRs("iogcmsx", *RExC_parse) */
/* (?g), (?gc) and (?o) are useless here
and must be globally applied -- japhy */
+ if ((RExC_pm_flags & PMf_WILDCARD)) {
+ if (flagsp == & negflags) {
+ if (*RExC_parse == 'm') {
+ RExC_parse++;
+ /* diag_listed_as: Use of %s is not allowed in Unicode
+ property wildcard subpatterns in regex; marked by <--
+ HERE in m/%s/ */
+ vFAIL("Use of modifier '-m' is not allowed in Unicode"
+ " property wildcard subpatterns");
+ }
+ }
+ else {
+ if (*RExC_parse == 's') {
+ goto modifier_illegal_in_wildcard;
+ }
+ }
+ }
+
switch (*RExC_parse) {
/* Code for the imsxn flags */
vFAIL2("Regexp modifier \"%c\" may not appear after the \"-\"",
*(RExC_parse - 1));
NOT_REACHED; /*NOTREACHED*/
- case ONCE_PAT_MOD: /* 'o' */
case GLOBAL_PAT_MOD: /* 'g' */
+ if (RExC_pm_flags & PMf_WILDCARD) {
+ goto modifier_illegal_in_wildcard;
+ }
+ /*FALLTHROUGH*/
+ case ONCE_PAT_MOD: /* 'o' */
if (ckWARN(WARN_REGEXP)) {
const I32 wflagbit = *RExC_parse == 'o'
? WASTED_O
break;
case CONTINUE_PAT_MOD: /* 'c' */
+ if (RExC_pm_flags & PMf_WILDCARD) {
+ goto modifier_illegal_in_wildcard;
+ }
if (ckWARN(WARN_REGEXP)) {
if (! (wastedflags & WASTED_C) ) {
wastedflags |= WASTED_GC;
}
break;
case KEEPCOPY_PAT_MOD: /* 'p' */
+ if (RExC_pm_flags & PMf_WILDCARD) {
+ goto modifier_illegal_in_wildcard;
+ }
if (flagsp == &negflags) {
ckWARNreg(RExC_parse + 1,"Useless use of (?-p)");
} else {
case ':':
case ')':
+ if ( (RExC_pm_flags & PMf_WILDCARD)
+ && cs != REGEX_ASCII_MORE_RESTRICTED_CHARSET)
+ {
+ RExC_parse++;
+ /* diag_listed_as: Use of %s is not allowed in Unicode
+ property wildcard subpatterns in regex; marked by <--
+ HERE in m/%s/ */
+ vFAIL2("Use of modifier '%c' is not allowed in Unicode"
+ " property wildcard subpatterns",
+ has_charset_modifier);
+ }
+
if ((posflags & (RXf_PMf_EXTENDED|RXf_PMf_EXTENDED_MORE)) == RXf_PMf_EXTENDED) {
negflags |= RXf_PMf_EXTENDED_MORE;
}
}
vFAIL("Sequence (?... not terminated");
+
+ modifier_illegal_in_wildcard:
+ RExC_parse++;
+ /* diag_listed_as: Use of %s is not allowed in Unicode property wildcard
+ subpatterns in regex; marked by <-- HERE in m/%s/ */
+ vFAIL2("Use of modifier '%c' is not allowed in Unicode property wildcard"
+ " subpatterns", *(RExC_parse - 1));
}
/*
do_curly:
if ((flags&SIMPLE)) {
if (min == 0 && max == REG_INFTY) {
+
+ /* Going from 0..inf is currently forbidden in wildcard
+ * subpatterns. The only reason is to make it harder to
+ * write patterns that take a long long time to halt, and
+ * because the use of this construct isn't necessary in
+ * matching Unicode property values */
+ if (RExC_pm_flags & PMf_WILDCARD) {
+ RExC_parse++;
+ /* diag_listed_as: Use of %s is not allowed in Unicode
+ property wildcard subpatterns in regex; marked by
+ <-- HERE in m/%s/ */
+ vFAIL("Use of quantifier '*' is not allowed in"
+ " Unicode property wildcard subpatterns");
+ /* Note, don't need to worry about {0,}, as a '}' isn't
+ * legal at all in wildcards, so wouldn't get this far
+ * */
+ }
reginsert(pRExC_state, STAR, ret, depth+1);
MARK_NAUGHTY(4);
RExC_seen |= REG_UNBOUNDED_QUANTIFIER_SEEN;
/* Special Escapes */
case 'A':
RExC_seen_zerolen++;
- ret = reg_node(pRExC_state, SBOL);
- /* SBOL is shared with /^/ so we set the flags so we can tell
- * /\A/ from /^/ in split. */
- FLAGS(REGNODE_p(ret)) = 1;
+ /* Under wildcards, this is changed to match \n; should be
+ * invisible to the user, as they have to compile under /m */
+ if (RExC_pm_flags & PMf_WILDCARD) {
+ ret = reg_node(pRExC_state, MBOL);
+ }
+ else {
+ ret = reg_node(pRExC_state, SBOL);
+ /* SBOL is shared with /^/ so we set the flags so we can tell
+ * /\A/ from /^/ in split. */
+ FLAGS(REGNODE_p(ret)) = 1;
+ }
*flagp |= SIMPLE;
goto finish_meta_pat;
case 'G':
+ if (RExC_pm_flags & PMf_WILDCARD) {
+ RExC_parse++;
+ /* diag_listed_as: Use of %s is not allowed in Unicode property
+ wildcard subpatterns in regex; marked by <-- HERE in m/%s/
+ */
+ vFAIL("Use of '\\G' is not allowed in Unicode property"
+ " wildcard subpatterns");
+ }
ret = reg_node(pRExC_state, GPOS);
RExC_seen |= REG_GPOS_SEEN;
*flagp |= SIMPLE;
vFAIL("\\K not permitted in lookahead/lookbehind");
}
case 'Z':
- ret = reg_node(pRExC_state, SEOL);
+ if (RExC_pm_flags & PMf_WILDCARD) {
+ /* See comment under \A above */
+ ret = reg_node(pRExC_state, MEOL);
+ }
+ else {
+ ret = reg_node(pRExC_state, SEOL);
+ }
*flagp |= SIMPLE;
RExC_seen_zerolen++; /* Do not optimize RE away */
goto finish_meta_pat;
case 'z':
- ret = reg_node(pRExC_state, EOS);
+ if (RExC_pm_flags & PMf_WILDCARD) {
+ /* See comment under \A above */
+ ret = reg_node(pRExC_state, MEOL);
+ }
+ else {
+ ret = reg_node(pRExC_state, EOS);
+ }
*flagp |= SIMPLE;
RExC_seen_zerolen++; /* Do not optimize RE away */
goto finish_meta_pat;
GET_RE_DEBUG_FLAGS_DECL;
PERL_ARGS_ASSERT_HANDLE_REGEX_SETS;
+ PERL_UNUSED_ARG(oregcomp_parse); /* Only for Set_Node_Length */
DEBUG_PARSE("xcls");
{
char *e;
+ if (RExC_pm_flags & PMf_WILDCARD) {
+ RExC_parse++;
+ /* diag_listed_as: Use of %s is not allowed in Unicode
+ property wildcard subpatterns in regex; marked by <--
+ HERE in m/%s/ */
+ vFAIL3("Use of '\\%c%c' is not allowed in Unicode property"
+ " wildcard subpatterns", (char) value, *(RExC_parse - 1));
+ }
+
/* \p means they want Unicode semantics */
REQUIRE_UNI_RULES(flagp, 0);
SV *
Perl_re_intuit_string(pTHX_ REGEXP * const r)
{ /* Assume that RE_INTUIT is set */
+ /* Returns an SV containing a string that must appear in the target for it
+ * to match */
+
struct regexp *const prog = ReANY(r);
GET_RE_DEBUG_FLAGS_DECL;
DEBUG_COMPILE_r(
{
- const char * const s = SvPV_nolen_const(RX_UTF8(r)
+ if (prog->maxlen > 0) {
+ const char * const s = SvPV_nolen_const(RX_UTF8(r)
? prog->check_utf8 : prog->check_substr);
- if (!PL_colorset) reginitcolors();
- Perl_re_printf( aTHX_
+ if (!PL_colorset) reginitcolors();
+ Perl_re_printf( aTHX_
"%sUsing REx %ssubstr:%s \"%s%.60s%s%s\"\n",
PL_colors[4],
RX_UTF8(r) ? "utf8 " : "",
s,
PL_colors[1],
(strlen(s) > PL_dump_re_max_len ? "..." : ""));
+ }
} );
/* use UTF8 check substring if regexp pattern itself is in UTF8 */
# endif
+STATIC REGEXP *
+S_compile_wildcard(pTHX_ const char * name, const STRLEN len,
+ const bool ignore_case)
+{
+ U32 flags = PMf_MULTILINE|PMf_WILDCARD;
+ REGEXP * subpattern_re;
+
+ PERL_ARGS_ASSERT_COMPILE_WILDCARD;
+
+ if (ignore_case) {
+ flags |= PMf_FOLD;
+ }
+ set_regex_charset(&flags, REGEX_ASCII_MORE_RESTRICTED_CHARSET);
+
+ subpattern_re = re_op_compile_wrapper(sv_2mortal(newSVpvn(name, len)),
+ /* Like in op.c, we copy the compile
+ * time pm flags to the rx ones */
+ (flags & RXf_PMf_COMPILETIME), flags);
+
+ assert(subpattern_re); /* Should have died if didn't compile successfully */
+ return subpattern_re;
+}
+
+STATIC I32
+S_execute_wildcard(pTHX_ REGEXP * const prog, char* stringarg, char *strend,
+ char *strbeg, SSize_t minend, SV *screamer, U32 nosave)
+{
+ I32 result;
+
+ PERL_ARGS_ASSERT_EXECUTE_WILDCARD;
+
+ result = pregexec(prog, stringarg, strend, strbeg, minend, screamer, nosave);
+
+ return result;
+}
+
SV *
Perl_handle_user_defined_property(pTHX_
if (table_index) {
const char * const * prop_values
= UNI_prop_value_ptrs[table_index];
- SV * subpattern;
- Size_t subpattern_len;
REGEXP * subpattern_re;
char open = name[i++];
char close;
* pattern fails to compile, our added text to the user's
* pattern will be displayed to the user, which is not so
* desirable. */
- subpattern_len = name_len - i - 1 - escaped;
- subpattern = Perl_newSVpvf(aTHX_ "(?iaa:%.*s)",
- (unsigned) subpattern_len,
- name + i);
- subpattern = sv_2mortal(subpattern);
- subpattern_re = re_compile(subpattern, 0);
- assert(subpattern_re); /* Should have died if didn't compile
- successfully */
+ subpattern_re = compile_wildcard(name + i,
+ name_len - i - 1 - escaped,
+ TRUE /* /i */
+ );
/* For each legal property value, see if the supplied pattern
* matches it. */
const Size_t len = strlen(entry);
SV* entry_sv = newSVpvn_flags(entry, len, SVs_TEMP);
- if (pregexec(subpattern_re,
+ if (execute_wildcard(subpattern_re,
(char *) entry,
(char *) entry + len,
(char *) entry, 0,