REPORT_LOCATION_ARGS(offset)); \
} STMT_END
+#define vFAIL3utf8f(m, a1, a2) STMT_START { \
+ const IV offset = RExC_parse - RExC_precomp; \
+ if (!SIZE_ONLY) \
+ SAVEFREESV(RExC_rx_sv); \
+ S_re_croak2(aTHX_ UTF, m, REPORT_LOCATION, a1, a2, \
+ REPORT_LOCATION_ARGS(offset)); \
+} STMT_END
+
/* These have asserts in them because of [perl #122671] Many warnings in
* regcomp.c can occur twice. If they get output in pass1 and later in that
* pass, the pattern has to be converted to UTF-8 and the pass restarted, they
RExC_pm_flags = pm_flags;
if (runtime_code) {
- if (TAINTING_get && TAINT_get)
+ assert(TAINTING_get || !TAINT_get);
+ if (TAINT_get)
Perl_croak(aTHX_ "Eval-group in insecure regular expression");
if (!S_compile_runtime_code(aTHX_ pRExC_state, exp, plen)) {
++RExC_parse;
}
- if (PASS2) {
- STD_PMMOD_FLAGS_PARSE_X_WARN(x_mod_count);
- }
+ vFAIL("Sequence (?... not terminated");
}
/*
* enough space for all the things we are about to throw
* away, but we can shrink it by the ammount we are about
* to re-use here */
- RExC_size = PREVOPER(RExC_size) - regarglen[(U8)OPFAIL];
+ RExC_size += PREVOPER(RExC_size) - regarglen[(U8)OPFAIL];
}
else {
ckWARNreg(RExC_parse, "Quantifier {n,m} with n > m can't match");
"Useless use of greediness modifier '%c'",
*RExC_parse);
}
- /* Absorb the modifier, so later code doesn't see nor use it */
- nextchar(pRExC_state);
}
do_curly:
* it returns U+FFFD (Replacement character) and sets *encp to NULL.
*/
STATIC UV
-S_reg_recode(pTHX_ const char value, SV **encp)
+S_reg_recode(pTHX_ const U8 value, SV **encp)
{
STRLEN numlen = 1;
- SV * const sv = newSVpvn_flags(&value, numlen, SVs_TEMP);
+ SV * const sv = newSVpvn_flags((const char *) &value, numlen, SVs_TEMP);
const char * const s = *encp ? sv_recode_to_utf8(sv, *encp) : SvPVX(sv);
const STRLEN newlen = SvCUR(sv);
UV uv = UNICODE_REPLACEMENT;
recode_encoding:
if (! RExC_override_recoding) {
SV* enc = _get_encoding();
- ender = reg_recode((const char)(U8)ender, &enc);
+ ender = reg_recode((U8)ender, &enc);
if (!enc && PASS2)
ckWARNreg(p, "Invalid escape in the specified encoding");
REQUIRE_UTF8(flagp);
* default: case next time and keep on incrementing until
* we find one of the invariants we do handle. */
RExC_parse++;
+ if (*RExC_parse == 'c') {
+ /* Skip the \cX notation for control characters */
+ RExC_parse += UTF ? UTF8SKIP(RExC_parse) : 1;
+ }
break;
case '[':
{
/* Having gotten rid of the fence, we pop the operand at the
* stack top and process it as a newly encountered operand */
current = av_pop(stack);
- assert(IS_OPERAND(current));
- goto handle_operand;
+ if (IS_OPERAND(current)) {
+ goto handle_operand;
+ }
+
+ RExC_parse++;
+ goto bad_syntax;
case '&':
case '|':
/* Here, the new operator has equal or lower precedence than
* what's already there. This means the operation already
* there should be performed now, before the new one. */
+
rhs = av_pop(stack);
+ if (! IS_OPERAND(rhs)) {
+
+ /* This can happen when a ! is not followed by an operand,
+ * like in /(?[\t &!])/ */
+ goto bad_syntax;
+ }
+
lhs = av_pop(stack);
- assert(IS_OPERAND(rhs));
- assert(IS_OPERAND(lhs));
+ if (! IS_OPERAND(lhs)) {
+
+ /* This can happen when there is an empty (), like in
+ * /(?[[0]+()+])/ */
+ goto bad_syntax;
+ }
switch (stacked_operator) {
case '&':
av_push(stack, rhs);
goto redo_curchar;
- case '!': /* Highest priority, right associative, so just push
- onto stack */
- av_push(stack, newSVuv(curchar));
+ case '!': /* Highest priority, right associative */
+
+ /* If what's already at the top of the stack is another '!",
+ * they just cancel each other out */
+ if ( (top_ptr = av_fetch(stack, top_index, FALSE))
+ && (IS_OPERATOR(*top_ptr) && SvUV(*top_ptr) == '!'))
+ {
+ only_to_avoid_leaks = av_pop(stack);
+ SvREFCNT_dec(only_to_avoid_leaks);
+ }
+ else { /* Otherwise, since it's right associative, just push
+ onto the stack */
+ av_push(stack, newSVuv(curchar));
+ }
break;
default:
|| SvTYPE(final) != SVt_INVLIST
|| av_tindex(stack) >= 0) /* More left on stack */
{
+ bad_syntax:
SvREFCNT_dec(final);
vFAIL("Incomplete expression within '(?[ ])'");
}
if (!SIZE_ONLY) {
SV* invlist;
char* name;
+ char* base_name; /* name after any packages are stripped */
+ const char * const colon_colon = "::";
/* Try to get the definition of the property into
* <invlist>. If /i is in effect, the effective property
HV* curpkg = (IN_PERL_COMPILETIME)
? PL_curstash
: CopSTASH(PL_curcop);
+ UV final_n = n;
+ bool has_pkg;
+
if (swash) { /* Got a swash but no inversion list.
Something is likely wrong that will
be sorted-out later */
* typo) in specifying a Unicode property, or it could
* be a user-defined property that will be available at
* run-time. The names of these must begin with 'In'
- * or 'Is'. So
+ * or 'Is' (after any packages are stripped off). So
* if not one of those, or if we accept only
* compile-time properties, is an error; otherwise add
* it to the list for run-time look up. */
- if ( n < 3
- || name[0] != 'I'
- || (name[1] != 's' && name[1] != 'n')
+ if ((base_name = rninstr(name, name + n,
+ colon_colon, colon_colon + 2)))
+ { /* Has ::. We know this must be a user-defined
+ property */
+ base_name += 2;
+ final_n -= base_name - name;
+ has_pkg = TRUE;
+ }
+ else {
+ base_name = name;
+ has_pkg = FALSE;
+ }
+
+ if ( final_n < 3
+ || base_name[0] != 'I'
+ || (base_name[1] != 's' && base_name[1] != 'n')
|| ret_invlist)
{
+ const char * const msg
+ = (has_pkg)
+ ? "Illegal user-defined property name"
+ : "Can't find Unicode property definition";
RExC_parse = e + 1;
- vFAIL2utf8f(
- "Can't find Unicode property definition \"%"UTF8f"\"",
- UTF8fARG(UTF, n, name));
+
+ /* diag_listed_as: Can't find Unicode property definition "%s" */
+ vFAIL3utf8f("%s \"%"UTF8f"\"",
+ msg, UTF8fARG(UTF, n, name));
}
/* If the property name doesn't already have a package
* name, add the current one to it so that it can be
* referred to outside it. [perl #121777] */
- if (curpkg && ! instr(name, "::")) {
+ if (! has_pkg && curpkg) {
char* pkgname = HvNAME(curpkg);
if (strNE(pkgname, "main")) {
char* full_name = Perl_form(aTHX_
recode_encoding:
if (! RExC_override_recoding) {
SV* enc = _get_encoding();
- value = reg_recode((const char)(U8)value, &enc);
+ value = reg_recode((U8)value, &enc);
if (!enc) {
if (strict) {
vFAIL("Invalid escape in the specified encoding");