Pulled from regcomp.c.
*/
PERL_STATIC_INLINE I32
-S_regcurly(pTHX_ const char *s)
+S_regcurly(pTHX_ const char *s,
+ const bool rbrace_must_be_escaped /* Should the terminating '} be
+ preceded by a backslash? This
+ is an abnormal case */
+ )
{
PERL_ARGS_ASSERT_REGCURLY;
while (isDIGIT(*s))
s++;
}
- if (*s != '}')
- return FALSE;
- return TRUE;
+
+ return (rbrace_must_be_escaped)
+ ? *s == '\\' && *(s+1) == '}'
+ : *s == '}';
}
/* XXX Add documentation after final interface and behavior is decided */
|NULLOK re_scream_pos_data *data
Ap |SV* |re_intuit_string|NN REGEXP *const r
#if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_TOKE_C)
-EiPR |I32 |regcurly |NN const char *s
+EiPR |I32 |regcurly |NN const char *s \
+ |const bool rbrace_must_be_escaped
#endif
Ap |I32 |regexec_flags |NN REGEXP *const rx|NN char *stringarg \
|NN char *strend|NN char *strbeg|I32 minend \
sR |char* |scan_inputsymbol|NN char *start
sR |char* |scan_pat |NN char *start|I32 type
sR |char* |scan_str |NN char *start|int keep_quoted \
- |int keep_delims|int re_reparse
+ |int keep_delims|int re_reparse \
+ |bool deprecate_escaped_matching
sR |char* |scan_subst |NN char *start
sR |char* |scan_trans |NN char *start
s |char* |scan_word |NN char *s|NN char *dest|STRLEN destlen \
#define grok_bslash_c(a,b,c) S_grok_bslash_c(aTHX_ a,b,c)
#define grok_bslash_o(a,b,c,d,e,f,g) S_grok_bslash_o(aTHX_ a,b,c,d,e,f,g)
#define grok_bslash_x(a,b,c,d,e,f,g) S_grok_bslash_x(aTHX_ a,b,c,d,e,f,g)
-#define regcurly(a) S_regcurly(aTHX_ a)
+#define regcurly(a,b) S_regcurly(aTHX_ a,b)
# endif
# if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_UTF8_C)
#define _add_range_to_invlist(a,b,c) Perl__add_range_to_invlist(aTHX_ a,b,c)
#define scan_ident(a,b,c,d,e) S_scan_ident(aTHX_ a,b,c,d,e)
#define scan_inputsymbol(a) S_scan_inputsymbol(aTHX_ a)
#define scan_pat(a,b) S_scan_pat(aTHX_ a,b)
-#define scan_str(a,b,c,d) S_scan_str(aTHX_ a,b,c,d)
+#define scan_str(a,b,c,d,e) S_scan_str(aTHX_ a,b,c,d,e)
#define scan_subst(a) S_scan_subst(aTHX_ a)
#define scan_trans(a) S_scan_trans(aTHX_ a)
#define scan_word(a,b,c,d,e) S_scan_word(aTHX_ a,b,c,d,e)
# define _CC_QUOTEMETA 20
# define _CC_NON_FINAL_FOLD 21
# define _CC_IS_IN_SOME_FOLD 22
-/* Unused: 23-31
+# define _CC_BACKSLASH_FOO_LBRACE_IS_META 31 /* temp, see mk_PL_charclass.pl */
+/* Unused: 23-30
* If more bits are needed, one could add a second word for non-64bit
* QUAD_IS_INT systems, using some #ifdefs to distinguish between having a 2nd
* word or not. The IS_IN_SOME_FOLD bit is the most easily expendable, as it
/* U+4B 'K' */ (1U<<_CC_NONLATIN1_FOLD)|(1U<<_CC_ALPHANUMERIC)|(1U<<_CC_ALPHA)|(1U<<_CC_ASCII)|(1U<<_CC_CASED)|(1U<<_CC_CHARNAME_CONT)|(1U<<_CC_GRAPH)|(1U<<_CC_IDFIRST)|(1U<<_CC_PRINT)|(1U<<_CC_UPPER)|(1U<<_CC_WORDCHAR)|(1U<<_CC_IS_IN_SOME_FOLD),
/* U+4C 'L' */ (1U<<_CC_NONLATIN1_FOLD)|(1U<<_CC_ALPHANUMERIC)|(1U<<_CC_ALPHA)|(1U<<_CC_ASCII)|(1U<<_CC_CASED)|(1U<<_CC_CHARNAME_CONT)|(1U<<_CC_GRAPH)|(1U<<_CC_IDFIRST)|(1U<<_CC_PRINT)|(1U<<_CC_UPPER)|(1U<<_CC_WORDCHAR)|(1U<<_CC_IS_IN_SOME_FOLD),
/* U+4D 'M' */ (1U<<_CC_ALPHANUMERIC)|(1U<<_CC_ALPHA)|(1U<<_CC_ASCII)|(1U<<_CC_CASED)|(1U<<_CC_CHARNAME_CONT)|(1U<<_CC_GRAPH)|(1U<<_CC_IDFIRST)|(1U<<_CC_PRINT)|(1U<<_CC_UPPER)|(1U<<_CC_WORDCHAR)|(1U<<_CC_IS_IN_SOME_FOLD),
-/* U+4E 'N' */ (1U<<_CC_NONLATIN1_FOLD)|(1U<<_CC_ALPHANUMERIC)|(1U<<_CC_ALPHA)|(1U<<_CC_ASCII)|(1U<<_CC_CASED)|(1U<<_CC_CHARNAME_CONT)|(1U<<_CC_GRAPH)|(1U<<_CC_IDFIRST)|(1U<<_CC_PRINT)|(1U<<_CC_UPPER)|(1U<<_CC_WORDCHAR)|(1U<<_CC_IS_IN_SOME_FOLD),
+/* U+4E 'N' */ (1U<<_CC_NONLATIN1_FOLD)|(1U<<_CC_ALPHANUMERIC)|(1U<<_CC_ALPHA)|(1U<<_CC_ASCII)|(1U<<_CC_CASED)|(1U<<_CC_CHARNAME_CONT)|(1U<<_CC_GRAPH)|(1U<<_CC_IDFIRST)|(1U<<_CC_PRINT)|(1U<<_CC_UPPER)|(1U<<_CC_WORDCHAR)|(1U<<_CC_IS_IN_SOME_FOLD)|(1U<<_CC_BACKSLASH_FOO_LBRACE_IS_META),
/* U+4F 'O' */ (1U<<_CC_ALPHANUMERIC)|(1U<<_CC_ALPHA)|(1U<<_CC_ASCII)|(1U<<_CC_CASED)|(1U<<_CC_CHARNAME_CONT)|(1U<<_CC_GRAPH)|(1U<<_CC_IDFIRST)|(1U<<_CC_PRINT)|(1U<<_CC_UPPER)|(1U<<_CC_WORDCHAR)|(1U<<_CC_IS_IN_SOME_FOLD),
-/* U+50 'P' */ (1U<<_CC_ALPHANUMERIC)|(1U<<_CC_ALPHA)|(1U<<_CC_ASCII)|(1U<<_CC_CASED)|(1U<<_CC_CHARNAME_CONT)|(1U<<_CC_GRAPH)|(1U<<_CC_IDFIRST)|(1U<<_CC_PRINT)|(1U<<_CC_UPPER)|(1U<<_CC_WORDCHAR)|(1U<<_CC_IS_IN_SOME_FOLD),
+/* U+50 'P' */ (1U<<_CC_ALPHANUMERIC)|(1U<<_CC_ALPHA)|(1U<<_CC_ASCII)|(1U<<_CC_CASED)|(1U<<_CC_CHARNAME_CONT)|(1U<<_CC_GRAPH)|(1U<<_CC_IDFIRST)|(1U<<_CC_PRINT)|(1U<<_CC_UPPER)|(1U<<_CC_WORDCHAR)|(1U<<_CC_IS_IN_SOME_FOLD)|(1U<<_CC_BACKSLASH_FOO_LBRACE_IS_META),
/* U+51 'Q' */ (1U<<_CC_ALPHANUMERIC)|(1U<<_CC_ALPHA)|(1U<<_CC_ASCII)|(1U<<_CC_CASED)|(1U<<_CC_CHARNAME_CONT)|(1U<<_CC_GRAPH)|(1U<<_CC_IDFIRST)|(1U<<_CC_PRINT)|(1U<<_CC_UPPER)|(1U<<_CC_WORDCHAR)|(1U<<_CC_IS_IN_SOME_FOLD),
/* U+52 'R' */ (1U<<_CC_ALPHANUMERIC)|(1U<<_CC_ALPHA)|(1U<<_CC_ASCII)|(1U<<_CC_CASED)|(1U<<_CC_CHARNAME_CONT)|(1U<<_CC_GRAPH)|(1U<<_CC_IDFIRST)|(1U<<_CC_PRINT)|(1U<<_CC_UPPER)|(1U<<_CC_WORDCHAR)|(1U<<_CC_IS_IN_SOME_FOLD),
/* U+53 'S' */ (1U<<_CC_NONLATIN1_FOLD)|(1U<<_CC_ALPHANUMERIC)|(1U<<_CC_ALPHA)|(1U<<_CC_ASCII)|(1U<<_CC_CASED)|(1U<<_CC_CHARNAME_CONT)|(1U<<_CC_GRAPH)|(1U<<_CC_IDFIRST)|(1U<<_CC_NON_FINAL_FOLD)|(1U<<_CC_PRINT)|(1U<<_CC_UPPER)|(1U<<_CC_WORDCHAR)|(1U<<_CC_IS_IN_SOME_FOLD),
/* U+64 'd' */ (1U<<_CC_ALPHANUMERIC)|(1U<<_CC_ALPHA)|(1U<<_CC_ASCII)|(1U<<_CC_CASED)|(1U<<_CC_CHARNAME_CONT)|(1U<<_CC_GRAPH)|(1U<<_CC_IDFIRST)|(1U<<_CC_LOWER)|(1U<<_CC_PRINT)|(1U<<_CC_WORDCHAR)|(1U<<_CC_XDIGIT)|(1U<<_CC_IS_IN_SOME_FOLD),
/* U+65 'e' */ (1U<<_CC_ALPHANUMERIC)|(1U<<_CC_ALPHA)|(1U<<_CC_ASCII)|(1U<<_CC_CASED)|(1U<<_CC_CHARNAME_CONT)|(1U<<_CC_GRAPH)|(1U<<_CC_IDFIRST)|(1U<<_CC_LOWER)|(1U<<_CC_PRINT)|(1U<<_CC_WORDCHAR)|(1U<<_CC_XDIGIT)|(1U<<_CC_IS_IN_SOME_FOLD),
/* U+66 'f' */ (1U<<_CC_NONLATIN1_FOLD)|(1U<<_CC_ALPHANUMERIC)|(1U<<_CC_ALPHA)|(1U<<_CC_ASCII)|(1U<<_CC_CASED)|(1U<<_CC_CHARNAME_CONT)|(1U<<_CC_GRAPH)|(1U<<_CC_IDFIRST)|(1U<<_CC_LOWER)|(1U<<_CC_NON_FINAL_FOLD)|(1U<<_CC_PRINT)|(1U<<_CC_WORDCHAR)|(1U<<_CC_XDIGIT)|(1U<<_CC_IS_IN_SOME_FOLD),
-/* U+67 'g' */ (1U<<_CC_ALPHANUMERIC)|(1U<<_CC_ALPHA)|(1U<<_CC_ASCII)|(1U<<_CC_CASED)|(1U<<_CC_CHARNAME_CONT)|(1U<<_CC_GRAPH)|(1U<<_CC_IDFIRST)|(1U<<_CC_LOWER)|(1U<<_CC_PRINT)|(1U<<_CC_WORDCHAR)|(1U<<_CC_IS_IN_SOME_FOLD),
+/* U+67 'g' */ (1U<<_CC_ALPHANUMERIC)|(1U<<_CC_ALPHA)|(1U<<_CC_ASCII)|(1U<<_CC_CASED)|(1U<<_CC_CHARNAME_CONT)|(1U<<_CC_GRAPH)|(1U<<_CC_IDFIRST)|(1U<<_CC_LOWER)|(1U<<_CC_PRINT)|(1U<<_CC_WORDCHAR)|(1U<<_CC_IS_IN_SOME_FOLD)|(1U<<_CC_BACKSLASH_FOO_LBRACE_IS_META),
/* U+68 'h' */ (1U<<_CC_NONLATIN1_FOLD)|(1U<<_CC_ALPHANUMERIC)|(1U<<_CC_ALPHA)|(1U<<_CC_ASCII)|(1U<<_CC_CASED)|(1U<<_CC_CHARNAME_CONT)|(1U<<_CC_GRAPH)|(1U<<_CC_IDFIRST)|(1U<<_CC_LOWER)|(1U<<_CC_NON_FINAL_FOLD)|(1U<<_CC_PRINT)|(1U<<_CC_WORDCHAR)|(1U<<_CC_IS_IN_SOME_FOLD),
/* U+69 'i' */ (1U<<_CC_NONLATIN1_FOLD)|(1U<<_CC_ALPHANUMERIC)|(1U<<_CC_ALPHA)|(1U<<_CC_ASCII)|(1U<<_CC_CASED)|(1U<<_CC_CHARNAME_CONT)|(1U<<_CC_GRAPH)|(1U<<_CC_IDFIRST)|(1U<<_CC_LOWER)|(1U<<_CC_NON_FINAL_FOLD)|(1U<<_CC_PRINT)|(1U<<_CC_WORDCHAR)|(1U<<_CC_IS_IN_SOME_FOLD),
/* U+6A 'j' */ (1U<<_CC_NONLATIN1_FOLD)|(1U<<_CC_ALPHANUMERIC)|(1U<<_CC_ALPHA)|(1U<<_CC_ASCII)|(1U<<_CC_CASED)|(1U<<_CC_CHARNAME_CONT)|(1U<<_CC_GRAPH)|(1U<<_CC_IDFIRST)|(1U<<_CC_LOWER)|(1U<<_CC_NON_FINAL_FOLD)|(1U<<_CC_PRINT)|(1U<<_CC_WORDCHAR)|(1U<<_CC_IS_IN_SOME_FOLD),
-/* U+6B 'k' */ (1U<<_CC_NONLATIN1_FOLD)|(1U<<_CC_ALPHANUMERIC)|(1U<<_CC_ALPHA)|(1U<<_CC_ASCII)|(1U<<_CC_CASED)|(1U<<_CC_CHARNAME_CONT)|(1U<<_CC_GRAPH)|(1U<<_CC_IDFIRST)|(1U<<_CC_LOWER)|(1U<<_CC_PRINT)|(1U<<_CC_WORDCHAR)|(1U<<_CC_IS_IN_SOME_FOLD),
+/* U+6B 'k' */ (1U<<_CC_NONLATIN1_FOLD)|(1U<<_CC_ALPHANUMERIC)|(1U<<_CC_ALPHA)|(1U<<_CC_ASCII)|(1U<<_CC_CASED)|(1U<<_CC_CHARNAME_CONT)|(1U<<_CC_GRAPH)|(1U<<_CC_IDFIRST)|(1U<<_CC_LOWER)|(1U<<_CC_PRINT)|(1U<<_CC_WORDCHAR)|(1U<<_CC_IS_IN_SOME_FOLD)|(1U<<_CC_BACKSLASH_FOO_LBRACE_IS_META),
/* U+6C 'l' */ (1U<<_CC_NONLATIN1_FOLD)|(1U<<_CC_ALPHANUMERIC)|(1U<<_CC_ALPHA)|(1U<<_CC_ASCII)|(1U<<_CC_CASED)|(1U<<_CC_CHARNAME_CONT)|(1U<<_CC_GRAPH)|(1U<<_CC_IDFIRST)|(1U<<_CC_LOWER)|(1U<<_CC_PRINT)|(1U<<_CC_WORDCHAR)|(1U<<_CC_IS_IN_SOME_FOLD),
/* U+6D 'm' */ (1U<<_CC_ALPHANUMERIC)|(1U<<_CC_ALPHA)|(1U<<_CC_ASCII)|(1U<<_CC_CASED)|(1U<<_CC_CHARNAME_CONT)|(1U<<_CC_GRAPH)|(1U<<_CC_IDFIRST)|(1U<<_CC_LOWER)|(1U<<_CC_PRINT)|(1U<<_CC_WORDCHAR)|(1U<<_CC_IS_IN_SOME_FOLD),
/* U+6E 'n' */ (1U<<_CC_NONLATIN1_FOLD)|(1U<<_CC_ALPHANUMERIC)|(1U<<_CC_ALPHA)|(1U<<_CC_ASCII)|(1U<<_CC_CASED)|(1U<<_CC_CHARNAME_CONT)|(1U<<_CC_GRAPH)|(1U<<_CC_IDFIRST)|(1U<<_CC_LOWER)|(1U<<_CC_PRINT)|(1U<<_CC_WORDCHAR)|(1U<<_CC_IS_IN_SOME_FOLD),
-/* U+6F 'o' */ (1U<<_CC_ALPHANUMERIC)|(1U<<_CC_ALPHA)|(1U<<_CC_ASCII)|(1U<<_CC_CASED)|(1U<<_CC_CHARNAME_CONT)|(1U<<_CC_GRAPH)|(1U<<_CC_IDFIRST)|(1U<<_CC_LOWER)|(1U<<_CC_PRINT)|(1U<<_CC_WORDCHAR)|(1U<<_CC_IS_IN_SOME_FOLD),
-/* U+70 'p' */ (1U<<_CC_ALPHANUMERIC)|(1U<<_CC_ALPHA)|(1U<<_CC_ASCII)|(1U<<_CC_CASED)|(1U<<_CC_CHARNAME_CONT)|(1U<<_CC_GRAPH)|(1U<<_CC_IDFIRST)|(1U<<_CC_LOWER)|(1U<<_CC_PRINT)|(1U<<_CC_WORDCHAR)|(1U<<_CC_IS_IN_SOME_FOLD),
+/* U+6F 'o' */ (1U<<_CC_ALPHANUMERIC)|(1U<<_CC_ALPHA)|(1U<<_CC_ASCII)|(1U<<_CC_CASED)|(1U<<_CC_CHARNAME_CONT)|(1U<<_CC_GRAPH)|(1U<<_CC_IDFIRST)|(1U<<_CC_LOWER)|(1U<<_CC_PRINT)|(1U<<_CC_WORDCHAR)|(1U<<_CC_IS_IN_SOME_FOLD)|(1U<<_CC_BACKSLASH_FOO_LBRACE_IS_META),
+/* U+70 'p' */ (1U<<_CC_ALPHANUMERIC)|(1U<<_CC_ALPHA)|(1U<<_CC_ASCII)|(1U<<_CC_CASED)|(1U<<_CC_CHARNAME_CONT)|(1U<<_CC_GRAPH)|(1U<<_CC_IDFIRST)|(1U<<_CC_LOWER)|(1U<<_CC_PRINT)|(1U<<_CC_WORDCHAR)|(1U<<_CC_IS_IN_SOME_FOLD)|(1U<<_CC_BACKSLASH_FOO_LBRACE_IS_META),
/* U+71 'q' */ (1U<<_CC_ALPHANUMERIC)|(1U<<_CC_ALPHA)|(1U<<_CC_ASCII)|(1U<<_CC_CASED)|(1U<<_CC_CHARNAME_CONT)|(1U<<_CC_GRAPH)|(1U<<_CC_IDFIRST)|(1U<<_CC_LOWER)|(1U<<_CC_PRINT)|(1U<<_CC_WORDCHAR)|(1U<<_CC_IS_IN_SOME_FOLD),
/* U+72 'r' */ (1U<<_CC_ALPHANUMERIC)|(1U<<_CC_ALPHA)|(1U<<_CC_ASCII)|(1U<<_CC_CASED)|(1U<<_CC_CHARNAME_CONT)|(1U<<_CC_GRAPH)|(1U<<_CC_IDFIRST)|(1U<<_CC_LOWER)|(1U<<_CC_PRINT)|(1U<<_CC_WORDCHAR)|(1U<<_CC_IS_IN_SOME_FOLD),
/* U+73 's' */ (1U<<_CC_NONLATIN1_FOLD)|(1U<<_CC_ALPHANUMERIC)|(1U<<_CC_ALPHA)|(1U<<_CC_ASCII)|(1U<<_CC_CASED)|(1U<<_CC_CHARNAME_CONT)|(1U<<_CC_GRAPH)|(1U<<_CC_IDFIRST)|(1U<<_CC_LOWER)|(1U<<_CC_NON_FINAL_FOLD)|(1U<<_CC_PRINT)|(1U<<_CC_WORDCHAR)|(1U<<_CC_IS_IN_SOME_FOLD),
/* U+75 'u' */ (1U<<_CC_ALPHANUMERIC)|(1U<<_CC_ALPHA)|(1U<<_CC_ASCII)|(1U<<_CC_CASED)|(1U<<_CC_CHARNAME_CONT)|(1U<<_CC_GRAPH)|(1U<<_CC_IDFIRST)|(1U<<_CC_LOWER)|(1U<<_CC_PRINT)|(1U<<_CC_WORDCHAR)|(1U<<_CC_IS_IN_SOME_FOLD),
/* U+76 'v' */ (1U<<_CC_ALPHANUMERIC)|(1U<<_CC_ALPHA)|(1U<<_CC_ASCII)|(1U<<_CC_CASED)|(1U<<_CC_CHARNAME_CONT)|(1U<<_CC_GRAPH)|(1U<<_CC_IDFIRST)|(1U<<_CC_LOWER)|(1U<<_CC_PRINT)|(1U<<_CC_WORDCHAR)|(1U<<_CC_IS_IN_SOME_FOLD),
/* U+77 'w' */ (1U<<_CC_NONLATIN1_FOLD)|(1U<<_CC_ALPHANUMERIC)|(1U<<_CC_ALPHA)|(1U<<_CC_ASCII)|(1U<<_CC_CASED)|(1U<<_CC_CHARNAME_CONT)|(1U<<_CC_GRAPH)|(1U<<_CC_IDFIRST)|(1U<<_CC_LOWER)|(1U<<_CC_NON_FINAL_FOLD)|(1U<<_CC_PRINT)|(1U<<_CC_WORDCHAR)|(1U<<_CC_IS_IN_SOME_FOLD),
-/* U+78 'x' */ (1U<<_CC_ALPHANUMERIC)|(1U<<_CC_ALPHA)|(1U<<_CC_ASCII)|(1U<<_CC_CASED)|(1U<<_CC_CHARNAME_CONT)|(1U<<_CC_GRAPH)|(1U<<_CC_IDFIRST)|(1U<<_CC_LOWER)|(1U<<_CC_PRINT)|(1U<<_CC_WORDCHAR)|(1U<<_CC_IS_IN_SOME_FOLD),
+/* U+78 'x' */ (1U<<_CC_ALPHANUMERIC)|(1U<<_CC_ALPHA)|(1U<<_CC_ASCII)|(1U<<_CC_CASED)|(1U<<_CC_CHARNAME_CONT)|(1U<<_CC_GRAPH)|(1U<<_CC_IDFIRST)|(1U<<_CC_LOWER)|(1U<<_CC_PRINT)|(1U<<_CC_WORDCHAR)|(1U<<_CC_IS_IN_SOME_FOLD)|(1U<<_CC_BACKSLASH_FOO_LBRACE_IS_META),
/* U+79 'y' */ (1U<<_CC_NONLATIN1_FOLD)|(1U<<_CC_ALPHANUMERIC)|(1U<<_CC_ALPHA)|(1U<<_CC_ASCII)|(1U<<_CC_CASED)|(1U<<_CC_CHARNAME_CONT)|(1U<<_CC_GRAPH)|(1U<<_CC_IDFIRST)|(1U<<_CC_LOWER)|(1U<<_CC_NON_FINAL_FOLD)|(1U<<_CC_PRINT)|(1U<<_CC_WORDCHAR)|(1U<<_CC_IS_IN_SOME_FOLD),
/* U+7A 'z' */ (1U<<_CC_ALPHANUMERIC)|(1U<<_CC_ALPHA)|(1U<<_CC_ASCII)|(1U<<_CC_CASED)|(1U<<_CC_CHARNAME_CONT)|(1U<<_CC_GRAPH)|(1U<<_CC_IDFIRST)|(1U<<_CC_LOWER)|(1U<<_CC_PRINT)|(1U<<_CC_WORDCHAR)|(1U<<_CC_IS_IN_SOME_FOLD),
/* U+7B '{' */ (1U<<_CC_ASCII)|(1U<<_CC_GRAPH)|(1U<<_CC_PRINT)|(1U<<_CC_PUNCT)|(1U<<_CC_QUOTEMETA),
deprecated:
C<to_uni_lower_lc>, C<to_uni_title_lc>, and C<to_uni_upper_lc>.
+=head2 Certain rare uses of backslashes within regexes are now deprectated
+
+There are three pairs of characters that Perl recognizes as
+metacharacters in regular expression patterns: C<{}>, C<[]>, and C<()>.
+These can be used as well to delimit patterns, as in:
+
+ m{foo}
+ s(foo)(bar)
+
+Since they are metacharacters, they have special meaning to regular
+expression patterns, and it turns out that you can't turn off that
+special meaning by the normal means of preceding them with a backslash,
+if you use them, paired, within a pattern delimitted by them. For
+example, in
+
+ m{foo\{1,3\}}
+
+the backslashes do not change the behavior, and this matches
+S<C<"f o">> followed by one to three more occurrences of C<"o">.
+
+Usages like this, where they are interpreted as metacharacters, are
+exceedingly rare; we think there are none, for example, in all of CPAN.
+Hence, this deprecation should affect very little code. It does give
+notice, however, that any such code needs to change, which will in turn
+allow us to change the behavior in future Perl versions so that the
+backslashes do have an effect, and without fear that we are silently
+breaking any existing code.
+
=head1 Performance Enhancements
XXX Changes which enhance performance without changing behaviour go here.
same length as the replacelist. See L<perlop> for more information
about the /d modifier.
+=item Useless use of '\'; doesn't escape metacharacter '%c'
+
+(D deprecated) You wrote a regular expression pattern something like
+one of these:
+
+ m{ \x\{FF\} }x
+ m{foo\{1,3\}}
+ qr(foo\(bar\))
+ s[foo\[a-z\]bar][baz]
+
+The interior braces, square brackets, and parentheses are treated as
+metacharacters even though they are backslashed; instead write:
+
+ m{ \x{FF} }x
+ m{foo{1,3}}
+ qr(foo(bar))
+ s[foo[a-z]bar][baz]
+
+The backslashes have no effect when a regular expression pattern is
+delimitted by C<{}>, C<[]>, or C<()>, which ordinarily are
+metacharacters, and the delimiters are also used, paired, within the
+interior of the pattern. It is planned that a future Perl release will
+change the meaning of constructs like these so that the backslashes
+will have an effect, so remove them from your code.
+
=item Useless use of \E
(W misc) You have a \E in a double-quotish string without a C<\U>,
{n,m} Match at least n but not more than m times
(If a curly bracket occurs in any other context and does not form part of
-a backslashed sequence like C<\x{...}>, it is treated
-as a regular character. In particular, the lower quantifier bound
-is not optional. However, in Perl v5.18, it is planned to issue a
-deprecation warning for all such occurrences, and in Perl v5.20 to
-require literal uses of a curly bracket to be escaped, say by preceding
-them with a backslash or enclosing them within square brackets, (C<"\{">
-or C<"[{]">). This change will allow for future syntax extensions (like
-making the lower bound of a quantifier optional), and better error
-checking of quantifiers. Now, a typo in a quantifier silently causes
-it to be treated as the literal characters. For example,
+a backslashed sequence like C<\x{...}>, it is treated as a regular
+character. In particular, the lower quantifier bound is not optional,
+and a typo in a quantifier silently causes it to be treated as the
+literal characters. For example,
/o{4,3}/
looks like a quantifier that matches 0 times, since 4 is greater than 3,
but it really means to match the sequence of six characters
-S<C<"o { 4 , 3 }">>.)
+S<C<"o { 4 , 3 }">>. It is planned to eventually require literal uses
+of curly brackets to be escaped, say by preceding them with a backslash
+or enclosing them within square brackets, (C<"\{"> or C<"[{]">). This
+change will allow for future syntax extensions (like making the lower
+bound of a quantifier optional), and better error checking. In the
+meantime, you should get in the habit of escaping all instances where
+you mean a literal "{".)
The "*" quantifier is equivalent to C<{0,}>, the "+"
quantifier to C<{1,}>, and the "?" quantifier to C<{0,1}>. n and m are limited
#define PERL_ARGS_ASSERT_GROK_BSLASH_X \
assert(s); assert(uv); assert(error_msg)
-PERL_STATIC_INLINE I32 S_regcurly(pTHX_ const char *s)
+PERL_STATIC_INLINE I32 S_regcurly(pTHX_ const char *s, const bool rbrace_must_be_escaped)
__attribute__warn_unused_result__
__attribute__pure__
__attribute__nonnull__(pTHX_1);
#define PERL_ARGS_ASSERT_SCAN_PAT \
assert(start)
-STATIC char* S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims, int re_reparse)
+STATIC char* S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims, int re_reparse, bool deprecate_escaped_matching)
__attribute__warn_unused_result__
__attribute__nonnull__(pTHX_1);
#define PERL_ARGS_ASSERT_SCAN_STR \
#define ISMULT1(c) ((c) == '*' || (c) == '+' || (c) == '?')
#define ISMULT2(s) ((*s) == '*' || (*s) == '+' || (*s) == '?' || \
- ((*s) == '{' && regcurly(s)))
+ ((*s) == '{' && regcurly(s, FALSE)))
#ifdef SPSTART
#undef SPSTART /* dratted cpp namespace... */
op = *RExC_parse;
- if (op == '{' && regcurly(RExC_parse)) {
+ if (op == '{' && regcurly(RExC_parse, FALSE)) {
maxpos = NULL;
#ifdef RE_TRACK_PATTERN_OFFSETS
parse_start = RExC_parse; /* MJD */
/* Disambiguate between \N meaning a named character versus \N meaning
* [^\n]. The former is assumed when it can't be the latter. */
- if (*p != '{' || regcurly(p)) {
+ if (*p != '{' || regcurly(p, FALSE)) {
RExC_parse = p;
if (! node_p) {
/* no bare \N in a charclass */
/* Supposed to be caught earlier. */
break;
case '{':
- if (!regcurly(RExC_parse)) {
+ if (!regcurly(RExC_parse, FALSE)) {
RExC_parse++;
goto defchar;
}
XDIGIT
VERTSPACE
IS_IN_SOME_FOLD
+ BACKSLASH_FOO_LBRACE_IS_META
);
# Read in the case fold mappings.
$re = qr/\p{Is_Non_Final_Fold}/;
} elsif ($name eq 'IS_IN_SOME_FOLD') {
$re = qr/\p{_Perl_Any_Folds}/;
+ } elsif ($name eq 'BACKSLASH_FOO_LBRACE_IS_META') {
+
+ # This is true for FOO where FOO is the varying character in:
+ # \a{, \b{, \c{, ...
+ # and the sequence has non-literal meaning to Perl; so it is true
+ # for 'x' because \x{ is special, but not 'a' because \a{ isn't.
+ $re = qr/[gkNopPx]/;
} else { # The remainder have the same name and values as Unicode
$re = eval "qr/\\p{$name}/";
use Carp;
require a::b + 1; # ambiguity warnings.
}
EXPECT
+########
+# toke.c
+# [perl #XXX] Erroneous ambiguity warnings
+print "aa" =~ m{^a\{1,2\}$}, "\n";
+print "aa" =~ m{^a\x\{61\}$}, "\n";
+print "aa" =~ m{^a{1,2}$}, "\n";
+print "aq" =~ m[^a\[a-z\]$], "\n";
+print "aq" =~ m(^a\(q\)$), "\n";
+no warnings 'deprecated';
+print "aa" =~ m{^a\{1,2\}$}, "\n";
+print "aa" =~ m{^a\x\{61\}$}, "\n";
+print "aq" =~ m[^a\[a-z\]$], "\n";
+print "aq" =~ m(^a\(q\)$), "\n";
+EXPECT
+Useless use of '\'; doesn't escape metacharacter '{' at - line 3.
+Useless use of '\'; doesn't escape metacharacter '{' at - line 4.
+Useless use of '\'; doesn't escape metacharacter '[' at - line 6.
+Useless use of '\'; doesn't escape metacharacter '(' at - line 7.
+1
+1
+1
+1
+q
+1
+1
+1
+q
\Vn \xFFn/ y $& \xFFn
/(?l:a?\w)/ b y $& b
+m?^xy\?$? xy? y $& xy?
# vim: softtabstop=0 noexpandtab
else if (PL_lex_inpat
&& (*s != 'N'
|| s[1] != '{'
- || regcurly(s + 1)))
+ || regcurly(s + 1, FALSE)))
{
*d++ = NATIVE_TO_NEED(has_utf8,'\\');
goto default_action;
/* In a pattern, so maybe we have {n,m}. */
if (*s == '{') {
- if (regcurly(s)) {
+ if (regcurly(s, FALSE)) {
return FALSE;
}
return TRUE;
}
sv = newSVpvn_flags(s, len, UTF ? SVf_UTF8 : 0);
if (*d == '(') {
- d = scan_str(d,TRUE,TRUE,FALSE);
+ d = scan_str(d,TRUE,TRUE,FALSE, FALSE);
if (!d) {
/* MUST advance bufptr here to avoid bogus
"at end of line" context messages from yyerror().
TERM(THING);
case '\'':
- s = scan_str(s,!!PL_madskills,FALSE,FALSE);
+ s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
if (PL_expect == XOPERATOR) {
if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
TERM(sublex_start());
case '"':
- s = scan_str(s,!!PL_madskills,FALSE,FALSE);
+ s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
if (PL_expect == XOPERATOR) {
if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
TERM(sublex_start());
case '`':
- s = scan_str(s,!!PL_madskills,FALSE,FALSE);
+ s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
DEBUG_T( { printbuf("### Saw backtick string before %s\n", s); } );
if (PL_expect == XOPERATOR)
no_op("Backticks",s);
LOP(OP_PIPE_OP,XTERM);
case KEY_q:
- s = scan_str(s,!!PL_madskills,FALSE,FALSE);
+ s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
if (!s)
missingterm(NULL);
pl_yylval.ival = OP_CONST;
case KEY_qw: {
OP *words = NULL;
- s = scan_str(s,!!PL_madskills,FALSE,FALSE);
+ s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
if (!s)
missingterm(NULL);
PL_expect = XOPERATOR;
}
case KEY_qq:
- s = scan_str(s,!!PL_madskills,FALSE,FALSE);
+ s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
if (!s)
missingterm(NULL);
pl_yylval.ival = OP_STRINGIFY;
TERM(sublex_start());
case KEY_qx:
- s = scan_str(s,!!PL_madskills,FALSE,FALSE);
+ s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
if (!s)
missingterm(NULL);
readpipe_override();
const bool warnillegalproto = ckWARN(WARN_ILLEGALPROTO);
STRLEN tmplen;
- s = scan_str(s,!!PL_madskills,FALSE,FALSE);
+ s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
if (!s)
Perl_croak(aTHX_ "Prototype not terminated");
/* strip spaces and check for bad characters */
{
dVAR;
PMOP *pm;
- char *s = scan_str(start,!!PL_madskills,FALSE, PL_reg_state.re_reparsing);
+ char *s = scan_str(start,!!PL_madskills,FALSE, PL_reg_state.re_reparsing,
+ TRUE /* look for escaped bracketed metas */ );
const char * const valid_flags =
(const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
char charset = '\0'; /* character set modifier */
pl_yylval.ival = OP_NULL;
- s = scan_str(start,!!PL_madskills,FALSE,FALSE);
+ s = scan_str(start,!!PL_madskills,FALSE,FALSE,
+ TRUE /* look for escaped bracketed metas */ );
if (!s)
Perl_croak(aTHX_ "Substitution pattern not terminated");
#endif
first_start = PL_multi_start;
- s = scan_str(s,!!PL_madskills,FALSE,FALSE);
+ s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
if (!s) {
if (PL_lex_stuff) {
SvREFCNT_dec(PL_lex_stuff);
pl_yylval.ival = OP_NULL;
- s = scan_str(start,!!PL_madskills,FALSE,FALSE);
+ s = scan_str(start,!!PL_madskills,FALSE,FALSE, FALSE);
if (!s)
Perl_croak(aTHX_ "Transliteration pattern not terminated");
}
#endif
- s = scan_str(s,!!PL_madskills,FALSE,FALSE);
+ s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
if (!s) {
if (PL_lex_stuff) {
SvREFCNT_dec(PL_lex_stuff);
if (d - PL_tokenbuf != len) {
pl_yylval.ival = OP_GLOB;
- s = scan_str(start,!!PL_madskills,FALSE,FALSE);
+ s = scan_str(start,!!PL_madskills,FALSE,FALSE, FALSE);
if (!s)
Perl_croak(aTHX_ "Glob not terminated");
return s;
*/
STATIC char *
-S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims, int re_reparse)
+S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims, int re_reparse,
+ bool deprecate_escaped_meta /* Should we issue a deprecation warning
+ for certain paired metacharacters that
+ appear escaped within it */
+ )
{
dVAR;
SV *sv; /* scalar value: string */
U8 termstr[UTF8_MAXBYTES]; /* terminating string */
STRLEN termlen; /* length of terminating string */
int last_off = 0; /* last position for nesting bracket */
+ char *escaped_open = NULL;
#ifdef PERL_MAD
int stuffstart;
char *tstart;
PL_multi_close = term;
+ /* A warning is raised if the input parameter requires it for escaped (by a
+ * backslash) paired metacharacters {} [] and () when the delimiters are
+ * those same characters, and the backslash is ineffective. This doesn't
+ * happen for <>, as they aren't metas. */
+ if (deprecate_escaped_meta
+ && (PL_multi_open == PL_multi_close
+ || ! ckWARN_d(WARN_DEPRECATED)
+ || PL_multi_open == '<'))
+ {
+ deprecate_escaped_meta = FALSE;
+ }
+
/* create a new SV to hold the contents. 79 is the SV's initial length.
What a random number. */
sv = newSV_type(SVt_PVIV);
if (*s == '\\' && s+1 < PL_bufend) {
if (!keep_quoted &&
((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
+ {
s++;
+
+ /* Here, 'deprecate_escaped_meta' is true iff the
+ * delimiters are paired metacharacters, and 's' points
+ * to an occurrence of one of them within the string,
+ * which was preceded by a backslash. If this is a
+ * context where the delimiter is also a metacharacter,
+ * the backslash is useless, and deprecated. () and []
+ * are meta in any context. {} are meta only when
+ * appearing in a quantifier or in things like '\p{'.
+ * They also aren't meta unless there is a matching
+ * closed, escaped char later on within the string.
+ * If 's' points to an open, set a flag; if to a close,
+ * test that flag, and raise a warning if it was set */
+
+ if (deprecate_escaped_meta) {
+ if (*s == PL_multi_open) {
+ if (*s != '{') {
+ escaped_open = s;
+ }
+ else if (regcurly(s,
+ TRUE /* Look for a closing
+ '\}' */)
+ || (s - start > 2 /* Look for e.g.
+ '\x{' */
+ && _generic_isCC(*(s-2), _CC_BACKSLASH_FOO_LBRACE_IS_META)))
+ {
+ escaped_open = s;
+ }
+ }
+ else if (escaped_open) {
+ Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
+ "Useless use of '\\'; doesn't escape metacharacter '%c'", PL_multi_open);
+ escaped_open = NULL;
+ }
+ }
+ }
else
*to++ = *s++;
}