=head1 Incompatible Changes
-XXX For a release on a stable branch, this section aspires to be:
-
- There are no changes intentionally incompatible with 5.XXX.XXX
- If any exist, they are bugs, and we request that you submit a
- report. See L</Reporting Bugs> below.
-
=head2 Lexical $_ has been removed
C<my $_> was introduced in Perl v5.10, and subsequently caused much confusion
backward-incompatible) way. Over the following years, no alternatives were
proposed. The feature has now been removed and will fail to compile.
+=head2 Only blanks and tabs are now allowed within C<[...]> within C<(?[...])>.
+
+The experimental Extended Bracketed Character Classes can contain
+regular bracketed character classes within them. These differ from
+regular ones in that white space is generally ignored, unless escaped by
+preceding it with a backslash. The white space that is ignored is now
+limited to just tab C<\t> and SPACE characters. Previously, it was any
+white space. See
+L<perlrecharclass/Extended Bracketed Character Classes>.
+
=head1 Deprecations
XXX Any deprecated features, syntax, modules etc. should be listed here.
#define HAS_NONLOCALE_RUNTIME_PROPERTY_DEFINITION \
(SvCUR(listsv) != initial_listsv_len)
+/* There is a restricted set of white space characters that are legal when
+ * ignoring white space in a bracketed character class. This generates the
+ * code to skip them.
+ *
+ * There is a line below that uses the same white space criteria but is outside
+ * this macro. Both here and there must use the same definition */
+#define SKIP_BRACKETED_WHITE_SPACE(do_skip, p) \
+ STMT_START { \
+ if (do_skip) { \
+ while ( p < RExC_end \
+ && isBLANK_A(UCHARAT(p))) \
+ { \
+ p++; \
+ } \
+ } \
+ } STMT_END
+
STATIC regnode *
S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
const bool stop_at_1, /* Just parse the next thing, don't
SvTEMP_off(listsv); /* Grr, TEMPs and mortals are conflated. */
}
- if (skip_white) {
- RExC_parse = regpatws(pRExC_state, RExC_parse,
- FALSE /* means don't recognize comments */ );
- }
+ SKIP_BRACKETED_WHITE_SPACE(skip_white, RExC_parse);
if (UCHARAT(RExC_parse) == '^') { /* Complement of range. */
RExC_parse++;
invert = TRUE;
allow_multi_folds = FALSE;
MARK_NAUGHTY(1);
- if (skip_white) {
- RExC_parse = regpatws(pRExC_state, RExC_parse,
- FALSE /* means don't recognize comments */ );
- }
+ SKIP_BRACKETED_WHITE_SPACE(skip_white, RExC_parse);
}
/* Check that they didn't say [:posix:] instead of [[:posix:]] */
break;
}
- if (skip_white) {
- RExC_parse = regpatws(pRExC_state, RExC_parse,
- FALSE /* means don't recognize comments */ );
- }
+ SKIP_BRACKETED_WHITE_SPACE(skip_white, RExC_parse);
if (UCHARAT(RExC_parse) == ']') {
break;
* skipped, it means that that white space is wanted literally, and
* is already in 'value'. Otherwise, need to translate the escape
* into what it signifies. */
- if (! skip_white || ! is_PATWS_cp(value)) switch ((I32)value) {
+ if (! skip_white || ! isBLANK_A(value)) switch ((I32)value) {
case 'w': namedclass = ANYOF_WORDCHAR; break;
case 'W': namedclass = ANYOF_NWORDCHAR; break;
}
} /* end of namedclass \blah */
- if (skip_white) {
- RExC_parse = regpatws(pRExC_state, RExC_parse,
- FALSE /* means don't recognize comments */ );
- }
+ SKIP_BRACKETED_WHITE_SPACE(skip_white, RExC_parse);
/* If 'range' is set, 'value' is the ending of a range--check its
* validity. (If value isn't a single code point in the case of a
&& *RExC_parse == '-')
{
char* next_char_ptr = RExC_parse + 1;
- if (skip_white) { /* Get the next real char after the '-' */
- next_char_ptr = regpatws(pRExC_state,
- RExC_parse + 1,
- FALSE); /* means don't recognize
- comments */
- }
+
+ /* Get the next real char after the '-' */
+ SKIP_BRACKETED_WHITE_SPACE(skip_white, next_char_ptr);
/* If the '-' is at the end of the class (just before the ']',
* it is a literal minus; otherwise it is a range */
like("a", qr/(?[ [a] # [[:notaclass:]]
])/, 'A comment isn\'t parsed');
unlike(uni_to_native("\x85"), qr/(?[ \t\85 ])/, 'NEL is white space');
-unlike(uni_to_native("\x85"), qr/(?[ [\t\85] ])/, '... including within nested []');
like(uni_to_native("\x85"), qr/(?[ \t + \\85 ])/, 'can escape NEL to match');
like(uni_to_native("\x85"), qr/(?[ [\\85] ])/, '... including within nested []');
like("\t", qr/(?[ \t + \\85 ])/, 'can do basic union');