#endif
#include "dquote_static.c"
-#include "charclass_invlists.h"
#include "inline_invlist.c"
#include "unicode_constants.h"
}
return final_minlen;
}
- NOT_REACHED;
+ NOT_REACHED; /* NOTREACHED */
}
STATIC U32
DEBUG_r(if (!PL_colorset) reginitcolors());
-#ifndef PERL_IN_XSUB_RE
/* Initialize these here instead of as-needed, as is quick and avoids
* having to test them each time otherwise */
if (! PL_AboveLatin1) {
PL_InBitmap = _add_range_to_invlist(PL_InBitmap, 0,
NUM_ANYOF_CODE_POINTS - 1);
}
-#endif
pRExC_state->code_blocks = NULL;
pRExC_state->num_code_blocks = 0;
Perl_croak(aTHX_ "panic: bad flag %lx in reg_scan_name",
(unsigned long) flags);
}
- NOT_REACHED; /* NOT REACHED */
+ NOT_REACHED; /* NOTREACHED */
}
return NULL;
}
if (RExC_parse == RExC_end || *RExC_parse != ')')
vFAIL("Sequence (?&... not terminated");
goto gen_recurse_regop;
- /* NOT REACHED */
+ /* NOTREACHED */
case '+':
if (!(RExC_parse[0] >= '1' && RExC_parse[0] <= '9')) {
RExC_parse++;
vFAIL("Illegal pattern");
}
goto parse_recursion;
- /* NOT REACHED*/
+ /* NOTREACHED*/
case '-': /* (?-1) */
if (!(RExC_parse[0] >= '1' && RExC_parse[0] <= '9')) {
RExC_parse--; /* rewind to let it be handled later */
nextchar(pRExC_state);
return ret;
- /* NOT REACHED */
+ /* NOTREACHED */
case '?': /* (??...) */
is_logical = 1;
Set_Node_Offset(ret, RExC_parse); /* MJD */
is_open = 1;
} else {
+ /* with RXf_PMf_NOCAPTURE treat (...) as (?:...) */
+ paren = ':';
ret = NULL;
}
}
invert = 1;
/* FALLTHROUGH */
case 'b':
+ {
+ regex_charset charset = get_regex_charset(RExC_flags);
+
RExC_seen_zerolen++;
RExC_seen |= REG_LOOKBEHIND_SEEN;
- op = BOUND + get_regex_charset(RExC_flags);
- if (op > BOUNDA) { /* /aa is same as /a */
- op = BOUNDA;
- }
- else if (op == BOUNDL) {
- RExC_contains_locale = 1;
- }
+ op = BOUND + charset;
- if (invert) {
- op += NBOUND - BOUND;
+ if (op == BOUNDL) {
+ RExC_contains_locale = 1;
}
ret = reg_node(pRExC_state, op);
*flagp |= SIMPLE;
- if ((U8) *(RExC_parse + 1) == '{') {
- /* diag_listed_as: Use "%s" instead of "%s" */
- vFAIL3("Use \"\\%c\\{\" instead of \"\\%c{\"", *RExC_parse, *RExC_parse);
+ if (*(RExC_parse + 1) != '{') {
+ FLAGS(ret) = TRADITIONAL_BOUND;
+ if (PASS2 && op > BOUNDA) { /* /aa is same as /a */
+ OP(ret) = BOUNDA;
+ }
+ }
+ else {
+ STRLEN length;
+ char name = *RExC_parse;
+ char * endbrace;
+ RExC_parse += 2;
+ endbrace = strchr(RExC_parse, '}');
+
+ if (! endbrace) {
+ vFAIL2("Missing right brace on \\%c{}", name);
+ }
+ /* XXX Need to decide whether to take spaces or not. Should be
+ * consistent with \p{}, but that currently is SPACE, which
+ * means vertical too, which seems wrong
+ * while (isBLANK(*RExC_parse)) {
+ RExC_parse++;
+ }*/
+ if (endbrace == RExC_parse) {
+ RExC_parse++; /* After the '}' */
+ vFAIL2("Empty \\%c{}", name);
+ }
+ length = endbrace - RExC_parse;
+ /*while (isBLANK(*(RExC_parse + length - 1))) {
+ length--;
+ }*/
+ switch (*RExC_parse) {
+ case 'g':
+ if (length != 1
+ && (length != 3 || strnNE(RExC_parse + 1, "cb", 2)))
+ {
+ goto bad_bound_type;
+ }
+ FLAGS(ret) = GCB_BOUND;
+ break;
+ case 's':
+ if (length != 2 || *(RExC_parse + 1) != 'b') {
+ goto bad_bound_type;
+ }
+ FLAGS(ret) = SB_BOUND;
+ break;
+ case 'w':
+ if (length != 2 || *(RExC_parse + 1) != 'b') {
+ goto bad_bound_type;
+ }
+ FLAGS(ret) = WB_BOUND;
+ break;
+ default:
+ bad_bound_type:
+ RExC_parse = endbrace;
+ vFAIL2utf8f(
+ "'%"UTF8f"' is an unknown bound type",
+ UTF8fARG(UTF, length, endbrace - length));
+ NOT_REACHED; /*NOTREACHED*/
+ }
+ RExC_parse = endbrace;
+ RExC_uni_semantics = 1;
+
+ if (PASS2 && op >= BOUNDA) { /* \b{...} under an ASCII-restricted charset: compile it as BOUNDU instead, and warn */
+ OP(ret) = BOUNDU;
+ length += 4; /* widen to the 4 chars around the type name: the 3 before it (presumably backslash, b/B, '{') plus the '}' */
+
+ /* No need to worry about UTF-8 in this message, because to get
+ * here the contents of the \b{} must be ASCII */
+ ckWARN4reg(RExC_parse + 1, /* Include the '}' in msg */
+ "Using /u for '%.*s' instead of /%s",
+ (unsigned) length, /* NOTE(review): a '%.*s' precision is normally an int -- confirm this cast is intended */
+ endbrace - length + 1, /* start of the text to print: 3 chars before the type name */
+ (charset == REGEX_ASCII_RESTRICTED_CHARSET)
+ ? ASCII_RESTRICT_PAT_MODS
+ : ASCII_MORE_RESTRICT_PAT_MODS);
+ }
}
+
+ if (PASS2 && invert) {
+ OP(ret) += NBOUND - BOUND;
+ }
goto finish_meta_pat;
+ }
case 'D':
invert = 1;
break;
case 'e':
if (memEQ(posixcc, "spac", 4)) /* space */
- namedclass = ANYOF_PSXSPC;
+ namedclass = ANYOF_SPACE;
break;
case 'h':
if (memEQ(posixcc, "grap", 4)) /* graph */
vFAIL2utf8f(
"Invalid [] range \"%"UTF8f"\"",
UTF8fARG(UTF, w, rangebegin));
- NOT_REACHED; /* NOT REACHED */
+ NOT_REACHED; /* NOTREACHED */
}
}
else {
}
if (ret_invlist) {
+ assert(cp_list);
+
*ret_invlist = cp_list;
SvREFCNT_dec(swash);
|| _CC_UPPER != 4 || _CC_PUNCT != 5 || _CC_PRINT != 6 \
|| _CC_ALPHANUMERIC != 7 || _CC_GRAPH != 8 || _CC_CASED != 9 \
|| _CC_SPACE != 10 || _CC_BLANK != 11 || _CC_XDIGIT != 12 \
- || _CC_PSXSPC != 13 || _CC_CNTRL != 14 || _CC_ASCII != 15 \
- || _CC_VERTSPACE != 16
+ || _CC_CNTRL != 13 || _CC_ASCII != 14 || _CC_VERTSPACE != 15
#error Need to adjust order of anyofs[]
#endif
"\\w",
"[:^blank:]",
"[:xdigit:]",
"[:^xdigit:]",
- "[:space:]",
- "[:^space:]",
"[:cntrl:]",
"[:^cntrl:]",
"[:ascii:]",
Perl_sv_catpvf(aTHX_ sv, "[illegal type=%d])", index);
}
}
+ else if (k == BOUND || k == NBOUND) {
+ /* Append the "{type}" suffix that identifies which kind of
+ * boundary this node matches; the node's FLAGS field selects the
+ * entry, and a traditional boundary gets an empty suffix.
+ * Must be synced with order of 'bound_type' in regcomp.h */
+ const char * const bounds[] = {
+ "", /* Traditional */
+ "{gcb}",
+ "{sb}",
+ "{wb}"
+ };
+ /* Catch a flag value with no entry in the table (for example a
+ * bound type added to the enum without updating this list)
+ * rather than reading past the end of the array */
+ assert(FLAGS(o) < sizeof(bounds) / sizeof(bounds[0]));
+ sv_catpv(sv, bounds[FLAGS(o)]);
+ }
else if (k == BRANCHJ && (OP(o) == UNLESSM || OP(o) == IFMATCH))
Perl_sv_catpvf(aTHX_ sv, "[%d]", -(o->flags));
else if (OP(o) == SBOL)