DEBUG_STUDYDATA("commit: ",data,0);
}
+/* Set, clear, and test the "matches the empty string" (EOS) state of a
+ * synthetic start class ('ssc'), which is passed as 'node'.
+ *
+ * The state is kept in the node's 'next_off' field rather than in a bit of
+ * its 'flags' field, which saves a flags bit. An ssc stands alone, so it
+ * never has a real next offset, and the field is otherwise unused.
+ *
+ *   SET_SSC_EOS(node)    stores TRUE in next_off
+ *   CLEAR_SSC_EOS(node)  stores FALSE in next_off
+ *   TEST_SSC_EOS(node)   yields next_off converted with cBOOL()
+ *
+ * The EOS information is currently used only during compilation, though in
+ * theory it could be passed on to the execution code. The field could hold
+ * more than one bit of information, but only this one is stored at present. */
+#define SET_SSC_EOS(node) STMT_START { (node)->next_off = TRUE; } STMT_END
+#define CLEAR_SSC_EOS(node) STMT_START { (node)->next_off = FALSE; } STMT_END
+#define TEST_SSC_EOS(node) cBOOL((node)->next_off)
+
/* Can match anything (initialization) */
STATIC void
S_cl_anything(const RExC_state_t *pRExC_state, struct regnode_charclass_class *cl)
PERL_ARGS_ASSERT_CL_ANYTHING;
ANYOF_BITMAP_SETALL(cl);
- cl->flags = ANYOF_CLASS|ANYOF_EOS|ANYOF_UNICODE_ALL
- |ANYOF_NON_UTF8_LATIN1_ALL;
+ cl->flags = ANYOF_UNICODE_ALL;
+ SET_SSC_EOS(cl);
/* If any portion of the regex is to operate under locale rules,
* initialization includes it. The reason this isn't done for all regexes
* necessary. */
if (RExC_contains_locale) {
ANYOF_CLASS_SETALL(cl); /* /l uses class */
- cl->flags |= ANYOF_LOCALE|ANYOF_LOC_FOLD;
+ cl->flags |= ANYOF_LOCALE|ANYOF_CLASS|ANYOF_LOC_FOLD;
}
else {
ANYOF_CLASS_ZERO(cl); /* Only /l uses class now */
{
PERL_ARGS_ASSERT_CL_AND;
- assert(and_with->type == ANYOF);
+ assert(PL_regkind[and_with->type] == ANYOF);
/* I (khw) am not sure all these restrictions are necessary XXX */
if (!(ANYOF_CLASS_TEST_ANY_SET(and_with))
StructCopy(&accum, data->start_class,
struct regnode_charclass_class);
flags |= SCF_DO_STCLASS_OR;
- data->start_class->flags |= ANYOF_EOS;
+ SET_SSC_EOS(data->start_class);
}
}
* utf8 string, so accept a possible false positive for
* latin1-range folds */
if (uc >= 0x100 ||
- (!(data->start_class->flags & (ANYOF_CLASS | ANYOF_LOCALE))
+ (!(data->start_class->flags & ANYOF_LOCALE)
&& !ANYOF_BITMAP_TEST(data->start_class, uc)
&& (!(data->start_class->flags & ANYOF_LOC_FOLD)
|| !ANYOF_BITMAP_TEST(data->start_class, PL_fold_latin1[uc])))
}
}
}
- data->start_class->flags &= ~ANYOF_EOS;
+ CLEAR_SSC_EOS(data->start_class);
if (uc < 0x100)
data->start_class->flags &= ~ANYOF_UNICODE_ALL;
}
ANYOF_BITMAP_SET(data->start_class, uc);
else
data->start_class->flags |= ANYOF_UNICODE_ALL;
- data->start_class->flags &= ~ANYOF_EOS;
+ CLEAR_SSC_EOS(data->start_class);
cl_and(data->start_class, and_withp);
}
flags &= ~SCF_DO_STCLASS;
/* Check whether it is compatible with what we know already! */
int compat = 1;
if (uc >= 0x100 ||
- (!(data->start_class->flags & (ANYOF_CLASS | ANYOF_LOCALE))
+ (!(data->start_class->flags & ANYOF_LOCALE)
&& !ANYOF_BITMAP_TEST(data->start_class, uc)
&& !ANYOF_BITMAP_TEST(data->start_class, PL_fold_latin1[uc])))
{
ANYOF_BITMAP_ZERO(data->start_class);
if (compat) {
ANYOF_BITMAP_SET(data->start_class, uc);
- data->start_class->flags &= ~ANYOF_EOS;
+ CLEAR_SSC_EOS(data->start_class);
if (OP(scan) == EXACTFL) {
/* XXX This set is probably no longer necessary, and
* probably wrong as LOCALE now is on in the initial
}
}
}
- data->start_class->flags &= ~ANYOF_EOS;
+ CLEAR_SSC_EOS(data->start_class);
}
cl_and(data->start_class, and_withp);
}
StructCopy(&this_class, data->start_class,
struct regnode_charclass_class);
flags |= SCF_DO_STCLASS_OR;
- data->start_class->flags |= ANYOF_EOS;
+ SET_SSC_EOS(data->start_class);
}
} else { /* Non-zero len */
if (flags & SCF_DO_STCLASS_OR) {
else if (OP(scan) == LNBREAK) {
if (flags & SCF_DO_STCLASS) {
int value = 0;
- data->start_class->flags &= ~ANYOF_EOS; /* No match on empty */
+ CLEAR_SSC_EOS(data->start_class); /* No match on empty */
if (flags & SCF_DO_STCLASS_AND) {
for (value = 0; value < 256; value++)
if (!is_VERTWS_cp(value))
min++;
if (flags & SCF_DO_STCLASS) {
int loop_max = 256;
- data->start_class->flags &= ~ANYOF_EOS; /* No match on empty */
+ CLEAR_SSC_EOS(data->start_class); /* No match on empty */
/* Some of the logic below assumes that switching
locale on will only add false positives. */
case SANY:
default:
- do_default:
- /* Perl_croak(aTHX_ "panic: unexpected simple REx opcode %d", OP(scan)); */
+#ifdef DEBUGGING
+ Perl_croak(aTHX_ "panic: unexpected simple REx opcode %d", OP(scan));
+#endif
+ do_default:
if (flags & SCF_DO_STCLASS_OR) /* Allow everything */
cl_anything(pRExC_state, data->start_class);
break;
goto do_default;
if (flags & SCF_DO_STCLASS_OR) { /* Everything but \n */
value = (ANYOF_BITMAP_TEST(data->start_class,'\n')
- || ANYOF_CLASS_TEST_ANY_SET(data->start_class));
+ || ANYOF_CLASS_TEST_ANY_SET(data->start_class));
cl_anything(pRExC_state, data->start_class);
}
if (flags & SCF_DO_STCLASS_AND || !value)
break;
case POSIXA:
loop_max = 128;
+ /* FALL THROUGH */
case POSIXL:
case POSIXD:
case POSIXU:
break;
case NPOSIXA:
loop_max = 128;
+ /* FALL THROUGH */
case NPOSIXL:
case NPOSIXU:
case NPOSIXD:
cl_init(pRExC_state, data->start_class);
} else {
/* AND before and after: combine and continue */
- const int was = (data->start_class->flags & ANYOF_EOS);
+ const int was = TEST_SSC_EOS(data->start_class);
cl_and(data->start_class, &intrnl);
if (was)
- data->start_class->flags |= ANYOF_EOS;
+ SET_SSC_EOS(data->start_class);
}
}
}
*minnextp += min;
if (f & SCF_DO_STCLASS_AND) {
- const int was = (data->start_class->flags & ANYOF_EOS);
+ /* Note whether EOS is set before the AND so that it can be set
+  * again afterwards. */
+ const int was = TEST_SSC_EOS(data->start_class);
cl_and(data->start_class, &intrnl);
if (was)
- data->start_class->flags |= ANYOF_EOS;
+ SET_SSC_EOS(data->start_class);
}
if (data) {
if (data_fake.flags & (SF_HAS_PAR|SF_IN_PAR))
StructCopy(&accum, data->start_class,
struct regnode_charclass_class);
flags |= SCF_DO_STCLASS_OR;
- data->start_class->flags |= ANYOF_EOS;
+ SET_SSC_EOS(data->start_class);
}
}
scan= tail;
PL_Posix_ptrs[_CC_BLANK] = _new_invlist_C_array(PosixBlank_invlist);
PL_XPosix_ptrs[_CC_BLANK] = _new_invlist_C_array(XPosixBlank_invlist);
- PL_L1Cased = _new_invlist_C_array(L1Cased_invlist);
+
+ /* Cased is the same as Alpha in the ASCII range */
+ PL_L1Posix_ptrs[_CC_CASED] = _new_invlist_C_array(L1Cased_invlist);
+ PL_Posix_ptrs[_CC_CASED] = _new_invlist_C_array(PosixAlpha_invlist);
PL_Posix_ptrs[_CC_CNTRL] = _new_invlist_C_array(PosixCntrl_invlist);
PL_XPosix_ptrs[_CC_CNTRL] = _new_invlist_C_array(XPosixCntrl_invlist);
if ((!(r->anchored_substr || r->anchored_utf8) || r->anchored_offset)
&& stclass_flag
- && !(data.start_class->flags & ANYOF_EOS)
+ && ! TEST_SSC_EOS(data.start_class)
&& !cl_is_anything(data.start_class))
{
const U32 n = add_data(pRExC_state, 1, "f");
- data.start_class->flags |= ANYOF_IS_SYNTHETIC;
+ OP(data.start_class) = ANYOF_SYNTHETIC;
Newx(RExC_rxi->data->data[n], 1,
struct regnode_charclass_class);
r->check_substr = r->check_utf8 = r->anchored_substr = r->anchored_utf8
= r->float_substr = r->float_utf8 = NULL;
- if (!(data.start_class->flags & ANYOF_EOS)
+ if (! TEST_SSC_EOS(data.start_class)
&& !cl_is_anything(data.start_class))
{
const U32 n = add_data(pRExC_state, 1, "f");
- data.start_class->flags |= ANYOF_IS_SYNTHETIC;
+ OP(data.start_class) = ANYOF_SYNTHETIC;
Newx(RExC_rxi->data->data[n], 1,
struct regnode_charclass_class);
break;
case 'r':
if (memEQ(posixcc, "lowe", 4)) /* lower */
- namedclass = ANYOF_LOWER;
+ namedclass = (FOLD) ? ANYOF_CASED : ANYOF_LOWER;
else if (memEQ(posixcc, "uppe", 4)) /* upper */
- namedclass = ANYOF_UPPER;
+ namedclass = (FOLD) ? ANYOF_CASED : ANYOF_UPPER;
break;
case 't':
if (memEQ(posixcc, "digi", 4)) /* digit */
if (UCHARAT(RExC_parse) == '^') {
RExC_parse++;
n--;
- value = value == 'p' ? 'P' : 'p'; /* toggle */
+ /* toggle. (The rhs xor gets the single bit that
+ * differs between P and p; the other xor inverts just
+ * that bit) */
+ value ^= 'P' ^ 'p';
+
while (isSPACE(UCHARAT(RExC_parse))) {
RExC_parse++;
n--;
* Unicode range swash for this character class */
const char *Xname = swash_property_names[classnum];
- /* LOWER and UPPER under fold match ALPHA in the ASCII range,
- * and Cased outside it */
- if (FOLD && ! LOC
- && (classnum == _CC_LOWER || classnum == _CC_UPPER))
- {
- ascii_source = PL_Posix_ptrs[_CC_ALPHA];
- l1_source = PL_L1Cased;
- Xname = "Cased";
- }
-
switch ((I32)namedclass) {
case ANYOF_DIGIT:
case ANYOF_ALPHANUMERIC: /* C's alnum, in contrast to \w */
case ANYOF_ALPHA:
+ case ANYOF_CASED:
case ANYOF_GRAPH:
case ANYOF_LOWER:
case ANYOF_PRINT:
#endif
goto join_posix;
+ case ANYOF_NCASED:
case ANYOF_LOWER:
case ANYOF_NLOWER:
case ANYOF_UPPER:
}
if (warn_super) {
- ANYOF_FLAGS(ret) |= ANYOF_WARN_SUPER;
+ OP(ret) = ANYOF_WARN_SUPER;
}
}
/* Should be synchronized with * ANYOF_ #xdefines in regcomp.h */
static const char * const anyofs[] = {
#if _CC_WORDCHAR != 0 || _CC_DIGIT != 1 || _CC_ALPHA != 2 || _CC_LOWER != 3 \
- || _CC_UPPER != 4 || _CC_PUNCT != 5 || _CC_PRINT != 6 \
- || _CC_ALPHANUMERIC != 7 || _CC_GRAPH != 8 || _CC_SPACE != 9 \
- || _CC_BLANK != 10 || _CC_XDIGIT != 11 || _CC_PSXSPC != 12 \
- || _CC_CNTRL != 13 || _CC_ASCII != 14 || _CC_VERTSPACE != 15
+ || _CC_UPPER != 4 || _CC_PUNCT != 5 || _CC_PRINT != 6 \
+ || _CC_ALPHANUMERIC != 7 || _CC_GRAPH != 8 || _CC_CASED != 9 \
+ || _CC_SPACE != 10 || _CC_BLANK != 11 || _CC_XDIGIT != 12 \
+ || _CC_PSXSPC != 13 || _CC_CNTRL != 14 || _CC_ASCII != 15 \
+ || _CC_VERTSPACE != 16
#error Need to adjust order of anyofs[]
#endif
"[\\w]",
"[:^alnum:]",
"[:graph:]",
"[:^graph:]",
+ "[:cased:]",
+ "[:^cased:]",
"[\\s]",
"[\\S]",
"[:blank:]",