cl->flags = ANYOF_EOS|ANYOF_UNICODE_ALL;
if (LOC)
cl->flags |= ANYOF_LOCALE;
+ cl->flags |= ANYOF_FOLD;
}
/* Can match anything (initialization) */
if (!(and_with->flags & ANYOF_EOS))
cl->flags &= ~ANYOF_EOS;
- if (cl->flags & ANYOF_UNICODE_ALL && and_with->flags & ANYOF_UNICODE &&
+ if (!(and_with->flags & ANYOF_FOLD))
+ cl->flags &= ~ANYOF_FOLD;
+
+ if (cl->flags & ANYOF_UNICODE_ALL && and_with->flags & ANYOF_NONBITMAP &&
!(and_with->flags & ANYOF_INVERT)) {
cl->flags &= ~ANYOF_UNICODE_ALL;
- cl->flags |= ANYOF_UNICODE;
+ cl->flags |= ANYOF_NONBITMAP;
ARG_SET(cl, ARG(and_with));
}
if (!(and_with->flags & ANYOF_UNICODE_ALL) &&
!(and_with->flags & ANYOF_INVERT))
cl->flags &= ~ANYOF_UNICODE_ALL;
- if (!(and_with->flags & (ANYOF_UNICODE|ANYOF_UNICODE_ALL)) &&
+ if (!(and_with->flags & (ANYOF_NONBITMAP|ANYOF_UNICODE_ALL)) &&
!(and_with->flags & ANYOF_INVERT))
- cl->flags &= ~ANYOF_UNICODE;
+ cl->flags &= ~ANYOF_NONBITMAP;
}
/* 'OR' a given class with another one. Can create false positives */
if (or_with->flags & ANYOF_EOS)
cl->flags |= ANYOF_EOS;
- if (cl->flags & ANYOF_UNICODE && or_with->flags & ANYOF_UNICODE &&
+ if (or_with->flags & ANYOF_FOLD)
+ cl->flags |= ANYOF_FOLD;
+
+ /* If both nodes match something outside the bitmap, but what they match
+ * outside is not the same pointer, and hence not easily compared, give up
+ * and allow the start class to match everything outside the bitmap */
+ if (cl->flags & ANYOF_NONBITMAP && or_with->flags & ANYOF_NONBITMAP &&
ARG(cl) != ARG(or_with)) {
cl->flags |= ANYOF_UNICODE_ALL;
- cl->flags &= ~ANYOF_UNICODE;
+ cl->flags &= ~ANYOF_NONBITMAP;
}
+
if (or_with->flags & ANYOF_UNICODE_ALL) {
cl->flags |= ANYOF_UNICODE_ALL;
- cl->flags &= ~ANYOF_UNICODE;
+ cl->flags &= ~ANYOF_NONBITMAP;
}
}
goto do_default;
if (flags & SCF_DO_STCLASS_OR) { /* Everything but \n */
value = (ANYOF_BITMAP_TEST(data->start_class,'\n')
- || (data->start_class->flags & ANYOF_CLASS));
+ || ((data->start_class->flags & ANYOF_CLASS)
+ && ANYOF_CLASS_TEST_ANY_SET(data->start_class)));
cl_anything(pRExC_state, data->start_class);
}
if (flags & SCF_DO_STCLASS_AND || !value)
(value=='p' ? '+' : '!'), (int)n, RExC_parse);
}
RExC_parse = e + 1;
- ANYOF_FLAGS(ret) |= ANYOF_UNICODE;
+ ANYOF_FLAGS(ret) |= ANYOF_NONBITMAP;
namedclass = ANYOF_MAX; /* no official name, but it's named */
}
break;
RExC_emit += ANYOF_CLASS_ADD_SKIP;
ANYOF_CLASS_ZERO(ret);
}
- ANYOF_FLAGS(ret) |= ANYOF_CLASS|ANYOF_LARGE;
+ ANYOF_FLAGS(ret) |= ANYOF_CLASS;
}
/* a bad range like a-\d, a-[:digit:] ? */
ANYOF_BITMAP_SET(ret, '-');
}
else {
- ANYOF_FLAGS(ret) |= ANYOF_UNICODE;
+ ANYOF_FLAGS(ret) |= ANYOF_NONBITMAP;
Perl_sv_catpvf(aTHX_ listsv,
"%04"UVxf"\n%04"UVxf"\n", (UV)prevvalue, (UV) '-');
}
const UV prevnatvalue = NATIVE_TO_UNI(prevvalue);
const UV natvalue = NATIVE_TO_UNI(value);
stored+=2; /* can't optimize this class */
- ANYOF_FLAGS(ret) |= ANYOF_UNICODE;
+ ANYOF_FLAGS(ret) |= ANYOF_NONBITMAP;
if (prevnatvalue < natvalue) { /* what about > ? */
Perl_sv_catpvf(aTHX_ listsv, "%04"UVxf"\t%04"UVxf"\n",
prevnatvalue, natvalue);
EMIT_ANYOF_TEST_SEPARATOR(do_sep,sv,flags);
/* output any special charclass tests (used mostly under use locale) */
- if (o->flags & ANYOF_CLASS)
+ if (o->flags & ANYOF_CLASS && ANYOF_CLASS_TEST_ANY_SET(o))
for (i = 0; i < (int)(sizeof(anyofs)/sizeof(char*)); i++)
if (ANYOF_CLASS_TEST(o,i)) {
sv_catpv(sv, anyofs[i]);
EMIT_ANYOF_TEST_SEPARATOR(do_sep,sv,flags);
/* output information about the unicode matching */
- if (flags & ANYOF_UNICODE)
+ if (flags & ANYOF_NONBITMAP)
sv_catpvs(sv, "{unicode}");
else if (flags & ANYOF_UNICODE_ALL)
sv_catpvs(sv, "{unicode_all}");
}
else if (op == ANYOF) {
/* arglen 1 + class block */
- node += 1 + ((ANYOF_FLAGS(node) & ANYOF_LARGE)
+ node += 1 + ((ANYOF_FLAGS(node) & ANYOF_CLASS)
? ANYOF_CLASS_SKIP : ANYOF_SKIP);
node = NEXTOPER(node);
}