/* Look up the property name, and get its swash and
* inversion list, if the property is found */
- if (! (ANYOF_FLAGS(ret) & ANYOF_INVERT)) {
if (swash) {
SvREFCNT_dec(swash);
}
undefined properties */
NULL, FALSE /* No inversion list */
);
- }
-
- if ( ANYOF_FLAGS(ret) & ANYOF_INVERT
- || ! swash
+ if ( ! swash
|| ! SvROK(swash)
|| ! SvTYPE(SvRV(swash)) == SVt_PVHV
|| ! (invlistsvp =
Safefree(name);
}
RExC_parse = e + 1;
-
- /* The \p could match something in the Latin1 range, hence
- * something that isn't utf8 */
- ANYOF_FLAGS(ret) |= ANYOF_NONBITMAP_NON_UTF8;
namedclass = ANYOF_MAX; /* no official name, but it's named */
/* \p means they want Unicode semantics */
}
}
+ /* Here, <nonbitmap> contains all the code points we can determine at
+ * compile time that we haven't put into the bitmap. Go through it, and
+ * for things that belong in the bitmap, put them there, and delete from
+ * <nonbitmap> */
+ if (nonbitmap) {
+
+ /* Above-ASCII code points in /d have to stay in <nonbitmap>, as they
+ * should possibly match only when the target string is UTF-8 */
+ UV max_cp_to_set = (DEPENDS_SEMANTICS) ? 127 : 255;
+
+ /* This gets set if we actually need to modify things */
+ bool change_invlist = FALSE;
+
+ UV start, end;
+
+ /* Start looking through <nonbitmap> */
+ invlist_iterinit(nonbitmap);
+ while (invlist_iternext(nonbitmap, &start, &end)) {
+ UV high;
+ int i;
+
+ /* Quit if we are above what we should change */
+ if (start > max_cp_to_set) {
+ break;
+ }
+
+ change_invlist = TRUE;
+
+ /* Set all the bits in the range, up to the max that we are doing */
+ high = (end < max_cp_to_set) ? end : max_cp_to_set;
+ for (i = start; i <= (int) high; i++) {
+ if (! ANYOF_BITMAP_TEST(ret, i)) {
+ ANYOF_BITMAP_SET(ret, i);
+ stored++;
+ prevvalue = value;
+ value = i;
+ }
+ }
+ }
+
+ /* Done with loop; set <nonbitmap> to not include any code points that
+ * are in the bitmap */
+ if (change_invlist) {
+ SV* keep_list = _new_invlist(2);
+ _append_range_to_invlist(keep_list, max_cp_to_set + 1, UV_MAX);
+ _invlist_intersection(nonbitmap, keep_list, &nonbitmap);
+ SvREFCNT_dec(keep_list);
+ }
+
+ /* If we have completely emptied it, remove it completely */
+ if (invlist_len(nonbitmap) == 0) {
+ SvREFCNT_dec(nonbitmap);
+ nonbitmap = NULL;
+ }
+ }
/* Here, we have calculated what code points should be in the character
- * class.
+ * class. <nonbitmap> does not overlap the bitmap except possibly in the
+ * case of DEPENDS rules.
*
* Now we can see about various optimizations. Fold calculation (which we
* did above) needs to take place before inversion. Otherwise /[^k]/i
ANYOF_FLAGS(ret) |= ANYOF_UNICODE_ALL;
}
else {
- /* Here, also has things outside the bitmap. Go through each bit
- * individually and add it to the list to get rid of from those
- * things not in the bitmap */
- SV *remove_list = _new_invlist(2);
+ /* Here, the class also has things outside the bitmap that may
+ * overlap with the bitmap. We have to sync them up, so that they get
+ * inverted in both places. Earlier, we removed all overlaps except in
+ * the case of /d rules, so no syncing is needed except for this case
+ */
+ SV *remove_list = NULL;
+
+ if (DEPENDS_SEMANTICS) {
+ UV start, end;
+
+ /* Set the bits that correspond to the ones that aren't in the
+ * bitmap. Otherwise, when we invert, we'll miss these.
+ * Earlier, we removed from the nonbitmap all code points
+ * < 128, so there is no extra work here */
+ invlist_iterinit(nonbitmap);
+ while (invlist_iternext(nonbitmap, &start, &end)) {
+ if (start > 255) { /* The bit map goes to 255 */
+ break;
+ }
+ if (end > 255) {
+ end = 255;
+ }
+ for (i = start; i <= (int) end; ++i) {
+ ANYOF_BITMAP_SET(ret, i);
+ prevvalue = value;
+ value = i;
+ }
+ }
+ }
/* Now invert both the bitmap and the nonbitmap. Anything in the
- * bitmap has to also be removed from the non-bitmap */
+ * bitmap has to also be removed from the non-bitmap, but again,
+ * there should not be overlap unless it is /d rules. */
_invlist_invert(nonbitmap);
+
for (i = 0; i < 256; ++i) {
if (ANYOF_BITMAP_TEST(ret, i)) {
ANYOF_BITMAP_CLEAR(ret, i);
- remove_list = add_cp_to_invlist(remove_list, i);
+ if (DEPENDS_SEMANTICS) {
+ if (! remove_list) {
+ remove_list = _new_invlist(2);
+ }
+ remove_list = add_cp_to_invlist(remove_list, i);
+ }
}
else {
ANYOF_BITMAP_SET(ret, i);
}
/* And do the removal */
- _invlist_subtract(nonbitmap, remove_list, &nonbitmap);
- SvREFCNT_dec(remove_list);
+ if (DEPENDS_SEMANTICS) {
+ if (remove_list) {
+ _invlist_subtract(nonbitmap, remove_list, &nonbitmap);
+ SvREFCNT_dec(remove_list);
+ }
+ }
+ else {
+ /* There is no overlap for non-/d, so just delete anything
+ * below 256 */
+ SV* keep_list = _new_invlist(2);
+ _append_range_to_invlist(keep_list, 256, UV_MAX);
+ _invlist_intersection(nonbitmap, keep_list, &nonbitmap);
+ SvREFCNT_dec(keep_list);
+ }
}
stored = 256 - stored;