}
#ifdef DEBUGGING
- /* Allow dumping */
+ /* Allow dumping but overwriting the collection of skipped
+ * ops and/or strings with fake optimized ops */
n = scan + NODE_SZ_STR(scan);
while (n <= stop) {
- if (PL_regkind[OP(n)] != NOTHING || OP(n) == NOTHING) {
- OP(n) = OPTIMIZED;
- NEXT_OFF(n) = 0;
- }
+ OP(n) = OPTIMIZED;
+ FLAGS(n) = 0;
+ NEXT_OFF(n) = 0;
n++;
}
#endif
* But, this is only valid if len is not 0. The consequences of not doing
* this is that the memory allocation code may think that the 1 more UV
* is being used than actually is, and so might do an unnecessary grow.
- * That seems worth not bothering to make this the precise amount */
+ * That seems worth not bothering to make this the precise amount.
+ *
+ * Note that when inverting, SvCUR shouldn't change */
}
PERL_STATIC_INLINE UV
invlist_set_len(invlist, len - 1);
}
}
-#endif
-STATIC void
-S_invlist_union(pTHX_ SV* const a, SV* const b, SV** output)
+void
+Perl__invlist_union(pTHX_ SV* const a, SV* const b, SV** output)
{
/* Take the union of two inversion lists and point 'result' to it. If
* 'result' on input points to one of the two lists, the reference count to
*/
UV count = 0;
- PERL_ARGS_ASSERT_INVLIST_UNION;
+ PERL_ARGS_ASSERT__INVLIST_UNION;
/* If either one is empty, the union is the other one */
len_a = invlist_len(a);
return;
}
-STATIC void
-S_invlist_intersection(pTHX_ SV* const a, SV* const b, SV** i)
+void
+Perl__invlist_intersection(pTHX_ SV* const a, SV* const b, SV** i)
{
/* Take the intersection of two inversion lists and point 'i' to it. If
* 'i' on input points to one of the two lists, the reference count to that
*/
UV count = 0;
- PERL_ARGS_ASSERT_INVLIST_INTERSECTION;
+ PERL_ARGS_ASSERT__INVLIST_INTERSECTION;
/* If either one is empty, the intersection is null */
len_a = invlist_len(a);
return;
}
+#endif
+
STATIC SV*
S_add_range_to_invlist(pTHX_ SV* invlist, const UV start, const UV end)
{
range_invlist = _new_invlist(2);
_append_range_to_invlist(range_invlist, start, end);
- invlist_union(invlist, range_invlist, &invlist);
+ _invlist_union(invlist, range_invlist, &invlist);
/* The temporary can be freed */
SvREFCNT_dec(range_invlist);
return add_range_to_invlist(invlist, cp, cp);
}
+#ifndef PERL_IN_XSUB_RE
+void
+Perl__invlist_invert(pTHX_ SV* const invlist)
+{
+    /* Turn the input inversion list into its complement, in place.  An empty
+     * list gets the single range 0..UV_MAX appended.  Otherwise only the
+     * header changes: the flag at the list's zero address is toggled and the
+     * stored length is adjusted by one to match, which is what makes this
+     * cheap */
+
+    UV* const count_addr = get_invlist_len_addr(invlist);
+
+    PERL_ARGS_ASSERT__INVLIST_INVERT;
+
+    if (*count_addr != 0) {
+
+        /* XOR-ing with 1 flips the zero flag between 0 and 1.  A result of 0
+         * means the length grows by one; a result of 1 means it shrinks by
+         * one.  SvCUR is not touched here */
+        if ((*get_invlist_zero_addr(invlist) ^= 1) == 0) {
+            (*count_addr)++;
+        }
+        else {
+            (*count_addr)--;
+        }
+    }
+    else {
+
+        /* Nothing matched before, so now everything does */
+        _append_range_to_invlist(invlist, 0, UV_MAX);
+    }
+}
+#endif
+
+PERL_STATIC_INLINE SV*
+S_invlist_clone(pTHX_ SV* const invlist)
+{
+
+    /* Return a new inversion list that is a copy of the input one, which is
+     * unchanged */
+
+    /* The input's used length in bytes; this is how much gets copied */
+    const STRLEN length = SvCUR(invlist);
+
+    /* NOTE(review): _new_invlist() is called elsewhere with an element
+     * count, so passing a byte count here presumably just over-allocates;
+     * left as it was -- confirm */
+    SV* new_invlist = _new_invlist(length);
+
+    PERL_ARGS_ASSERT_INVLIST_CLONE;
+
+    Copy(SvPVX(invlist), SvPVX(new_invlist), length, char);
+
+    /* Copying the bytes does not update the clone's own SvCUR, so set it to
+     * match the input's; otherwise the clone would not record how much of
+     * its buffer is in use, and cloning the clone would copy the wrong
+     * amount */
+    SvCUR_set(new_invlist, length);
+
+    return new_invlist;
+}
+
+#ifndef PERL_IN_XSUB_RE
+void
+Perl__invlist_subtract(pTHX_ SV* const a, SV* const b, SV** result)
+{
+    /* Point '*result' to an inversion list which consists of all elements in
+     * 'a' that aren't also in 'b'.
+     *
+     * '*result' is read on entry, so the caller must have initialized it.
+     * If it is 'a' and 'b' is empty, 'a' is simply left in place as the
+     * answer.  If it is 'b' (and 'b' is not also 'a'), the reference that
+     * '*result' held on 'b' is dropped, as '*result' gets overwritten.
+     *
+     * NOTE(review): when '*result' is 'a' and 'b' is not empty, releasing
+     * 'a' is left to _invlist_intersection(), which presumably does so for
+     * an input list that its output already points to -- confirm */
+
+    SV* orig_result;
+
+    PERL_ARGS_ASSERT__INVLIST_SUBTRACT;
+
+    /* What the caller's variable holds on entry.  This is what has to be
+     * compared with 'a' and 'b'; comparing 'result' itself with '&a' or '&b'
+     * could never match, as those are the addresses of this function's own
+     * parameters */
+    orig_result = *result;
+
+    /* Subtracting nothing retains the original */
+    if (invlist_len(b) == 0) {
+
+        /* If the result is not to be the same variable as the original,
+         * create a copy */
+        if (orig_result != a) {
+            *result = invlist_clone(a);
+        }
+    } else {
+        SV *b_copy = invlist_clone(b);
+
+        /* Everything not in 'b' */
+        _invlist_invert(b_copy);
+
+        /* Everything in 'a' not in 'b' */
+        _invlist_intersection(a, b_copy, result);
+
+        SvREFCNT_dec(b_copy);
+    }
+
+    /* The caller's variable no longer refers to 'b', so give up that
+     * reference.  Skipped when 'a' and 'b' are the same list, as that list
+     * is either still the result or has been dealt with as 'a' */
+    if (orig_result == b && a != b) {
+        SvREFCNT_dec(b);
+    }
+
+    return;
+}
+#endif
+
PERL_STATIC_INLINE UV*
S_get_invlist_iter_addr(pTHX_ SV* invlist)
{
* be checked. Get the intersection of this class and all the
* possible characters that are foldable. This can quickly narrow
* down a large class */
- invlist_intersection(PL_utf8_foldable, nonbitmap, &fold_intersection);
+ _invlist_intersection(PL_utf8_foldable, nonbitmap, &fold_intersection);
/* Now look at the foldable characters in this class individually */
invlist_iterinit(fold_intersection);
/* Combine the two lists into one. */
if (l1_fold_invlist) {
if (nonbitmap) {
- invlist_union(nonbitmap, l1_fold_invlist, &nonbitmap);
+ _invlist_union(nonbitmap, l1_fold_invlist, &nonbitmap);
SvREFCNT_dec(l1_fold_invlist);
}
else {
* nothing like \w in it; some thought also would have to be given to the
* interaction with above 0x100 chars */
if (! LOC
- && (ANYOF_FLAGS(ret) & ANYOF_FLAGS_ALL) == ANYOF_INVERT
+ && (ANYOF_FLAGS(ret) & ANYOF_INVERT)
&& ! unicode_alternate
- && ! nonbitmap
+ /* In case of /d, there are some things that should match only when
+ * not in the bitmap, i.e., they require UTF8 to match. These are
+ * listed in nonbitmap. */
+ && (! nonbitmap
+ || ! DEPENDS_SEMANTICS
+ || (ANYOF_FLAGS(ret) & ANYOF_NONBITMAP_NON_UTF8))
&& SvCUR(listsv) == initial_listsv_len)
{
- for (value = 0; value < ANYOF_BITMAP_SIZE; ++value)
- ANYOF_BITMAP(ret)[value] ^= 0xFF;
+ if (! nonbitmap) {
+ for (value = 0; value < ANYOF_BITMAP_SIZE; ++value)
+ ANYOF_BITMAP(ret)[value] ^= 0xFF;
+ /* The inversion means that everything above 255 is matched */
+ ANYOF_FLAGS(ret) |= ANYOF_UNICODE_ALL;
+ }
+ else {
+ /* Here, the class also has things outside the bitmap. Go through
+ * each bit individually, and add each one that is set to the list
+ * of things to remove from those not in the bitmap */
+ SV *remove_list = _new_invlist(2);
+ _invlist_invert(nonbitmap);
+ for (value = 0; value < 256; ++value) {
+ if (ANYOF_BITMAP_TEST(ret, value)) {
+ ANYOF_BITMAP_CLEAR(ret, value);
+ remove_list = add_cp_to_invlist(remove_list, value);
+ }
+ else {
+ ANYOF_BITMAP_SET(ret, value);
+ }
+ }
+ _invlist_subtract(nonbitmap, remove_list, &nonbitmap);
+ SvREFCNT_dec(remove_list);
+ }
+
stored = 256 - stored;
- /* The inversion means that everything above 255 is matched; and at the
- * same time we clear the invert flag */
- ANYOF_FLAGS(ret) = ANYOF_UNICODE_ALL;
+ /* Clear the invert flag, since the inversion has just been done here */
+ ANYOF_FLAGS(ret) &= ~ANYOF_INVERT;
}
/* Folding in the bitmap is taken care of above, but not for locale (for