* Some of the methods should always be private to the implementation, and some
* should eventually be made public */
-#define INVLIST_LEN_OFFSET 0 /* Number of elements in the inversion list */
-#define INVLIST_ITER_OFFSET 1 /* Current iteration position */
-
-/* This is a combination of a version and data structure type, so that one
- * being passed in can be validated to be an inversion list of the correct
- * vintage. When the structure of the header is changed, a new random number
- * in the range 2**31-1 should be generated and the new() method changed to
- * insert that at this location. Then, if an auxiliary program doesn't change
- * correspondingly, it will be discovered immediately */
-#define INVLIST_VERSION_ID_OFFSET 2
-#define INVLIST_VERSION_ID 1064334010
-
-/* For safety, when adding new elements, remember to #undef them at the end of
- * the inversion list code section */
-
-#define INVLIST_ZERO_OFFSET 3 /* 0 or 1; must be last element in header */
-/* The UV at position ZERO contains either 0 or 1. If 0, the inversion list
- * contains the code point U+00000, and begins here. If 1, the inversion list
- * doesn't contain U+0000, and it begins at the next UV in the array.
- * Inverting an inversion list consists of adding or removing the 0 at the
- * beginning of it. By reserving a space for that 0, inversion can be made
- * very fast */
-
-#define HEADER_LENGTH (INVLIST_ZERO_OFFSET + 1)
-
-/* Internally things are UVs */
+/* The header definitions are in F<inline_invlist.c> */
+
/* Convert between a count of inversion list elements and the number of bytes
 * occupied by that many UVs plus the header.  The argument is parenthesized
 * so that a compound expression passed in expands with the intended
 * precedence */
#define TO_INTERNAL_SIZE(x) (((x) + HEADER_LENGTH) * sizeof(UV))
#define FROM_INTERNAL_SIZE(x) (((x) / sizeof(UV)) - HEADER_LENGTH)
+ *get_invlist_zero_addr(invlist));
}
-PERL_STATIC_INLINE UV*
-S__get_invlist_len_addr(pTHX_ SV* invlist)
-{
- /* Return the address of the UV that contains the current number
- * of used elements in the inversion list */
-
- PERL_ARGS_ASSERT__GET_INVLIST_LEN_ADDR;
-
- return (UV *) (SvPVX(invlist) + (INVLIST_LEN_OFFSET * sizeof (UV)));
-}
-PERL_STATIC_INLINE UV
-S__invlist_len(pTHX_ SV* const invlist)
-{
- /* Returns the current number of elements stored in the inversion list's
- * array */
-
- PERL_ARGS_ASSERT__INVLIST_LEN;
-
- return *_get_invlist_len_addr(invlist);
-}
-
PERL_STATIC_INLINE void
S_invlist_set_len(pTHX_ SV* const invlist, const UV len)
{
* Note that when inverting, SvCUR shouldn't change */
}
+PERL_STATIC_INLINE IV*
+S_get_invlist_previous_index_addr(pTHX_ SV* invlist)
+{
+ /* Locate the header slot set aside for the cached index.  The slot lies
+ * INVLIST_PREVIOUS_INDEX_OFFSET UV-sized elements past the address that
+ * SvPVX() yields for <invlist>, and is handed back as a pointer to IV */
+
+ PERL_ARGS_ASSERT_GET_INVLIST_PREVIOUS_INDEX_ADDR;
+
+ return (IV *) &((UV *) SvPVX(invlist))[INVLIST_PREVIOUS_INDEX_OFFSET];
+}
+
+PERL_STATIC_INLINE IV
+S_invlist_previous_index(pTHX_ SV* const invlist)
+{
+ /* Fetches the index cached for <invlist>, reading it from the slot that
+ * get_invlist_previous_index_addr() locates */
+
+ PERL_ARGS_ASSERT_INVLIST_PREVIOUS_INDEX;
+
+ return get_invlist_previous_index_addr(invlist)[0];
+}
+
+PERL_STATIC_INLINE void
+S_invlist_set_previous_index(pTHX_ SV* const invlist, const IV index)
+{
+ /* Caches <index> in the header of <invlist> for later retrieval.  <index>
+ * is expected to be 0 or less than the list's current length */
+
+ PERL_ARGS_ASSERT_INVLIST_SET_PREVIOUS_INDEX;
+
+ /* Compare as IVs: <index> is an IV, and narrowing the UV length to int
+ * could truncate it where int is narrower than IV */
+ assert(index == 0 || index < (IV) _invlist_len(invlist));
+
+ *get_invlist_previous_index_addr(invlist) = index;
+}
+
PERL_STATIC_INLINE UV
S_invlist_max(pTHX_ SV* const invlist)
{
* properly */
*get_invlist_zero_addr(new_list) = UV_MAX;
+ *get_invlist_previous_index_addr(new_list) = 0;
*get_invlist_version_id_addr(new_list) = INVLIST_VERSION_ID;
-#if HEADER_LENGTH != 4
+#if HEADER_LENGTH != 5
# error Need to regenerate VERSION_ID by running perl -E 'say int(rand 2**31-1)', and then changing the #if to the new length
#endif
SvPV_shrink_to_cur((SV *) invlist);
}
-/* An element is in an inversion list iff its index is even numbered: 0, 2, 4,
- * etc */
-#define ELEMENT_RANGE_MATCHES_INVLIST(i) (! ((i) & 1))
-#define PREV_RANGE_MATCHES_INVLIST(i) (! ELEMENT_RANGE_MATCHES_INVLIST(i))
-
#define _invlist_union_complement_2nd(a, b, output) _invlist_union_maybe_complement_2nd(a, b, TRUE, output)
STATIC void
* contains <cp> */
IV low = 0;
+ IV mid;
IV high = _invlist_len(invlist);
const IV highest_element = high - 1;
const UV* array;
* can't combine this with the test above, because we can't get the array
* unless we know the list is non-empty) */
array = invlist_array(invlist);
- if (cp < array[0]) {
- return -1;
+
+ mid = invlist_previous_index(invlist);
+ assert(mid >=0 && mid <= highest_element);
+
+ /* <mid> contains the cache of the result of the previous call to this
+ * function (0 the first time). See if this call is for the same result,
+ * or if it is for mid-1. This is under the theory that calls to this
+ * function will often be for related code points that are near each other.
+ * And benchmarks show that caching gives better results. We also test
+ * here if the code point is within the bounds of the list. These tests
+ * replace others that would have had to be made anyway to make sure that
+ * the array bounds were not exceeded, and give us extra information at the
+ * same time */
+ if (cp >= array[mid]) {
+ if (cp >= array[highest_element]) {
+ return highest_element;
+ }
+
+ /* Here, array[mid] <= cp < array[highest_element]. This means that
+ * the final element is not the answer, so can exclude it; it also
+ * means that <mid> is not the final element, so can refer to 'mid + 1'
+ * safely */
+ if (cp < array[mid + 1]) {
+ return mid;
+ }
+ high--;
+ low = mid + 1;
+ }
+ else { /* cp < array[mid] */
+ if (cp < array[0]) { /* Fail if outside the array */
+ return -1;
+ }
+ high = mid;
+ if (cp >= array[mid - 1]) {
+ goto found_entry;
+ }
}
/* Binary search. What we are looking for is <i> such that
* array[i] <= cp < array[i+1]
* The loop below converges on the i+1. Note that there may not be an
* (i+1)th element in the array, and things work nonetheless */
while (low < high) {
- IV mid = (low + high) / 2;
+ mid = (low + high) / 2;
assert(mid <= highest_element);
if (array[mid] <= cp) { /* cp >= array[mid] */
low = mid + 1;
}
}
- return high - 1;
+ found_entry:
+ high--;
+ invlist_set_previous_index(invlist, high);
+ return high;
}
void
#endif
-PERL_STATIC_INLINE bool
-S__invlist_contains_cp(pTHX_ SV* const invlist, const UV cp)
-{
- /* Does <invlist> contain code point <cp> as part of the set? */
-
- IV index = _invlist_search(invlist, cp);
-
- PERL_ARGS_ASSERT__INVLIST_CONTAINS_CP;
-
- return index >= 0 && ELEMENT_RANGE_MATCHES_INVLIST(index);
-}
-
PERL_STATIC_INLINE SV*
S_add_cp_to_invlist(pTHX_ SV* invlist, const UV cp) {
return _add_range_to_invlist(invlist, cp, cp);