#include "dquote_static.c"
#include "charclass_invlists.h"
+#include "inline_invlist.c"
#define HAS_NONLATIN1_FOLD_CLOSURE(i) _HAS_NONLATIN1_FOLD_CLOSURE_ONLY_FOR_USE_BY_REGCOMP_DOT_C_AND_REGEXEC_DOT_C(i)
#define IS_NON_FINAL_FOLD(c) _IS_NON_FINAL_FOLD_ONLY_FOR_USE_BY_REGCOMP_DOT_C(c)
* Some of the methods should always be private to the implementation, and some
* should eventually be made public */
-#define INVLIST_LEN_OFFSET 0 /* Number of elements in the inversion list */
-#define INVLIST_ITER_OFFSET 1 /* Current iteration position */
+/* The header definitions are in F<inline_invlist.c> */
-/* This is a combination of a version and data structure type, so that one
- * being passed in can be validated to be an inversion list of the correct
- * vintage. When the structure of the header is changed, a new random number
- * in the range 2**31-1 should be generated and the new() method changed to
- * insert that at this location. Then, if an auxiliary program doesn't change
- * correspondingly, it will be discovered immediately */
-#define INVLIST_VERSION_ID_OFFSET 2
-#define INVLIST_VERSION_ID 1064334010
-
-/* For safety, when adding new elements, remember to #undef them at the end of
- * the inversion list code section */
-
-#define INVLIST_ZERO_OFFSET 3 /* 0 or 1; must be last element in header */
-/* The UV at position ZERO contains either 0 or 1. If 0, the inversion list
- * contains the code point U+00000, and begins here. If 1, the inversion list
- * doesn't contain U+0000, and it begins at the next UV in the array.
- * Inverting an inversion list consists of adding or removing the 0 at the
- * beginning of it. By reserving a space for that 0, inversion can be made
- * very fast */
-
-#define HEADER_LENGTH (INVLIST_ZERO_OFFSET + 1)
-
-/* Internally things are UVs */
#define TO_INTERNAL_SIZE(x) ((x + HEADER_LENGTH) * sizeof(UV))
#define FROM_INTERNAL_SIZE(x) ((x / sizeof(UV)) - HEADER_LENGTH)
PERL_ARGS_ASSERT__INVLIST_ARRAY_INIT;
/* Must be empty */
- assert(! *get_invlist_len_addr(invlist));
+ assert(! *_get_invlist_len_addr(invlist));
/* 1^1 = 0; 1^0 = 1 */
*zero = 1 ^ will_have_0;
/* Must not be empty. If these fail, you probably didn't check for <len>
* being non-zero before trying to get the array */
- assert(*get_invlist_len_addr(invlist));
+ assert(*_get_invlist_len_addr(invlist));
assert(*get_invlist_zero_addr(invlist) == 0
|| *get_invlist_zero_addr(invlist) == 1);
+ *get_invlist_zero_addr(invlist));
}
-PERL_STATIC_INLINE UV*
-S_get_invlist_len_addr(pTHX_ SV* invlist)
-{
- /* Return the address of the UV that contains the current number
- * of used elements in the inversion list */
-
- PERL_ARGS_ASSERT_GET_INVLIST_LEN_ADDR;
-
- return (UV *) (SvPVX(invlist) + (INVLIST_LEN_OFFSET * sizeof (UV)));
-}
-
-PERL_STATIC_INLINE UV
-S_invlist_len(pTHX_ SV* const invlist)
-{
- /* Returns the current number of elements stored in the inversion list's
- * array */
-
- PERL_ARGS_ASSERT_INVLIST_LEN;
-
- return *get_invlist_len_addr(invlist);
-}
-
PERL_STATIC_INLINE void
S_invlist_set_len(pTHX_ SV* const invlist, const UV len)
{
PERL_ARGS_ASSERT_INVLIST_SET_LEN;
- *get_invlist_len_addr(invlist) = len;
+ *_get_invlist_len_addr(invlist) = len;
assert(len <= SvLEN(invlist));
* Note that when inverting, SvCUR shouldn't change */
}
+PERL_STATIC_INLINE IV*
+S_get_invlist_previous_index_addr(pTHX_ SV* invlist)
+{
+ /* Return the address of the UV-sized slot (accessed here as an IV) that is
+ * reserved to hold the cached index */
+
+ PERL_ARGS_ASSERT_GET_INVLIST_PREVIOUS_INDEX_ADDR;
+
+ return (IV *) (SvPVX(invlist) + (INVLIST_PREVIOUS_INDEX_OFFSET * sizeof (UV)));
+}
+
+PERL_STATIC_INLINE IV
+S_invlist_previous_index(pTHX_ SV* const invlist)
+{
+ /* Returns the index cached in <invlist> by the previous search, read from
+ * the slot whose address get_invlist_previous_index_addr() returns */
+
+ PERL_ARGS_ASSERT_INVLIST_PREVIOUS_INDEX;
+
+ return *get_invlist_previous_index_addr(invlist);
+}
+
+PERL_STATIC_INLINE void
+S_invlist_set_previous_index(pTHX_ SV* const invlist, const IV index)
+{
+ /* Caches <index> in <invlist> for later retrieval by
+ * invlist_previous_index(). <index> must be 0, or a valid position in the
+ * list's array */
+
+ PERL_ARGS_ASSERT_INVLIST_SET_PREVIOUS_INDEX;
+
+ /* Compare as IVs: narrowing the length to 'int' could truncate it. A
+ * negative index is never valid; the search asserts that the value it
+ * reads back from the cache is non-negative */
+ assert(index >= 0);
+ assert(index == 0 || index < (IV) _invlist_len(invlist));
+
+ *get_invlist_previous_index_addr(invlist) = index;
+}
+
PERL_STATIC_INLINE UV
S_invlist_max(pTHX_ SV* const invlist)
{
* properly */
*get_invlist_zero_addr(new_list) = UV_MAX;
+ *get_invlist_previous_index_addr(new_list) = 0;
*get_invlist_version_id_addr(new_list) = INVLIST_VERSION_ID;
-#if HEADER_LENGTH != 4
+#if HEADER_LENGTH != 5
# error Need to regenerate VERSION_ID by running perl -E 'say int(rand 2**31-1)', and then changing the #if to the new length
#endif
SvPV_set(invlist, (char *) list);
SvLEN_set(invlist, 0); /* Means we own the contents, and the system
shouldn't touch it */
- SvCUR_set(invlist, TO_INTERNAL_SIZE(invlist_len(invlist)));
+ SvCUR_set(invlist, TO_INTERNAL_SIZE(_invlist_len(invlist)));
if (*get_invlist_version_id_addr(invlist) != INVLIST_VERSION_ID) {
Perl_croak(aTHX_ "panic: Incorrect version for previously generated inversion list");
SvPV_shrink_to_cur((SV *) invlist);
}
-/* An element is in an inversion list iff its index is even numbered: 0, 2, 4,
- * etc */
-#define ELEMENT_RANGE_MATCHES_INVLIST(i) (! ((i) & 1))
-#define PREV_RANGE_MATCHES_INVLIST(i) (! ELEMENT_RANGE_MATCHES_INVLIST(i))
-
#define _invlist_union_complement_2nd(a, b, output) _invlist_union_maybe_complement_2nd(a, b, TRUE, output)
STATIC void
UV* array;
UV max = invlist_max(invlist);
- UV len = invlist_len(invlist);
+ UV len = _invlist_len(invlist);
PERL_ARGS_ASSERT__APPEND_RANGE_TO_INVLIST;
* contains <cp> */
IV low = 0;
- IV high = invlist_len(invlist);
+ IV mid;
+ IV high = _invlist_len(invlist);
const IV highest_element = high - 1;
const UV* array;
* can't combine this with the test above, because we can't get the array
* unless we know the list is non-empty) */
array = invlist_array(invlist);
- if (cp < array[0]) {
- return -1;
+
+ mid = invlist_previous_index(invlist);
+ assert(mid >=0 && mid <= highest_element);
+
+ /* <mid> contains the cache of the result of the previous call to this
+ * function (0 the first time). See if this call is for the same result,
+ * or if it is for mid-1. This is under the theory that calls to this
+ * function will often be for related code points that are near each other.
+ * And benchmarks show that caching gives better results. We also test
+ * here if the code point is within the bounds of the list. These tests
+ * replace others that would have had to be made anyway to make sure that
+ * the array bounds were not exceeded, and give us extra information at the
+ * same time */
+ if (cp >= array[mid]) {
+ if (cp >= array[highest_element]) {
+ return highest_element;
+ }
+
+ /* Here, array[mid] <= cp < array[highest_element]. This means that
+ * the final element is not the answer, so can exclude it; it also
+ * means that <mid> is not the final element, so can refer to 'mid + 1'
+ * safely */
+ if (cp < array[mid + 1]) {
+ return mid;
+ }
+ high--;
+ low = mid + 1;
+ }
+ else { /* cp < array[mid] */
+ if (cp < array[0]) { /* Fail if outside the array */
+ return -1;
+ }
+ high = mid;
+ if (cp >= array[mid - 1]) {
+ goto found_entry;
+ }
}
/* Binary search. What we are looking for is <i> such that
* The loop below converges on the i+1. Note that there may not be an
* (i+1)th element in the array, and things work nonetheless */
while (low < high) {
- IV mid = (low + high) / 2;
+ mid = (low + high) / 2;
assert(mid <= highest_element);
if (array[mid] <= cp) { /* cp >= array[mid] */
low = mid + 1;
}
}
- return high - 1;
+ found_entry:
+ high--;
+ invlist_set_previous_index(invlist, high);
+ return high;
}
void
* that <swatch> is all 0's on input */
UV current = start;
- const IV len = invlist_len(invlist);
+ const IV len = _invlist_len(invlist);
IV i;
const UV * array;
assert(a != b);
/* If either one is empty, the union is the other one */
- if (a == NULL || ((len_a = invlist_len(a)) == 0)) {
+ if (a == NULL || ((len_a = _invlist_len(a)) == 0)) {
if (*output == a) {
if (a != NULL) {
SvREFCNT_dec(a);
} /* else *output already = b; */
return;
}
- else if ((len_b = invlist_len(b)) == 0) {
+ else if ((len_b = _invlist_len(b)) == 0) {
if (*output == b) {
SvREFCNT_dec(b);
}
/* Set result to final length, which can change the pointer to array_u, so
* re-find it */
- if (len_u != invlist_len(u)) {
+ if (len_u != _invlist_len(u)) {
invlist_set_len(u, len_u);
invlist_trim(u);
array_u = invlist_array(u);
assert(a != b);
/* Special case if either one is empty */
- len_a = invlist_len(a);
- if ((len_a == 0) || ((len_b = invlist_len(b)) == 0)) {
+ len_a = _invlist_len(a);
+ if ((len_a == 0) || ((len_b = _invlist_len(b)) == 0)) {
if (len_a != 0 && complement_b) {
/* Set result to final length, which can change the pointer to array_r, so
* re-find it */
- if (len_r != invlist_len(r)) {
+ if (len_r != _invlist_len(r)) {
invlist_set_len(r, len_r);
invlist_trim(r);
array_r = invlist_array(r);
len = 0;
}
else {
- len = invlist_len(invlist);
+ len = _invlist_len(invlist);
}
/* If comes after the final entry, can just append it to the end */
if (len == 0
|| start >= invlist_array(invlist)
- [invlist_len(invlist) - 1])
+ [_invlist_len(invlist) - 1])
{
_append_range_to_invlist(invlist, start, end);
return invlist;
#endif
-PERL_STATIC_INLINE bool
-S__invlist_contains_cp(pTHX_ SV* const invlist, const UV cp)
-{
- /* Does <invlist> contain code point <cp> as part of the set? */
-
- IV index = _invlist_search(invlist, cp);
-
- PERL_ARGS_ASSERT__INVLIST_CONTAINS_CP;
-
- return index >= 0 && ELEMENT_RANGE_MATCHES_INVLIST(index);
-}
-
PERL_STATIC_INLINE SV*
S_add_cp_to_invlist(pTHX_ SV* invlist, const UV cp) {
return _add_range_to_invlist(invlist, cp, cp);
* have a zero; removes it otherwise. As described above, the data
* structure is set up so that this is very efficient */
- UV* len_pos = get_invlist_len_addr(invlist);
+ UV* len_pos = _get_invlist_len_addr(invlist);
PERL_ARGS_ASSERT__INVLIST_INVERT;
_invlist_invert(invlist);
- len = invlist_len(invlist);
+ len = _invlist_len(invlist);
if (len != 0) { /* If empty do nothing */
array = invlist_array(invlist);
/* Need to allocate extra space to accommodate Perl's addition of a
* trailing NUL to SvPV's, since it thinks they are always strings */
- SV* new_invlist = _new_invlist(invlist_len(invlist) + 1);
+ SV* new_invlist = _new_invlist(_invlist_len(invlist) + 1);
STRLEN length = SvCUR(invlist);
PERL_ARGS_ASSERT_INVLIST_CLONE;
* will start over at the beginning of the list */
UV* pos = get_invlist_iter_addr(invlist);
- UV len = invlist_len(invlist);
+ UV len = _invlist_len(invlist);
UV *array;
PERL_ARGS_ASSERT_INVLIST_ITERNEXT;
* 0, or if the list is empty. If this distinction matters to you, check
* for emptiness before calling this function */
- UV len = invlist_len(invlist);
+ UV len = _invlist_len(invlist);
UV *array;
PERL_ARGS_ASSERT_INVLIST_HIGHEST;
UV* array_a = invlist_array(a);
UV* array_b = invlist_array(b);
- UV len_a = invlist_len(a);
- UV len_b = invlist_len(b);
+ UV len_a = _invlist_len(a);
+ UV len_b = _invlist_len(b);
UV i = 0; /* current index into the arrays */
bool retval = TRUE; /* Assume are identical until proven otherwise */
STRLEN foldlen;
U8 node_type;
bool next_is_quantifier;
- char * oldp;
+ char * oldp = NULL;
ender = 0;
node_type = compute_EXACTish(pRExC_state);
* rules hard-coded into Perl. (This case happens legitimately
* during compilation of Perl itself before the Unicode tables
* are generated) */
- if (invlist_len(PL_utf8_foldable) == 0) {
+ if (_invlist_len(PL_utf8_foldable) == 0) {
PL_utf8_foldclosures = newHV();
}
else {
}
/* If have completely emptied it, remove it completely */
- if (invlist_len(cp_list) == 0) {
+ if (_invlist_len(cp_list) == 0) {
SvREFCNT_dec(cp_list);
cp_list = NULL;
}