X-Git-Url: https://perl5.git.perl.org/perl5.git/blobdiff_plain/5eb114cff44a7920ae194ed22b1af045e54f96d4..2b29d4bc27bf0f6ecfae06ec423a6643b3a6fe03:/inline_invlist.c diff --git a/inline_invlist.c b/inline_invlist.c index 936a298..1589f95 100644 --- a/inline_invlist.c +++ b/inline_invlist.c @@ -8,59 +8,44 @@ #if defined(PERL_IN_UTF8_C) || defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_REGEXEC_C) -#define INVLIST_LEN_OFFSET 0 /* Number of elements in the inversion list */ - -/* This is a combination of a version and data structure type, so that one - * being passed in can be validated to be an inversion list of the correct - * vintage. When the structure of the header is changed, a new random number - * in the range 2**31-1 should be generated. Then, if an auxiliary program - * doesn't change correspondingly, it will be discovered immediately */ -#define INVLIST_VERSION_ID_OFFSET 1 -#define INVLIST_VERSION_ID 1511554547 - -#define INVLIST_OFFSET_OFFSET 2 /* 0 or 1 */ -/* The UV at this position contains either 0 or 1. If 0, the inversion list - * contains the code point U+00000, and begins at element [0] in the array, - * which always contains 0. If 1, the inversion list doesn't contain U+0000, - * and it begins at element [1]. Inverting an inversion list consists of - * adding or removing the 0 at the beginning of it. By reserving a space for - * that 0, inversion can be made very fast: we just flip this UV */ - -/* For safety, when adding new elements, remember to #undef them at the end of - * the inversion list code section */ - -#define HEADER_LENGTH (INVLIST_OFFSET_OFFSET + 1) /* includes 1 for the constant - 0 element */ - /* An element is in an inversion list iff its index is even numbered: 0, 2, 4, * etc */ #define ELEMENT_RANGE_MATCHES_INVLIST(i) (! ((i) & 1)) #define PREV_RANGE_MATCHES_INVLIST(i) (! 
ELEMENT_RANGE_MATCHES_INVLIST(i)) -PERL_STATIC_INLINE STRLEN* -S__get_invlist_len_addr(pTHX_ SV* invlist) +/* This converts to/from our UVs to what the SV code is expecting: bytes. */ +#define TO_INTERNAL_SIZE(x) ((x) * sizeof(UV)) +#define FROM_INTERNAL_SIZE(x) ((x)/ sizeof(UV)) + +PERL_STATIC_INLINE bool* +S_get_invlist_offset_addr(SV* invlist) { - /* Return the address of the UV that contains the current number - * of used elements in the inversion list */ + /* Return the address of the field that says whether the inversion list is + * offset (it contains 1) or not (contains 0) */ + PERL_ARGS_ASSERT_GET_INVLIST_OFFSET_ADDR; - PERL_ARGS_ASSERT__GET_INVLIST_LEN_ADDR; + assert(SvTYPE(invlist) == SVt_INVLIST); - return &(LvTARGLEN(invlist)); + return &(((XINVLIST*) SvANY(invlist))->is_offset); } PERL_STATIC_INLINE UV -S__invlist_len(pTHX_ SV* const invlist) +S__invlist_len(SV* const invlist) { /* Returns the current number of elements stored in the inversion list's * array */ PERL_ARGS_ASSERT__INVLIST_LEN; - return *_get_invlist_len_addr(invlist); + assert(SvTYPE(invlist) == SVt_INVLIST); + + return (SvCUR(invlist) == 0) + ? 0 + : FROM_INTERNAL_SIZE(SvCUR(invlist)) - *get_invlist_offset_addr(invlist); } PERL_STATIC_INLINE bool -S__invlist_contains_cp(pTHX_ SV* const invlist, const UV cp) +S__invlist_contains_cp(SV* const invlist, const UV cp) { /* Does <invlist> contain code point <cp> as part of the set? */ @@ -71,4 +56,32 @@ S__invlist_contains_cp(pTHX_ SV* const invlist, const UV cp) return index >= 0 && ELEMENT_RANGE_MATCHES_INVLIST(index); } +PERL_STATIC_INLINE UV* +S_invlist_array(SV* const invlist) +{ + /* Returns the pointer to the inversion list's array. Every time the + * length changes, this needs to be called in case malloc or realloc moved + * it */ + + PERL_ARGS_ASSERT_INVLIST_ARRAY; + + /* Must not be empty. 
If this fails, you probably didn't check for + * being non-zero before trying to get the array */ + assert(_invlist_len(invlist)); + + /* The very first element always contains zero. The array begins either + * there, or if the inversion list is offset, at the element after it. + * The offset header field determines which; it contains 0 or 1 to indicate + * how much additionally to add */ + assert(0 == *(SvPVX(invlist))); + return ((UV *) SvPVX(invlist) + *get_invlist_offset_addr(invlist)); +} + +# if defined(PERL_IN_UTF8_C) || defined(PERL_IN_REGEXEC_C) + +/* These symbols are only needed later in regcomp.c */ +# undef TO_INTERNAL_SIZE +# undef FROM_INTERNAL_SIZE +# endif + #endif