From b6a0ff336ac042af856687d8dad55d9515a4779b Mon Sep 17 00:00:00 2001 From: Karl Williamson Date: Fri, 25 Nov 2011 12:59:51 -0700 Subject: [PATCH] regcomp.c: Add _invlist_populate_swatch() This function will be used in future commits --- embed.fnc | 1 + embed.h | 1 + proto.h | 6 +++++ regcomp.c | 83 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 91 insertions(+) diff --git a/embed.fnc b/embed.fnc index 97b9d26..9d2f239 100644 --- a/embed.fnc +++ b/embed.fnc @@ -1384,6 +1384,7 @@ EXMpR |HV* |_swash_inversion_hash |NN SV* const swash EXMpR |SV* |_new_invlist |IV initial_size EXMpR |SV* |_swash_to_invlist |NN SV* const swash EXMp |void |_append_range_to_invlist |NN SV* const invlist|const UV start|const UV end +EXMp |void |_invlist_populate_swatch |NN SV* const invlist|const UV start|const UV end|NN U8* swatch #endif #if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_REGEXEC_C) || defined(PERL_IN_UTF8_C) EXp |SV* |_core_swash_init|NN const char* pkg|NN const char* name|NN SV* listsv|I32 minbits|I32 none diff --git a/embed.h b/embed.h index e245da5..8f0b74e 100644 --- a/embed.h +++ b/embed.h @@ -951,6 +951,7 @@ #define _invlist_intersection(a,b,c) Perl__invlist_intersection(aTHX_ a,b,c) #define _invlist_invert(a) Perl__invlist_invert(aTHX_ a) #define _invlist_invert_prop(a) Perl__invlist_invert_prop(aTHX_ a) +#define _invlist_populate_swatch(a,b,c,d) Perl__invlist_populate_swatch(aTHX_ a,b,c,d) #define _invlist_subtract(a,b,c) Perl__invlist_subtract(aTHX_ a,b,c) #define _invlist_union(a,b,c) Perl__invlist_union(aTHX_ a,b,c) #define _new_invlist(a) Perl__new_invlist(aTHX_ a) diff --git a/proto.h b/proto.h index e7ec154..c4dc4b3 100644 --- a/proto.h +++ b/proto.h @@ -6561,6 +6561,12 @@ PERL_CALLCONV void Perl__invlist_invert_prop(pTHX_ SV* const invlist) #define PERL_ARGS_ASSERT__INVLIST_INVERT_PROP \ assert(invlist) +PERL_CALLCONV void Perl__invlist_populate_swatch(pTHX_ SV* const invlist, const UV start, const UV end, U8* swatch) + __attribute__nonnull__(pTHX_1) + __attribute__nonnull__(pTHX_4); +#define PERL_ARGS_ASSERT__INVLIST_POPULATE_SWATCH \ + assert(invlist); assert(swatch) + PERL_CALLCONV void Perl__invlist_subtract(pTHX_ SV* const a, SV* const b, SV** result) __attribute__nonnull__(pTHX_1) __attribute__nonnull__(pTHX_2) diff --git a/regcomp.c b/regcomp.c index b8a4339..347f419 100644 --- a/regcomp.c +++ b/regcomp.c @@ -6185,6 +6185,89 @@ S_invlist_search(pTHX_ SV* const invlist, const UV cp) } void +Perl__invlist_populate_swatch(pTHX_ SV* const invlist, const UV start, const UV end, U8* swatch) +{ + /* populates a swatch of a swash the same way swatch_get() does in utf8.c, + * but is used when the swash has an inversion list. This makes this much + * faster, as it uses a binary search instead of a linear one. This is + * intimately tied to that function, and perhaps should be in utf8.c, + * except it is intimately tied to inversion lists as well. It assumes + * that is all 0's on input */ + + UV current = start; + const IV len = invlist_len(invlist); + IV i; + const UV * array; + + PERL_ARGS_ASSERT__INVLIST_POPULATE_SWATCH; + + if (len == 0) { /* Empty inversion list */ + return; + } + + array = invlist_array(invlist); + + /* Find which element it is */ + i = invlist_search(invlist, start); + + /* We populate from to */ + while (current < end) { + UV upper; + + /* The inversion list gives the results for every possible code point + * after the first one in the list. Only those ranges whose index is + * even are ones that the inversion list matches. For the odd ones, + * and if the initial code point is not in the list, we have to skip + * forward to the next element */ + if (i == -1 || ! ELEMENT_RANGE_MATCHES_INVLIST(i)) { + i++; + if (i >= len) { /* Finished if beyond the end of the array */ + return; + } + current = array[i]; + if (current >= end) { /* Finished if beyond the end of what we + are populating */ + return; + } + } + assert(current >= start); + + /* The current range ends one below the next one, except don't go past + * */ + i++; + upper = (i < len && array[i] < end) ? array[i] : end; + + /* Here we are in a range that matches. Populate a bit in the 3-bit U8 + * for each code point in it */ + for (; current < upper; current++) { + const STRLEN offset = (STRLEN)(current - start); + swatch[offset >> 3] |= 1 << (offset & 7); + } + + /* Quit if at the end of the list */ + if (i >= len) { + + /* But first, have to deal with the highest possible code point on + * the platform. The previous code assumes that is one + * beyond where we want to populate, but that is impossible at the + * platform's infinity, so have to handle it specially */ + if (UNLIKELY(end == UV_MAX && ELEMENT_RANGE_MATCHES_INVLIST(len-1))) + { + const STRLEN offset = (STRLEN)(end - start); + swatch[offset >> 3] |= 1 << (offset & 7); + } + return; + } + + /* Advance to the next range, which will be for code points not in the + * inversion list */ + current = array[i]; + } + + return; +} + +void Perl__invlist_union(pTHX_ SV* const a, SV* const b, SV** output) { /* Take the union of two inversion lists and point 'result' to it. If -- 1.8.3.1