From de353015643cf10b437d714d3483c1209e079916 Mon Sep 17 00:00:00 2001 From: Karl Williamson Date: Thu, 4 Jul 2013 22:01:05 -0600 Subject: [PATCH] Revert "regcomp.c: Add a constant 0 element before inversion lists" This reverts commit 533c4e2f08b42d977e5004e823d4849f7473d2d0. This continues the backing out of this topic branch. A bisect shows that the first commit exhibiting an error is the first one in the branch. --- charclass_invlists.h | 250 +++++++++++++++++++++++---------------------------- embed.fnc | 10 +-- inline_invlist.c | 21 +++-- proto.h | 4 +- regcomp.c | 124 ++++++++++++++++--------- regen/mk_invlists.pl | 27 ++++-- 6 files changed, 224 insertions(+), 212 deletions(-) diff --git a/charclass_invlists.h b/charclass_invlists.h index 690526d..b5d71af 100644 --- a/charclass_invlists.h +++ b/charclass_invlists.h @@ -13,11 +13,11 @@ static UV Latin1_invlist[] = { 2, /* Number of elements */ 0, /* Current iteration position */ 0, /* Cache of previous search index result */ - 1039476070, /* Version and data structure type */ - 0, /* 0 if the list starts at 0; - 1 if it starts at the element beyond 0 */ - 0, - 256 + 290655244, /* Version and data structure type */ + 0, /* 0 if this is the first element of the list proper; + 1 if the next element is the first */ + 256, + 0 }; #endif @@ -28,10 +28,9 @@ static UV AboveLatin1_invlist[] = { 1, /* Number of elements */ 0, /* Current iteration position */ 0, /* Cache of previous search index result */ - 1039476070, /* Version and data structure type */ - 1, /* 0 if the list starts at 0; - 1 if it starts at the element beyond 0 */ - 0, + 290655244, /* Version and data structure type */ + 1, /* 0 if this is the first element of the list proper; + 1 if the next element is the first */ 256 }; @@ -43,11 +42,11 @@ static UV ASCII_invlist[] = { 2, /* Number of elements */ 0, /* Current iteration position */ 0, /* Cache of previous search index result */ - 1039476070, /* Version and data structure type */ - 0, /* 0 if the list starts at 0; - 1 if it starts at the element beyond 0 */ - 0, - 128 + 290655244, /* Version and data structure type */ + 0, /* 0 if this is the first element of the list proper; + 1 if the next element is the first */ + 128, + 0 }; #endif @@ -58,10 +57,9 @@ static UV L1Cased_invlist[] = { 16, /* Number of elements */ 0, /* Current iteration position */ 0, /* Cache of previous search index result */ - 1039476070, /* Version and data structure type */ - 1, /* 0 if the list starts at 0; - 1 if it starts at the element beyond 0 */ - 0, + 290655244, /* Version and data structure type */ + 1, /* 0 if this is the first element of the list proper; + 1 if the next element is the first */ 65, 91, 97, @@ -88,10 +86,9 @@ static UV VertSpace_invlist[] = { 6, /* Number of elements */ 0, /* Current iteration position */ 0, /* Cache of previous search index result */ - 1039476070, /* Version and data structure type */ - 1, /* 0 if the list starts at 0; - 1 if it starts at the element beyond 0 */ - 0, + 290655244, /* Version and data structure type */ + 1, /* 0 if this is the first element of the list proper; + 1 if the next element is the first */ 10, 14, 133, @@ -108,10 +105,9 @@ static UV PerlSpace_invlist[] = { 4, /* Number of elements */ 0, /* Current iteration position */ 0, /* Cache of previous search index result */ - 1039476070, /* Version and data structure type */ - 1, /* 0 if the list starts at 0; - 1 if it starts at the element beyond 0 */ - 0, + 290655244, /* Version and data structure type */ + 1, /* 0 if this is the first element of the list proper; + 1 if the next element is the first */ 9, 14, 32, @@ -126,10 +122,9 @@ static UV XPerlSpace_invlist[] = { 22, /* Number of elements */ 0, /* Current iteration position */ 0, /* Cache of previous search index result */ - 1039476070, /* Version and data structure type */ - 1, /* 0 if the list starts at 0; - 1 if it starts at the element beyond 0 */ - 0, + 290655244, /* Version and data structure type */ + 1, /* 0 if this is the first element of the list proper; + 1 if the next element is the first */ 9, 14, 32, @@ -162,10 +157,9 @@ static UV PosixAlnum_invlist[] = { 6, /* Number of elements */ 0, /* Current iteration position */ 0, /* Cache of previous search index result */ - 1039476070, /* Version and data structure type */ - 1, /* 0 if the list starts at 0; - 1 if it starts at the element beyond 0 */ - 0, + 290655244, /* Version and data structure type */ + 1, /* 0 if this is the first element of the list proper; + 1 if the next element is the first */ 48, 58, 65, @@ -182,10 +176,9 @@ static UV L1PosixAlnum_invlist[] = { 18, /* Number of elements */ 0, /* Current iteration position */ 0, /* Cache of previous search index result */ - 1039476070, /* Version and data structure type */ - 1, /* 0 if the list starts at 0; - 1 if it starts at the element beyond 0 */ - 0, + 290655244, /* Version and data structure type */ + 1, /* 0 if this is the first element of the list proper; + 1 if the next element is the first */ 48, 58, 65, @@ -214,10 +207,9 @@ static UV PosixAlpha_invlist[] = { 4, /* Number of elements */ 0, /* Current iteration position */ 0, /* Cache of previous search index result */ - 1039476070, /* Version and data structure type */ - 1, /* 0 if the list starts at 0; - 1 if it starts at the element beyond 0 */ - 0, + 290655244, /* Version and data structure type */ + 1, /* 0 if this is the first element of the list proper; + 1 if the next element is the first */ 65, 91, 97, @@ -232,10 +224,9 @@ static UV L1PosixAlpha_invlist[] = { 16, /* Number of elements */ 0, /* Current iteration position */ 0, /* Cache of previous search index result */ - 1039476070, /* Version and data structure type */ - 1, /* 0 if the list starts at 0; - 1 if it starts at the element beyond 0 */ - 0, + 290655244, /* Version and data structure type */ + 1, /* 0 if this is the first element of the list proper; + 1 if the next element is the first */ 65, 91, 97, @@ -262,10 +253,9 @@ static UV PosixBlank_invlist[] = { 4, /* Number of elements */ 0, /* Current iteration position */ 0, /* Cache of previous search index result */ - 1039476070, /* Version and data structure type */ - 1, /* 0 if the list starts at 0; - 1 if it starts at the element beyond 0 */ - 0, + 290655244, /* Version and data structure type */ + 1, /* 0 if this is the first element of the list proper; + 1 if the next element is the first */ 9, 10, 32, @@ -280,10 +270,9 @@ static UV XPosixBlank_invlist[] = { 18, /* Number of elements */ 0, /* Current iteration position */ 0, /* Cache of previous search index result */ - 1039476070, /* Version and data structure type */ - 1, /* 0 if the list starts at 0; - 1 if it starts at the element beyond 0 */ - 0, + 290655244, /* Version and data structure type */ + 1, /* 0 if this is the first element of the list proper; + 1 if the next element is the first */ 9, 10, 32, @@ -312,13 +301,13 @@ static UV PosixCntrl_invlist[] = { 4, /* Number of elements */ 0, /* Current iteration position */ 0, /* Cache of previous search index result */ - 1039476070, /* Version and data structure type */ - 0, /* 0 if the list starts at 0; - 1 if it starts at the element beyond 0 */ - 0, + 290655244, /* Version and data structure type */ + 0, /* 0 if this is the first element of the list proper; + 1 if the next element is the first */ 32, 127, - 128 + 128, + 0 }; #endif @@ -329,13 +318,13 @@ static UV XPosixCntrl_invlist[] = { 4, /* Number of elements */ 0, /* Current iteration position */ 0, /* Cache of previous search index result */ - 1039476070, /* Version and data structure type */ - 0, /* 0 if the list starts at 0; - 1 if it starts at the element beyond 0 */ - 0, + 290655244, /* Version and data structure type */ + 0, /* 0 if this is the first element of the list proper; + 1 if the next element is the first */ 32, 127, - 160 + 160, + 0 }; #endif @@ -346,10 +335,9 @@ static UV PosixDigit_invlist[] = { 2, /* Number of elements */ 0, /* Current iteration position */ 0, /* Cache of previous search index result */ - 1039476070, /* Version and data structure type */ - 1, /* 0 if the list starts at 0; - 1 if it starts at the element beyond 0 */ - 0, + 290655244, /* Version and data structure type */ + 1, /* 0 if this is the first element of the list proper; + 1 if the next element is the first */ 48, 58 }; @@ -362,10 +350,9 @@ static UV PosixGraph_invlist[] = { 2, /* Number of elements */ 0, /* Current iteration position */ 0, /* Cache of previous search index result */ - 1039476070, /* Version and data structure type */ - 1, /* 0 if the list starts at 0; - 1 if it starts at the element beyond 0 */ - 0, + 290655244, /* Version and data structure type */ + 1, /* 0 if this is the first element of the list proper; + 1 if the next element is the first */ 33, 127 }; @@ -378,10 +365,9 @@ static UV L1PosixGraph_invlist[] = { 4, /* Number of elements */ 0, /* Current iteration position */ 0, /* Cache of previous search index result */ - 1039476070, /* Version and data structure type */ - 1, /* 0 if the list starts at 0; - 1 if it starts at the element beyond 0 */ - 0, + 290655244, /* Version and data structure type */ + 1, /* 0 if this is the first element of the list proper; + 1 if the next element is the first */ 33, 127, 161, @@ -396,10 +382,9 @@ static UV PosixLower_invlist[] = { 2, /* Number of elements */ 0, /* Current iteration position */ 0, /* Cache of previous search index result */ - 1039476070, /* Version and data structure type */ - 1, /* 0 if the list starts at 0; - 1 if it starts at the element beyond 0 */ - 0, + 290655244, /* Version and data structure type */ + 1, /* 0 if this is the first element of the list proper; + 1 if the next element is the first */ 97, 123 }; @@ -412,10 +397,9 @@ static UV L1PosixLower_invlist[] = { 12, /* Number of elements */ 0, /* Current iteration position */ 0, /* Cache of previous search index result */ - 1039476070, /* Version and data structure type */ - 1, /* 0 if the list starts at 0; - 1 if it starts at the element beyond 0 */ - 0, + 290655244, /* Version and data structure type */ + 1, /* 0 if this is the first element of the list proper; + 1 if the next element is the first */ 97, 123, 170, @@ -438,10 +422,9 @@ static UV PosixPrint_invlist[] = { 2, /* Number of elements */ 0, /* Current iteration position */ 0, /* Cache of previous search index result */ - 1039476070, /* Version and data structure type */ - 1, /* 0 if the list starts at 0; - 1 if it starts at the element beyond 0 */ - 0, + 290655244, /* Version and data structure type */ + 1, /* 0 if this is the first element of the list proper; + 1 if the next element is the first */ 32, 127 }; @@ -454,10 +437,9 @@ static UV L1PosixPrint_invlist[] = { 4, /* Number of elements */ 0, /* Current iteration position */ 0, /* Cache of previous search index result */ - 1039476070, /* Version and data structure type */ - 1, /* 0 if the list starts at 0; - 1 if it starts at the element beyond 0 */ - 0, + 290655244, /* Version and data structure type */ + 1, /* 0 if this is the first element of the list proper; + 1 if the next element is the first */ 32, 127, 160, @@ -472,10 +454,9 @@ static UV PosixPunct_invlist[] = { 8, /* Number of elements */ 0, /* Current iteration position */ 0, /* Cache of previous search index result */ - 1039476070, /* Version and data structure type */ - 1, /* 0 if the list starts at 0; - 1 if it starts at the element beyond 0 */ - 0, + 290655244, /* Version and data structure type */ + 1, /* 0 if this is the first element of the list proper; + 1 if the next element is the first */ 33, 48, 58, @@ -494,10 +475,9 @@ static UV L1PosixPunct_invlist[] = { 20, /* Number of elements */ 0, /* Current iteration position */ 0, /* Cache of previous search index result */ - 1039476070, /* Version and data structure type */ - 1, /* 0 if the list starts at 0; - 1 if it starts at the element beyond 0 */ - 0, + 290655244, /* Version and data structure type */ + 1, /* 0 if this is the first element of the list proper; + 1 if the next element is the first */ 33, 48, 58, @@ -528,10 +508,9 @@ static UV PosixSpace_invlist[] = { 4, /* Number of elements */ 0, /* Current iteration position */ 0, /* Cache of previous search index result */ - 1039476070, /* Version and data structure type */ - 1, /* 0 if the list starts at 0; - 1 if it starts at the element beyond 0 */ - 0, + 290655244, /* Version and data structure type */ + 1, /* 0 if this is the first element of the list proper; + 1 if the next element is the first */ 9, 14, 32, @@ -546,10 +525,9 @@ static UV XPosixSpace_invlist[] = { 22, /* Number of elements */ 0, /* Current iteration position */ 0, /* Cache of previous search index result */ - 1039476070, /* Version and data structure type */ - 1, /* 0 if the list starts at 0; - 1 if it starts at the element beyond 0 */ - 0, + 290655244, /* Version and data structure type */ + 1, /* 0 if this is the first element of the list proper; + 1 if the next element is the first */ 9, 14, 32, @@ -582,10 +560,9 @@ static UV PosixUpper_invlist[] = { 2, /* Number of elements */ 0, /* Current iteration position */ 0, /* Cache of previous search index result */ - 1039476070, /* Version and data structure type */ - 1, /* 0 if the list starts at 0; - 1 if it starts at the element beyond 0 */ - 0, + 290655244, /* Version and data structure type */ + 1, /* 0 if this is the first element of the list proper; + 1 if the next element is the first */ 65, 91 }; @@ -598,10 +575,9 @@ static UV L1PosixUpper_invlist[] = { 6, /* Number of elements */ 0, /* Current iteration position */ 0, /* Cache of previous search index result */ - 1039476070, /* Version and data structure type */ - 1, /* 0 if the list starts at 0; - 1 if it starts at the element beyond 0 */ - 0, + 290655244, /* Version and data structure type */ + 1, /* 0 if this is the first element of the list proper; + 1 if the next element is the first */ 65, 91, 192, @@ -618,10 +594,9 @@ static UV PosixWord_invlist[] = { 8, /* Number of elements */ 0, /* Current iteration position */ 0, /* Cache of previous search index result */ - 1039476070, /* Version and data structure type */ - 1, /* 0 if the list starts at 0; - 1 if it starts at the element beyond 0 */ - 0, + 290655244, /* Version and data structure type */ + 1, /* 0 if this is the first element of the list proper; + 1 if the next element is the first */ 48, 58, 65, @@ -640,10 +615,9 @@ static UV L1PosixWord_invlist[] = { 20, /* Number of elements */ 0, /* Current iteration position */ 0, /* Cache of previous search index result */ - 1039476070, /* Version and data structure type */ - 1, /* 0 if the list starts at 0; - 1 if it starts at the element beyond 0 */ - 0, + 290655244, /* Version and data structure type */ + 1, /* 0 if this is the first element of the list proper; + 1 if the next element is the first */ 48, 58, 65, @@ -674,10 +648,9 @@ static UV PosixXDigit_invlist[] = { 6, /* Number of elements */ 0, /* Current iteration position */ 0, /* Cache of previous search index result */ - 1039476070, /* Version and data structure type */ - 1, /* 0 if the list starts at 0; - 1 if it starts at the element beyond 0 */ - 0, + 290655244, /* Version and data structure type */ + 1, /* 0 if this is the first element of the list proper; + 1 if the next element is the first */ 48, 58, 65, @@ -694,10 +667,9 @@ static UV XPosixXDigit_invlist[] = { 12, /* Number of elements */ 0, /* Current iteration position */ 0, /* Cache of previous search index result */ - 1039476070, /* Version and data structure type */ - 1, /* 0 if the list starts at 0; - 1 if it starts at the element beyond 0 */ - 0, + 290655244, /* Version and data structure type */ + 1, /* 0 if this is the first element of the list proper; + 1 if the next element is the first */ 48, 58, 65, @@ -718,10 +690,9 @@ static UV NonL1_Perl_Non_Final_Folds_invlist[] = { 44, /* Number of elements */ 0, /* Current iteration position */ 0, /* Cache of previous search index result */ - 1039476070, /* Version and data structure type */ - 1, /* 0 if the list starts at 0; - 1 if it starts at the element beyond 0 */ - 0, + 290655244, /* Version and data structure type */ + 1, /* 0 if this is the first element of the list proper; + 1 if the next element is the first */ 700, 701, 776, @@ -774,10 +745,9 @@ static UV _Perl_Multi_Char_Folds_invlist[] = { 58, /* Number of elements */ 0, /* Current iteration position */ 0, /* Cache of previous search index result */ - 1039476070, /* Version and data structure type */ - 1, /* 0 if the list starts at 0; - 1 if it starts at the element beyond 0 */ - 0, + 290655244, /* Version and data structure type */ + 1, /* 0 if this is the first element of the list proper; + 1 if the next element is the first */ 223, 224, 304, diff --git a/embed.fnc b/embed.fnc index 8ad9a13..5c27b27 100644 --- a/embed.fnc +++ b/embed.fnc @@ -1085,7 +1085,7 @@ Ap |SV* |regclass_swash |NULLOK const regexp *prog \ |NULLOK SV **listsvp|NULLOK SV **altsvp #ifdef PERL_IN_REGCOMP_C EMsR |SV* |_new_invlist_C_array|NN UV* list -: Not used currently: EXMs |bool |_invlistEQ |NN SV* const a|NN SV* const b|const bool complement_b +: Not used currently: EXMs |bool |_invlistEQ |NN SV* const a|NN SV* const b|bool complement_b #endif Ap |I32 |pregexec |NN REGEXP * const prog|NN char* stringarg \ |NN char* strend|NN char* strbeg|I32 minend \ @@ -1446,13 +1446,9 @@ EiMR |UV |invlist_highest|NN SV* const invlist #endif #if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_UTF8_C) EXmM |void |_invlist_intersection |NN SV* const a|NN SV* const b|NN SV** i -EXpM |void |_invlist_intersection_maybe_complement_2nd \ - |NULLOK SV* const a|NN SV* const b \ - |const bool complement_b|NN SV** i +EXpM |void |_invlist_intersection_maybe_complement_2nd|NULLOK SV* const a|NN SV* const b|bool complement_b|NN SV** i EXmM |void |_invlist_union |NULLOK SV* const a|NN SV* const b|NN SV** output -EXpM |void |_invlist_union_maybe_complement_2nd \ - |NULLOK SV* const a|NN SV* const b \ - |const bool complement_b|NN SV** output +EXpM |void |_invlist_union_maybe_complement_2nd|NULLOK SV* const a|NN SV* const b|bool complement_b|NN SV** output EXmM |void |_invlist_subtract|NN SV* const a|NN SV* const b|NN SV** result EXpM |void |_invlist_invert|NN SV* const invlist EXpM |void |_invlist_invert_prop|NN SV* const invlist diff --git a/inline_invlist.c b/inline_invlist.c index b194c0d..b56ce60 100644 --- a/inline_invlist.c +++ b/inline_invlist.c @@ -20,21 +20,20 @@ * insert that at this location. Then, if an auxiliary program doesn't change * correspondingly, it will be discovered immediately */ #define INVLIST_VERSION_ID_OFFSET 3 -#define INVLIST_VERSION_ID 1039476070 - -#define INVLIST_ZERO_OFFSET 4 /* 0 or 1 */ -/* The UV at position ZERO contains either 0 or 1. If 0, the inversion list - * contains the code point U+00000, and begins at element [0] in the array, - * which always contains 0. If 1, the inversion list doesn't contain U+0000, - * and it begins at element [1]. Inverting an inversion list consists of - * adding or removing the 0 at the beginning of it. By reserving a space for - * that 0, inversion can be made very fast: we just flip this UV */ +#define INVLIST_VERSION_ID 290655244 /* For safety, when adding new elements, remember to #undef them at the end of * the inversion list code section */ -#define HEADER_LENGTH (INVLIST_ZERO_OFFSET + 2) /* includes 1 for the constant - 0 element */ +#define INVLIST_ZERO_OFFSET 4 /* 0 or 1; must be last element in header */ +/* The UV at position ZERO contains either 0 or 1. If 0, the inversion list + * contains the code point U+00000, and begins here. If 1, the inversion list + * doesn't contain U+0000, and it begins at the next UV in the array. + * Inverting an inversion list consists of adding or removing the 0 at the + * beginning of it. By reserving a space for that 0, inversion can be made + * very fast */ + +#define HEADER_LENGTH (INVLIST_ZERO_OFFSET + 1) /* An element is in an inversion list iff its index is even numbered: 0, 2, 4, * etc */ diff --git a/proto.h b/proto.h index f5dc0dc..9a06590 100644 --- a/proto.h +++ b/proto.h @@ -6839,7 +6839,7 @@ PERL_CALLCONV SV* Perl__add_range_to_invlist(pTHX_ SV* invlist, const UV start, __attribute__nonnull__(pTHX_2) __attribute__nonnull__(pTHX_3); */ -PERL_CALLCONV void Perl__invlist_intersection_maybe_complement_2nd(pTHX_ SV* const a, SV* const b, const bool complement_b, SV** i) +PERL_CALLCONV void Perl__invlist_intersection_maybe_complement_2nd(pTHX_ SV* const a, SV* const b, bool complement_b, SV** i) __attribute__nonnull__(pTHX_2) __attribute__nonnull__(pTHX_4); #define PERL_ARGS_ASSERT__INVLIST_INTERSECTION_MAYBE_COMPLEMENT_2ND \ @@ -6870,7 +6870,7 @@ PERL_CALLCONV void Perl__invlist_populate_swatch(pTHX_ SV* const invlist, const __attribute__nonnull__(pTHX_2) __attribute__nonnull__(pTHX_3); */ -PERL_CALLCONV void Perl__invlist_union_maybe_complement_2nd(pTHX_ SV* const a, SV* const b, const bool complement_b, SV** output) +PERL_CALLCONV void Perl__invlist_union_maybe_complement_2nd(pTHX_ SV* const a, SV* const b, bool complement_b, SV** output) __attribute__nonnull__(pTHX_2) __attribute__nonnull__(pTHX_4); #define PERL_ARGS_ASSERT__INVLIST_UNION_MAYBE_COMPLEMENT_2ND \ diff --git a/regcomp.c b/regcomp.c index b2bf63c..4885c0b 100644 --- a/regcomp.c +++ b/regcomp.c @@ -7047,10 +7047,10 @@ S_reg_scan_name(pTHX_ RExC_state_t *pRExC_state, U32 flags) * list.) * Taking the complement (inverting) an inversion list is quite simple, if the * first element is 0, remove it; otherwise add a 0 element at the beginning. - * This implementation reserves an element (considered to be the final element - * of the header) at the beginning of each inversion list to always contain 0; - * there is an additional flag in the header which indicates if the list begins - * at the 0, or is offset to begin at the next element. + * This implementation reserves an element at the beginning of each inversion + * list to contain 0 when the list contains 0, and contains 1 otherwise. The + * actual beginning of the list is either that element if 0, or the next one if + * 1. * * More about inversion lists can be found in "Unicode Demystified" * Chapter 13 by Richard Gillam, published by Addison-Wesley. @@ -7075,11 +7075,11 @@ S__invlist_array_init(pTHX_ SV* const invlist, const bool will_have_0) { /* Returns a pointer to the first element in the inversion list's array. * This is called upon initialization of an inversion list. Where the - * array begins depends on whether the list has the code point U+0000 in it - * or not. The other parameter tells it whether the code that follows this - * call is about to put a 0 in the inversion list or not. The first - * element is either the final part of the header reserved for 0, if TRUE, - * or the first element of the non-heading part, if FALSE */ + * array begins depends on whether the list has the code point U+0000 + * in it or not. The other parameter tells it whether the code that + * follows this call is about to put a 0 in the inversion list or not. + * The first element is either the element with 0, if 0, or the next one, + * if 1 */ UV* zero = get_invlist_zero_addr(invlist); @@ -7090,8 +7090,7 @@ S__invlist_array_init(pTHX_ SV* const invlist, const bool will_have_0) /* 1^1 = 0; 1^0 = 1 */ *zero = 1 ^ will_have_0; - *(zero + 1) = 0; - return 1 + zero + *zero; + return zero + *zero; } PERL_STATIC_INLINE UV* @@ -7109,12 +7108,10 @@ S_invlist_array(pTHX_ SV* const invlist) assert(*get_invlist_zero_addr(invlist) == 0 || *get_invlist_zero_addr(invlist) == 1); - /* The array begins either at the header element reserved for zero or the - * element after that. The reserved element is 1 past the zero_addr - * element; the latter contains 0 or 1 to indicate how much additionally to - * add */ - assert(0 == *(1 + get_invlist_zero_addr(invlist))); - return (UV *) (1 + get_invlist_zero_addr(invlist) + /* The array begins either at the element reserved for zero if the + * list contains 0 (that element will be set to 0), or otherwise the next + * element (in which case the reserved element will be set to 1). */ + return (UV *) (get_invlist_zero_addr(invlist) + *get_invlist_zero_addr(invlist)); } @@ -7130,7 +7127,19 @@ S_invlist_set_len(pTHX_ SV* const invlist, const UV len) assert(len <= SvLEN(invlist)); SvCUR_set(invlist, TO_INTERNAL_SIZE(len)); - /* Note that when inverting, SvCUR shouldn't change */ + /* If the list contains U+0000, that element is part of the header, + * and should not be counted as part of the array. It will contain + * 0 in that case, and 1 otherwise. So we could flop 0=>1, 1=>0 and + * subtract: + * SvCUR_set(invlist, + * TO_INTERNAL_SIZE(len + * - (*get_invlist_zero_addr(inv_list) ^ 1))); + * But, this is only valid if len is not 0. The consequences of not doing + * this is that the memory allocation code may think that 1 more UV is + * being used than actually is, and so might do an unnecessary grow. That + * seems worth not bothering to make this the precise amount. + * + * Note that when inverting, SvCUR shouldn't change */ } PERL_STATIC_INLINE IV* @@ -7182,8 +7191,10 @@ S_invlist_max(pTHX_ SV* const invlist) PERL_STATIC_INLINE UV* S_get_invlist_zero_addr(pTHX_ SV* invlist) { - /* Return the address of the UV that says whether the inversion list is - * offset (it contains 1) or not (contains 0) */ + /* Return the address of the UV that is reserved to hold 0 if the inversion + * list contains 0. This has to be the last element of the heading, as the + * list proper starts with either it if 0, or the next element if not. + * (But we force it to contain either 0 or 1) */ PERL_ARGS_ASSERT_GET_INVLIST_ZERO_ADDR; @@ -7200,7 +7211,6 @@ Perl__new_invlist(pTHX_ IV initial_size) * system default is used instead */ SV* new_list; - UV* zero_addr; if (initial_size < 0) { initial_size = INVLIST_INITIAL_LEN; @@ -7215,13 +7225,11 @@ Perl__new_invlist(pTHX_ IV initial_size) /* This should force a segfault if a method doesn't initialize this * properly */ - zero_addr = get_invlist_zero_addr(new_list); - *zero_addr = UV_MAX; - *(zero_addr + 1) = 0; + *get_invlist_zero_addr(new_list) = UV_MAX; *get_invlist_previous_index_addr(new_list) = 0; *get_invlist_version_id_addr(new_list) = INVLIST_VERSION_ID; -#if HEADER_LENGTH != 6 +#if HEADER_LENGTH != 5 # error Need to regenerate INVLIST_VERSION_ID by running perl -E 'say int(rand 2**31-1)', and then changing the #if to the new length #endif @@ -7546,7 +7554,7 @@ Perl__invlist_populate_swatch(pTHX_ SV* const invlist, const UV start, const UV } void -Perl__invlist_union_maybe_complement_2nd(pTHX_ SV* const a, SV* const b, const bool complement_b, SV** output) +Perl__invlist_union_maybe_complement_2nd(pTHX_ SV* const a, SV* const b, bool complement_b, SV** output) { /* Take the union of two inversion lists and point to it. *output * SHOULD BE DEFINED upon input, and if it points to one of the two lists, @@ -7568,8 +7576,8 @@ Perl__invlist_union_maybe_complement_2nd(pTHX_ SV* const a, SV* const b, const b * return the larger of the input lists, but then outside code might need * to keep track of whether to free the input list or not */ - const UV* array_a; /* a's array */ - const UV* array_b; + UV* array_a; /* a's array */ + UV* array_b; UV len_a; /* length of a's array */ UV len_b; @@ -7637,17 +7645,23 @@ Perl__invlist_union_maybe_complement_2nd(pTHX_ SV* const a, SV* const b, const b if (complement_b) { /* To complement, we invert: if the first element is 0, remove it. To - * do this, we just pretend the array starts one later */ + * do this, we just pretend the array starts one later, and clear the + * flag as we don't have to do anything else later */ if (array_b[0] == 0) { array_b++; len_b--; + complement_b = FALSE; } else { - /* But if the first element is not zero, we pretend the list starts - * at the 0 that is always stored immediately before the array. */ + /* But if the first element is not zero, we unshift a 0 before the + * array. The data structure reserves a space for that 0 (which + * should be a '1' right now), so physical shifting is unneeded, + * but temporarily change that element to 0. Before exiting the + * routine, we must restore the element to '1' */ array_b--; len_b++; + array_b[0] = 0; } } @@ -7764,6 +7778,11 @@ Perl__invlist_union_maybe_complement_2nd(pTHX_ SV* const a, SV* const b, const b } } + /* If we've changed b, restore it */ + if (complement_b) { + array_b[0] = 1; + } + /* We may be removing a reference to one of the inputs */ if (a == *output || b == *output) { assert(! invlist_is_iterating(*output)); @@ -7775,7 +7794,7 @@ Perl__invlist_union_maybe_complement_2nd(pTHX_ SV* const a, SV* const b, const b } void -Perl__invlist_intersection_maybe_complement_2nd(pTHX_ SV* const a, SV* const b, const bool complement_b, SV** i) +Perl__invlist_intersection_maybe_complement_2nd(pTHX_ SV* const a, SV* const b, bool complement_b, SV** i) { /* Take the intersection of two inversion lists and point to it. *i * SHOULD BE DEFINED upon input, and if it points to one of the two lists, @@ -7792,8 +7811,8 @@ Perl__invlist_intersection_maybe_complement_2nd(pTHX_ SV* const a, SV* const b, * union above */ - const UV* array_a; /* a's array */ - const UV* array_b; + UV* array_a; /* a's array */ + UV* array_b; UV len_a; /* length of a's array */ UV len_b; @@ -7858,17 +7877,23 @@ Perl__invlist_intersection_maybe_complement_2nd(pTHX_ SV* const a, SV* const b, if (complement_b) { /* To complement, we invert: if the first element is 0, remove it. To - * do this, we just pretend the array starts one later */ + * do this, we just pretend the array starts one later, and clear the + * flag as we don't have to do anything else later */ if (array_b[0] == 0) { array_b++; len_b--; + complement_b = FALSE; } else { - /* But if the first element is not zero, we pretend the list starts - * at the 0 that is always stored immediately before the array. */ + /* But if the first element is not zero, we unshift a 0 before the + * array. The data structure reserves a space for that 0 (which + * should be a '1' right now), so physical shifting is unneeded, + * but temporarily change that element to 0. Before exiting the + * routine, we must restore the element to '1' */ array_b--; len_b++; + array_b[0] = 0; } } @@ -7975,6 +8000,11 @@ Perl__invlist_intersection_maybe_complement_2nd(pTHX_ SV* const a, SV* const b, } } + /* If we've changed b, restore it */ + if (complement_b) { + array_b[0] = 1; + } + /* We may be removing a reference to one of the inputs */ if (a == *i || b == *i) { assert(! invlist_is_iterating(*i)); @@ -8313,14 +8343,14 @@ Perl__invlist_dump(pTHX_ SV* const invlist, const char * const header) #if 0 bool -S__invlistEQ(pTHX_ SV* const a, SV* const b, const bool complement_b) +S__invlistEQ(pTHX_ SV* const a, SV* const b, bool complement_b) { /* Return a boolean as to if the two passed in inversion lists are * identical. The final argument, if TRUE, says to take the complement of * the second inversion list before doing the comparison */ - const UV* array_a = invlist_array(a); - const UV* array_b = invlist_array(b); + UV* array_a = invlist_array(a); + UV* array_b = invlist_array(b); UV len_a = _invlist_len(a); UV len_b = _invlist_len(b); @@ -8342,15 +8372,20 @@ S__invlistEQ(pTHX_ SV* const a, SV* const b, const bool complement_b) /* Otherwise, to complement, we invert. Here, the first element is * 0, just remove it. To do this, we just pretend the array starts - * one later */ + * one later, and clear the flag as we don't have to do anything + * else later */ array_b++; len_b--; + complement_b = FALSE; } else { - /* But if the first element is not zero, we pretend the list starts - * at the 0 that is always stored immediately before the array. */ + /* But if the first element is not zero, we unshift a 0 before the + * array. The data structure reserves a space for that 0 (which + * should be a '1' right now), so physical shifting is unneeded, + * but temporarily change that element to 0. Before exiting the + * routine, we must restore the element to '1' */ array_b--; len_b++; array_b[0] = 0; @@ -8370,6 +8405,9 @@ S__invlistEQ(pTHX_ SV* const a, SV* const b, const bool complement_b) } } + if (complement_b) { + array_b[0] = 1; + } return retval; } #endif diff --git a/regen/mk_invlists.pl b/regen/mk_invlists.pl index 27c0802..67b6e41 100644 --- a/regen/mk_invlists.pl +++ b/regen/mk_invlists.pl @@ -15,7 +15,7 @@ require 'regen/regen_lib.pl'; # in the headers is used to minimize the possibility of things getting # out-of-sync, or the wrong data structure being passed. Currently that # random number is: -my $VERSION_DATA_STRUCTURE_TYPE = 1039476070; +my $VERSION_DATA_STRUCTURE_TYPE = 290655244; my $out_fh = open_new('charclass_invlists.h', '>', {style => '*', by => $0, @@ -36,18 +36,27 @@ sub output_invlist ($$) { # Output the inversion list $invlist using the name $name for it. # It is output in the exact internal form for inversion lists. - # Is the last element of the header 0, or 1 ? - my $zero_or_one = 0; - my $count = @$invlist; - if ($invlist->[0] != 0) { - unshift @$invlist, 0; + my $zero_or_one; # Is the last element of the header 0, or 1 ? + + # If the first element is 0, it goes in the header, instead of the body + if ($invlist->[0] == 0) { + shift @$invlist; + + $zero_or_one = 0; + + # Add a dummy 0 at the end so that the length is constant. inversion + # lists are always stored with enough room so that if they change from + # beginning with 0, they don't have to grow. + push @$invlist, 0; + } + else { $zero_or_one = 1; } print $out_fh "\n#ifndef PERL_IN_XSUB_RE\n" unless exists $include_in_ext_re{$name}; print $out_fh "\nstatic UV ${name}_invlist[] = {\n"; - print $out_fh "\t$count,\t/* Number of elements */\n"; + print $out_fh "\t", scalar @$invlist, ",\t/* Number of elements */\n"; # This should be UV_MAX, but I (khw) am not confident that the suffixes # for specifying the constant are portable, e.g. 'ull' on a 32 bit @@ -56,8 +65,8 @@ sub output_invlist ($$) { print $out_fh "\t0,\t/* Cache of previous search index result */\n"; print $out_fh "\t$VERSION_DATA_STRUCTURE_TYPE, /* Version and data structure type */\n"; print $out_fh "\t", $zero_or_one, - ",\t/* 0 if the list starts at 0;", - "\n\t\t 1 if it starts at the element beyond 0 */\n"; + ",\t/* 0 if this is the first element of the list proper;", + "\n\t\t 1 if the next element is the first */\n"; # The main body are the UVs passed in to this routine. Do the final # element separately -- 1.8.3.1