| 1 | /* invlist_inline.h |
| 2 | * |
| 3 | * Copyright (C) 2012 by Larry Wall and others |
| 4 | * |
| 5 | * You may distribute under the terms of either the GNU General Public |
| 6 | * License or the Artistic License, as specified in the README file. |
| 7 | */ |
| 8 | |
| 9 | #ifndef PERL_INVLIST_INLINE_H_ |
| 10 | #define PERL_INVLIST_INLINE_H_ |
| 11 | |
| 12 | #if defined(PERL_IN_UTF8_C) \ |
| 13 | || defined(PERL_IN_REGCOMP_C) \ |
| 14 | || defined(PERL_IN_REGEXEC_C) \ |
| 15 | || defined(PERL_IN_TOKE_C) \ |
| 16 | || defined(PERL_IN_PP_C) \ |
| 17 | || defined(PERL_IN_OP_C) \ |
| 18 | || defined(PERL_IN_DOOP_C) |
| 19 | |
/* Membership of a range is decided purely by the parity of its index in the
 * inversion list array: the ranges that begin at even indices (0, 2, 4, ...)
 * are in the set.  PREV_RANGE_MATCHES_INVLIST is the logical negation of
 * ELEMENT_RANGE_MATCHES_INVLIST, so it is true exactly for odd indices. */
#define ELEMENT_RANGE_MATCHES_INVLIST(i) (((i) & 1) == 0)
#define PREV_RANGE_MATCHES_INVLIST(i) (ELEMENT_RANGE_MATCHES_INVLIST(i) == 0)
| 24 | |
/* The SV code measures buffers in bytes, whereas inversion lists count UV
 * elements.  TO_INTERNAL_SIZE turns an element count into a byte count;
 * FROM_INTERNAL_SIZE turns a byte count back into an element count (any
 * partial element is discarded by the integer division). */
#define TO_INTERNAL_SIZE(x)   (sizeof(UV) * (x))
#define FROM_INTERNAL_SIZE(x) ((x) / sizeof(UV))
| 28 | |
| 29 | PERL_STATIC_INLINE bool |
| 30 | S_is_invlist(SV* const invlist) |
| 31 | { |
| 32 | return invlist != NULL && SvTYPE(invlist) == SVt_INVLIST; |
| 33 | } |
| 34 | |
| 35 | PERL_STATIC_INLINE bool* |
| 36 | S_get_invlist_offset_addr(SV* invlist) |
| 37 | { |
| 38 | /* Return the address of the field that says whether the inversion list is |
| 39 | * offset (it contains 1) or not (contains 0) */ |
| 40 | PERL_ARGS_ASSERT_GET_INVLIST_OFFSET_ADDR; |
| 41 | |
| 42 | assert(is_invlist(invlist)); |
| 43 | |
| 44 | return &(((XINVLIST*) SvANY(invlist))->is_offset); |
| 45 | } |
| 46 | |
| 47 | PERL_STATIC_INLINE UV |
| 48 | S__invlist_len(SV* const invlist) |
| 49 | { |
| 50 | /* Returns the current number of elements stored in the inversion list's |
| 51 | * array */ |
| 52 | |
| 53 | PERL_ARGS_ASSERT__INVLIST_LEN; |
| 54 | |
| 55 | assert(is_invlist(invlist)); |
| 56 | |
| 57 | return (SvCUR(invlist) == 0) |
| 58 | ? 0 |
| 59 | : FROM_INTERNAL_SIZE(SvCUR(invlist)) - *get_invlist_offset_addr(invlist); |
| 60 | } |
| 61 | |
| 62 | PERL_STATIC_INLINE bool |
| 63 | S__invlist_contains_cp(SV* const invlist, const UV cp) |
| 64 | { |
| 65 | /* Does <invlist> contain code point <cp> as part of the set? */ |
| 66 | |
| 67 | IV index = _invlist_search(invlist, cp); |
| 68 | |
| 69 | PERL_ARGS_ASSERT__INVLIST_CONTAINS_CP; |
| 70 | |
| 71 | return index >= 0 && ELEMENT_RANGE_MATCHES_INVLIST(index); |
| 72 | } |
| 73 | |
| 74 | PERL_STATIC_INLINE UV* |
| 75 | S_invlist_array(SV* const invlist) |
| 76 | { |
| 77 | /* Returns the pointer to the inversion list's array. Every time the |
| 78 | * length changes, this needs to be called in case malloc or realloc moved |
| 79 | * it */ |
| 80 | |
| 81 | PERL_ARGS_ASSERT_INVLIST_ARRAY; |
| 82 | |
| 83 | /* Must not be empty. If these fail, you probably didn't check for <len> |
| 84 | * being non-zero before trying to get the array */ |
| 85 | assert(_invlist_len(invlist)); |
| 86 | |
| 87 | /* The very first element always contains zero, The array begins either |
| 88 | * there, or if the inversion list is offset, at the element after it. |
| 89 | * The offset header field determines which; it contains 0 or 1 to indicate |
| 90 | * how much additionally to add */ |
| 91 | assert(0 == *(SvPVX(invlist))); |
| 92 | return ((UV *) SvPVX(invlist) + *get_invlist_offset_addr(invlist)); |
| 93 | } |
| 94 | |
| 95 | #endif |
| 96 | #if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_OP_C) || defined(PERL_IN_DOOP_C) |
| 97 | |
| 98 | PERL_STATIC_INLINE void |
| 99 | S_invlist_extend(pTHX_ SV* const invlist, const UV new_max) |
| 100 | { |
| 101 | /* Grow the maximum size of an inversion list */ |
| 102 | |
| 103 | PERL_ARGS_ASSERT_INVLIST_EXTEND; |
| 104 | |
| 105 | assert(SvTYPE(invlist) == SVt_INVLIST); |
| 106 | |
| 107 | /* Add one to account for the zero element at the beginning which may not |
| 108 | * be counted by the calling parameters */ |
| 109 | SvGROW((SV *)invlist, TO_INTERNAL_SIZE(new_max + 1)); |
| 110 | } |
| 111 | |
| 112 | PERL_STATIC_INLINE void |
| 113 | S_invlist_set_len(pTHX_ SV* const invlist, const UV len, const bool offset) |
| 114 | { |
| 115 | /* Sets the current number of elements stored in the inversion list. |
| 116 | * Updates SvCUR correspondingly */ |
| 117 | PERL_UNUSED_CONTEXT; |
| 118 | PERL_ARGS_ASSERT_INVLIST_SET_LEN; |
| 119 | |
| 120 | assert(SvTYPE(invlist) == SVt_INVLIST); |
| 121 | |
| 122 | SvCUR_set(invlist, |
| 123 | (len == 0) |
| 124 | ? 0 |
| 125 | : TO_INTERNAL_SIZE(len + offset)); |
| 126 | assert(SvLEN(invlist) == 0 || SvCUR(invlist) <= SvLEN(invlist)); |
| 127 | } |
| 128 | |
| 129 | PERL_STATIC_INLINE SV* |
| 130 | S_add_cp_to_invlist(pTHX_ SV* invlist, const UV cp) { |
| 131 | return _add_range_to_invlist(invlist, cp, cp); |
| 132 | } |
| 133 | |
| 134 | PERL_STATIC_INLINE UV |
| 135 | S_invlist_highest(SV* const invlist) |
| 136 | { |
| 137 | /* Returns the highest code point that matches an inversion list. This API |
| 138 | * has an ambiguity, as it returns 0 under either the highest is actually |
| 139 | * 0, or if the list is empty. If this distinction matters to you, check |
| 140 | * for emptiness before calling this function */ |
| 141 | |
| 142 | UV len = _invlist_len(invlist); |
| 143 | UV *array; |
| 144 | |
| 145 | PERL_ARGS_ASSERT_INVLIST_HIGHEST; |
| 146 | |
| 147 | if (len == 0) { |
| 148 | return 0; |
| 149 | } |
| 150 | |
| 151 | array = invlist_array(invlist); |
| 152 | |
| 153 | /* The last element in the array in the inversion list always starts a |
| 154 | * range that goes to infinity. That range may be for code points that are |
| 155 | * matched in the inversion list, or it may be for ones that aren't |
| 156 | * matched. In the latter case, the highest code point in the set is one |
| 157 | * less than the beginning of this range; otherwise it is the final element |
| 158 | * of this range: infinity */ |
| 159 | return (ELEMENT_RANGE_MATCHES_INVLIST(len - 1)) |
| 160 | ? UV_MAX |
| 161 | : array[len - 1] - 1; |
| 162 | } |
| 163 | |
| 164 | #endif |
| 165 | #if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_OP_C) |
| 166 | |
| 167 | PERL_STATIC_INLINE STRLEN* |
| 168 | S_get_invlist_iter_addr(SV* invlist) |
| 169 | { |
| 170 | /* Return the address of the UV that contains the current iteration |
| 171 | * position */ |
| 172 | |
| 173 | PERL_ARGS_ASSERT_GET_INVLIST_ITER_ADDR; |
| 174 | |
| 175 | assert(is_invlist(invlist)); |
| 176 | |
| 177 | return &(((XINVLIST*) SvANY(invlist))->iterator); |
| 178 | } |
| 179 | |
| 180 | PERL_STATIC_INLINE void |
| 181 | S_invlist_iterinit(SV* invlist) /* Initialize iterator for invlist */ |
| 182 | { |
| 183 | PERL_ARGS_ASSERT_INVLIST_ITERINIT; |
| 184 | |
| 185 | *get_invlist_iter_addr(invlist) = 0; |
| 186 | } |
| 187 | |
| 188 | PERL_STATIC_INLINE void |
| 189 | S_invlist_iterfinish(SV* invlist) |
| 190 | { |
| 191 | /* Terminate iterator for invlist. This is to catch development errors. |
| 192 | * Any iteration that is interrupted before completed should call this |
| 193 | * function. Functions that add code points anywhere else but to the end |
| 194 | * of an inversion list assert that they are not in the middle of an |
| 195 | * iteration. If they were, the addition would make the iteration |
| 196 | * problematical: if the iteration hadn't reached the place where things |
| 197 | * were being added, it would be ok */ |
| 198 | |
| 199 | PERL_ARGS_ASSERT_INVLIST_ITERFINISH; |
| 200 | |
| 201 | *get_invlist_iter_addr(invlist) = (STRLEN) UV_MAX; |
| 202 | } |
| 203 | |
| 204 | STATIC bool |
| 205 | S_invlist_iternext(SV* invlist, UV* start, UV* end) |
| 206 | { |
| 207 | /* An C<invlist_iterinit> call on <invlist> must be used to set this up. |
| 208 | * This call sets in <*start> and <*end>, the next range in <invlist>. |
| 209 | * Returns <TRUE> if successful and the next call will return the next |
| 210 | * range; <FALSE> if was already at the end of the list. If the latter, |
| 211 | * <*start> and <*end> are unchanged, and the next call to this function |
| 212 | * will start over at the beginning of the list */ |
| 213 | |
| 214 | STRLEN* pos = get_invlist_iter_addr(invlist); |
| 215 | UV len = _invlist_len(invlist); |
| 216 | UV *array; |
| 217 | |
| 218 | PERL_ARGS_ASSERT_INVLIST_ITERNEXT; |
| 219 | |
| 220 | if (*pos >= len) { |
| 221 | *pos = (STRLEN) UV_MAX; /* Force iterinit() to be required next time */ |
| 222 | return FALSE; |
| 223 | } |
| 224 | |
| 225 | array = invlist_array(invlist); |
| 226 | |
| 227 | *start = array[(*pos)++]; |
| 228 | |
| 229 | if (*pos >= len) { |
| 230 | *end = UV_MAX; |
| 231 | } |
| 232 | else { |
| 233 | *end = array[(*pos)++] - 1; |
| 234 | } |
| 235 | |
| 236 | return TRUE; |
| 237 | } |
| 238 | |
| 239 | #endif |
| 240 | |
#if !defined(PERL_IN_REGCOMP_C)

/* Past this point the size-conversion macros are needed only by regcomp.c, so
 * they are removed for every other file that includes this header */
#  undef TO_INTERNAL_SIZE
#  undef FROM_INTERNAL_SIZE
#endif
| 247 | |
| 248 | #endif /* PERL_INVLIST_INLINE_H_ */ |