Commit | Line | Data |
---|---|---|
b992490d | 1 | /* invlist_inline.h |
81e983c1 KW |
2 | * |
3 | * Copyright (C) 2012 by Larry Wall and others | |
4 | * | |
5 | * You may distribute under the terms of either the GNU General Public | |
6 | * License or the Artistic License, as specified in the README file. | |
81e983c1 KW |
7 | */ |
8 | ||
3dd7db29 JK |
9 | #ifndef PERL_INVLIST_INLINE_H_ |
10 | #define PERL_INVLIST_INLINE_H_ | |
11 | ||
de9382e0 KW |
12 | #if defined(PERL_IN_UTF8_C) \ |
13 | || defined(PERL_IN_REGCOMP_C) \ | |
14 | || defined(PERL_IN_REGEXEC_C) \ | |
15 | || defined(PERL_IN_TOKE_C) \ | |
16 | || defined(PERL_IN_PP_C) | |
81e983c1 | 17 | |
9e7f4f43 KW |
/* An element is in an inversion list iff its index is even numbered: 0, 2, 4,
 * etc.
 *
 * ELEMENT_RANGE_MATCHES_INVLIST(i) is true when array index <i> is even, i.e.
 * its low bit is clear.  PREV_RANGE_MATCHES_INVLIST(i) is simply the logical
 * negation of that, so it is true when <i> is odd. */
#define ELEMENT_RANGE_MATCHES_INVLIST(i) (! ((i) & 1))
#define PREV_RANGE_MATCHES_INVLIST(i) (! ELEMENT_RANGE_MATCHES_INVLIST(i))
22 | ||
4c60406d KW |
/* This converts to/from our UVs to what the SV code is expecting: bytes.
 * TO_INTERNAL_SIZE(x) turns a count of UV elements into a byte count by
 * multiplying by sizeof(UV); FROM_INTERNAL_SIZE(x) goes the other way by
 * dividing.  Both are #undef'd at the bottom of this header unless
 * PERL_IN_REGCOMP_C is defined. */
#define TO_INTERNAL_SIZE(x) ((x) * sizeof(UV))
#define FROM_INTERNAL_SIZE(x) ((x)/ sizeof(UV))
26 | ||
510ceaa0 KW |
27 | PERL_STATIC_INLINE bool |
28 | S_is_invlist(SV* const invlist) | |
29 | { | |
465848b5 | 30 | return invlist != NULL && SvTYPE(invlist) == SVt_INVLIST; |
510ceaa0 KW |
31 | } |
32 | ||
4c60406d | 33 | PERL_STATIC_INLINE bool* |
dc3bf405 | 34 | S_get_invlist_offset_addr(SV* invlist) |
9e7f4f43 | 35 | { |
4c60406d KW |
36 | /* Return the address of the field that says whether the inversion list is |
37 | * offset (it contains 1) or not (contains 0) */ | |
4c60406d | 38 | PERL_ARGS_ASSERT_GET_INVLIST_OFFSET_ADDR; |
9e7f4f43 | 39 | |
510ceaa0 | 40 | assert(is_invlist(invlist)); |
f49d8074 | 41 | |
4c60406d | 42 | return &(((XINVLIST*) SvANY(invlist))->is_offset); |
9e7f4f43 KW |
43 | } |
44 | ||
45 | PERL_STATIC_INLINE UV | |
dc3bf405 | 46 | S__invlist_len(SV* const invlist) |
9e7f4f43 KW |
47 | { |
48 | /* Returns the current number of elements stored in the inversion list's | |
49 | * array */ | |
50 | ||
51 | PERL_ARGS_ASSERT__INVLIST_LEN; | |
52 | ||
510ceaa0 | 53 | assert(is_invlist(invlist)); |
f49d8074 | 54 | |
4c60406d KW |
55 | return (SvCUR(invlist) == 0) |
56 | ? 0 | |
57 | : FROM_INTERNAL_SIZE(SvCUR(invlist)) - *get_invlist_offset_addr(invlist); | |
9e7f4f43 KW |
58 | } |
59 | ||
60 | PERL_STATIC_INLINE bool | |
dc3bf405 | 61 | S__invlist_contains_cp(SV* const invlist, const UV cp) |
9e7f4f43 KW |
62 | { |
63 | /* Does <invlist> contain code point <cp> as part of the set? */ | |
64 | ||
65 | IV index = _invlist_search(invlist, cp); | |
66 | ||
67 | PERL_ARGS_ASSERT__INVLIST_CONTAINS_CP; | |
68 | ||
69 | return index >= 0 && ELEMENT_RANGE_MATCHES_INVLIST(index); | |
70 | } | |
71 | ||
551cedb5 KW |
72 | PERL_STATIC_INLINE UV* |
73 | S_invlist_array(SV* const invlist) | |
74 | { | |
75 | /* Returns the pointer to the inversion list's array. Every time the | |
76 | * length changes, this needs to be called in case malloc or realloc moved | |
77 | * it */ | |
78 | ||
79 | PERL_ARGS_ASSERT_INVLIST_ARRAY; | |
80 | ||
81 | /* Must not be empty. If these fail, you probably didn't check for <len> | |
82 | * being non-zero before trying to get the array */ | |
83 | assert(_invlist_len(invlist)); | |
84 | ||
85 | /* The very first element always contains zero, The array begins either | |
86 | * there, or if the inversion list is offset, at the element after it. | |
87 | * The offset header field determines which; it contains 0 or 1 to indicate | |
88 | * how much additionally to add */ | |
89 | assert(0 == *(SvPVX(invlist))); | |
90 | return ((UV *) SvPVX(invlist) + *get_invlist_offset_addr(invlist)); | |
91 | } | |
92 | ||
a9269870 KW |
93 | #endif |
94 | #if defined(PERL_IN_REGCOMP_C) | |
95 | ||
96 | PERL_STATIC_INLINE void | |
97 | S_invlist_extend(pTHX_ SV* const invlist, const UV new_max) | |
98 | { | |
99 | /* Grow the maximum size of an inversion list */ | |
100 | ||
101 | PERL_ARGS_ASSERT_INVLIST_EXTEND; | |
102 | ||
103 | assert(SvTYPE(invlist) == SVt_INVLIST); | |
104 | ||
105 | /* Add one to account for the zero element at the beginning which may not | |
106 | * be counted by the calling parameters */ | |
107 | SvGROW((SV *)invlist, TO_INTERNAL_SIZE(new_max + 1)); | |
108 | } | |
109 | ||
110 | PERL_STATIC_INLINE void | |
111 | S_invlist_set_len(pTHX_ SV* const invlist, const UV len, const bool offset) | |
112 | { | |
113 | /* Sets the current number of elements stored in the inversion list. | |
114 | * Updates SvCUR correspondingly */ | |
115 | PERL_UNUSED_CONTEXT; | |
116 | PERL_ARGS_ASSERT_INVLIST_SET_LEN; | |
117 | ||
118 | assert(SvTYPE(invlist) == SVt_INVLIST); | |
119 | ||
120 | SvCUR_set(invlist, | |
121 | (len == 0) | |
122 | ? 0 | |
123 | : TO_INTERNAL_SIZE(len + offset)); | |
124 | assert(SvLEN(invlist) == 0 || SvCUR(invlist) <= SvLEN(invlist)); | |
125 | } | |
126 | ||
127 | PERL_STATIC_INLINE SV* | |
128 | S_add_cp_to_invlist(pTHX_ SV* invlist, const UV cp) { | |
129 | return _add_range_to_invlist(invlist, cp, cp); | |
130 | } | |
131 | ||
132 | PERL_STATIC_INLINE UV | |
133 | S_invlist_highest(SV* const invlist) | |
134 | { | |
135 | /* Returns the highest code point that matches an inversion list. This API | |
136 | * has an ambiguity, as it returns 0 under either the highest is actually | |
137 | * 0, or if the list is empty. If this distinction matters to you, check | |
138 | * for emptiness before calling this function */ | |
139 | ||
140 | UV len = _invlist_len(invlist); | |
141 | UV *array; | |
142 | ||
143 | PERL_ARGS_ASSERT_INVLIST_HIGHEST; | |
144 | ||
145 | if (len == 0) { | |
146 | return 0; | |
147 | } | |
148 | ||
149 | array = invlist_array(invlist); | |
150 | ||
151 | /* The last element in the array in the inversion list always starts a | |
152 | * range that goes to infinity. That range may be for code points that are | |
153 | * matched in the inversion list, or it may be for ones that aren't | |
154 | * matched. In the latter case, the highest code point in the set is one | |
155 | * less than the beginning of this range; otherwise it is the final element | |
156 | * of this range: infinity */ | |
157 | return (ELEMENT_RANGE_MATCHES_INVLIST(len - 1)) | |
158 | ? UV_MAX | |
159 | : array[len - 1] - 1; | |
160 | } | |
161 | ||
162 | PERL_STATIC_INLINE STRLEN* | |
163 | S_get_invlist_iter_addr(SV* invlist) | |
164 | { | |
165 | /* Return the address of the UV that contains the current iteration | |
166 | * position */ | |
167 | ||
168 | PERL_ARGS_ASSERT_GET_INVLIST_ITER_ADDR; | |
169 | ||
170 | assert(is_invlist(invlist)); | |
171 | ||
172 | return &(((XINVLIST*) SvANY(invlist))->iterator); | |
173 | } | |
174 | ||
175 | PERL_STATIC_INLINE void | |
176 | S_invlist_iterinit(SV* invlist) /* Initialize iterator for invlist */ | |
177 | { | |
178 | PERL_ARGS_ASSERT_INVLIST_ITERINIT; | |
179 | ||
180 | *get_invlist_iter_addr(invlist) = 0; | |
181 | } | |
182 | ||
183 | PERL_STATIC_INLINE void | |
184 | S_invlist_iterfinish(SV* invlist) | |
185 | { | |
186 | /* Terminate iterator for invlist. This is to catch development errors. | |
187 | * Any iteration that is interrupted before completed should call this | |
188 | * function. Functions that add code points anywhere else but to the end | |
189 | * of an inversion list assert that they are not in the middle of an | |
190 | * iteration. If they were, the addition would make the iteration | |
191 | * problematical: if the iteration hadn't reached the place where things | |
192 | * were being added, it would be ok */ | |
193 | ||
194 | PERL_ARGS_ASSERT_INVLIST_ITERFINISH; | |
195 | ||
196 | *get_invlist_iter_addr(invlist) = (STRLEN) UV_MAX; | |
197 | } | |
198 | ||
199 | STATIC bool | |
200 | S_invlist_iternext(SV* invlist, UV* start, UV* end) | |
201 | { | |
202 | /* An C<invlist_iterinit> call on <invlist> must be used to set this up. | |
203 | * This call sets in <*start> and <*end>, the next range in <invlist>. | |
204 | * Returns <TRUE> if successful and the next call will return the next | |
205 | * range; <FALSE> if was already at the end of the list. If the latter, | |
206 | * <*start> and <*end> are unchanged, and the next call to this function | |
207 | * will start over at the beginning of the list */ | |
208 | ||
209 | STRLEN* pos = get_invlist_iter_addr(invlist); | |
210 | UV len = _invlist_len(invlist); | |
211 | UV *array; | |
212 | ||
213 | PERL_ARGS_ASSERT_INVLIST_ITERNEXT; | |
214 | ||
215 | if (*pos >= len) { | |
216 | *pos = (STRLEN) UV_MAX; /* Force iterinit() to be required next time */ | |
217 | return FALSE; | |
218 | } | |
219 | ||
220 | array = invlist_array(invlist); | |
221 | ||
222 | *start = array[(*pos)++]; | |
223 | ||
224 | if (*pos >= len) { | |
225 | *end = UV_MAX; | |
226 | } | |
227 | else { | |
228 | *end = array[(*pos)++] - 1; | |
229 | } | |
230 | ||
231 | return TRUE; | |
232 | } | |
233 | ||
234 | #endif | |
235 | ||
236 | #ifndef PERL_IN_REGCOMP_C | |
4c60406d KW |
237 | |
238 | /* These symbols are only needed later in regcomp.c */ | |
239 | # undef TO_INTERNAL_SIZE | |
240 | # undef FROM_INTERNAL_SIZE | |
81e983c1 | 241 | #endif |
3dd7db29 JK |
242 | |
243 | #endif /* PERL_INVLIST_INLINE_H_ */ |