Commit | Line | Data |
---|---|---|
b992490d | 1 | /* invlist_inline.h |
81e983c1 KW |
2 | * |
3 | * Copyright (C) 2012 by Larry Wall and others | |
4 | * | |
5 | * You may distribute under the terms of either the GNU General Public | |
6 | * License or the Artistic License, as specified in the README file. | |
81e983c1 KW |
7 | */ |
8 | ||
3dd7db29 JK |
9 | #ifndef PERL_INVLIST_INLINE_H_ |
10 | #define PERL_INVLIST_INLINE_H_ | |
11 | ||
de9382e0 KW |
12 | #if defined(PERL_IN_UTF8_C) \ |
13 | || defined(PERL_IN_REGCOMP_C) \ | |
14 | || defined(PERL_IN_REGEXEC_C) \ | |
15 | || defined(PERL_IN_TOKE_C) \ | |
58a0d047 | 16 | || defined(PERL_IN_PP_C) \ |
f34acfec KW |
17 | || defined(PERL_IN_OP_C) \ |
18 | || defined(PERL_IN_DOOP_C) | |
81e983c1 | 19 | |
9e7f4f43 KW |
/* The element at index <i> of an inversion list is in the list iff <i> is even
 * (0, 2, 4, ...).  PREV_RANGE_MATCHES_INVLIST is the logical negation of
 * ELEMENT_RANGE_MATCHES_INVLIST, so it is true exactly when <i> is odd. */
#define ELEMENT_RANGE_MATCHES_INVLIST(i) (((i) & 1) == 0)
#define PREV_RANGE_MATCHES_INVLIST(i) (((i) & 1) != 0)
24 | ||
4c60406d KW |
/* Inversion list lengths are counted in UVs, whereas the SV code expects
 * lengths in bytes.  These two macros convert a count from one unit to the
 * other. */
#define TO_INTERNAL_SIZE(x) (sizeof(UV) * (x))
#define FROM_INTERNAL_SIZE(x) ((x) / sizeof(UV))
28 | ||
510ceaa0 | 29 | PERL_STATIC_INLINE bool |
ffb1f0fd | 30 | S_is_invlist(const SV* const invlist) |
510ceaa0 | 31 | { |
465848b5 | 32 | return invlist != NULL && SvTYPE(invlist) == SVt_INVLIST; |
510ceaa0 KW |
33 | } |
34 | ||
4c60406d | 35 | PERL_STATIC_INLINE bool* |
dc3bf405 | 36 | S_get_invlist_offset_addr(SV* invlist) |
9e7f4f43 | 37 | { |
4c60406d KW |
38 | /* Return the address of the field that says whether the inversion list is |
39 | * offset (it contains 1) or not (contains 0) */ | |
4c60406d | 40 | PERL_ARGS_ASSERT_GET_INVLIST_OFFSET_ADDR; |
9e7f4f43 | 41 | |
510ceaa0 | 42 | assert(is_invlist(invlist)); |
f49d8074 | 43 | |
4c60406d | 44 | return &(((XINVLIST*) SvANY(invlist))->is_offset); |
9e7f4f43 KW |
45 | } |
46 | ||
47 | PERL_STATIC_INLINE UV | |
dc3bf405 | 48 | S__invlist_len(SV* const invlist) |
9e7f4f43 KW |
49 | { |
50 | /* Returns the current number of elements stored in the inversion list's | |
51 | * array */ | |
52 | ||
53 | PERL_ARGS_ASSERT__INVLIST_LEN; | |
54 | ||
510ceaa0 | 55 | assert(is_invlist(invlist)); |
f49d8074 | 56 | |
4c60406d KW |
57 | return (SvCUR(invlist) == 0) |
58 | ? 0 | |
59 | : FROM_INTERNAL_SIZE(SvCUR(invlist)) - *get_invlist_offset_addr(invlist); | |
9e7f4f43 KW |
60 | } |
61 | ||
62 | PERL_STATIC_INLINE bool | |
dc3bf405 | 63 | S__invlist_contains_cp(SV* const invlist, const UV cp) |
9e7f4f43 KW |
64 | { |
65 | /* Does <invlist> contain code point <cp> as part of the set? */ | |
66 | ||
67 | IV index = _invlist_search(invlist, cp); | |
68 | ||
69 | PERL_ARGS_ASSERT__INVLIST_CONTAINS_CP; | |
70 | ||
71 | return index >= 0 && ELEMENT_RANGE_MATCHES_INVLIST(index); | |
72 | } | |
73 | ||
551cedb5 KW |
74 | PERL_STATIC_INLINE UV* |
75 | S_invlist_array(SV* const invlist) | |
76 | { | |
77 | /* Returns the pointer to the inversion list's array. Every time the | |
78 | * length changes, this needs to be called in case malloc or realloc moved | |
79 | * it */ | |
80 | ||
81 | PERL_ARGS_ASSERT_INVLIST_ARRAY; | |
82 | ||
83 | /* Must not be empty. If these fail, you probably didn't check for <len> | |
84 | * being non-zero before trying to get the array */ | |
85 | assert(_invlist_len(invlist)); | |
86 | ||
87 | /* The very first element always contains zero, The array begins either | |
88 | * there, or if the inversion list is offset, at the element after it. | |
89 | * The offset header field determines which; it contains 0 or 1 to indicate | |
90 | * how much additionally to add */ | |
91 | assert(0 == *(SvPVX(invlist))); | |
92 | return ((UV *) SvPVX(invlist) + *get_invlist_offset_addr(invlist)); | |
93 | } | |
94 | ||
a9269870 | 95 | #endif |
f34acfec | 96 | #if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_OP_C) || defined(PERL_IN_DOOP_C) |
a9269870 KW |
97 | |
98 | PERL_STATIC_INLINE void | |
99 | S_invlist_extend(pTHX_ SV* const invlist, const UV new_max) | |
100 | { | |
101 | /* Grow the maximum size of an inversion list */ | |
102 | ||
103 | PERL_ARGS_ASSERT_INVLIST_EXTEND; | |
104 | ||
105 | assert(SvTYPE(invlist) == SVt_INVLIST); | |
106 | ||
107 | /* Add one to account for the zero element at the beginning which may not | |
108 | * be counted by the calling parameters */ | |
109 | SvGROW((SV *)invlist, TO_INTERNAL_SIZE(new_max + 1)); | |
110 | } | |
111 | ||
112 | PERL_STATIC_INLINE void | |
113 | S_invlist_set_len(pTHX_ SV* const invlist, const UV len, const bool offset) | |
114 | { | |
115 | /* Sets the current number of elements stored in the inversion list. | |
116 | * Updates SvCUR correspondingly */ | |
117 | PERL_UNUSED_CONTEXT; | |
118 | PERL_ARGS_ASSERT_INVLIST_SET_LEN; | |
119 | ||
120 | assert(SvTYPE(invlist) == SVt_INVLIST); | |
121 | ||
122 | SvCUR_set(invlist, | |
123 | (len == 0) | |
124 | ? 0 | |
125 | : TO_INTERNAL_SIZE(len + offset)); | |
126 | assert(SvLEN(invlist) == 0 || SvCUR(invlist) <= SvLEN(invlist)); | |
127 | } | |
128 | ||
129 | PERL_STATIC_INLINE SV* | |
130 | S_add_cp_to_invlist(pTHX_ SV* invlist, const UV cp) { | |
131 | return _add_range_to_invlist(invlist, cp, cp); | |
132 | } | |
133 | ||
134 | PERL_STATIC_INLINE UV | |
135 | S_invlist_highest(SV* const invlist) | |
136 | { | |
137 | /* Returns the highest code point that matches an inversion list. This API | |
138 | * has an ambiguity, as it returns 0 under either the highest is actually | |
139 | * 0, or if the list is empty. If this distinction matters to you, check | |
140 | * for emptiness before calling this function */ | |
141 | ||
142 | UV len = _invlist_len(invlist); | |
143 | UV *array; | |
144 | ||
145 | PERL_ARGS_ASSERT_INVLIST_HIGHEST; | |
146 | ||
147 | if (len == 0) { | |
1604cfb0 | 148 | return 0; |
a9269870 KW |
149 | } |
150 | ||
151 | array = invlist_array(invlist); | |
152 | ||
153 | /* The last element in the array in the inversion list always starts a | |
154 | * range that goes to infinity. That range may be for code points that are | |
155 | * matched in the inversion list, or it may be for ones that aren't | |
156 | * matched. In the latter case, the highest code point in the set is one | |
157 | * less than the beginning of this range; otherwise it is the final element | |
158 | * of this range: infinity */ | |
159 | return (ELEMENT_RANGE_MATCHES_INVLIST(len - 1)) | |
160 | ? UV_MAX | |
161 | : array[len - 1] - 1; | |
162 | } | |
163 | ||
bcdc9e1e KW |
164 | # if defined(PERL_IN_REGCOMP_C) |
165 | ||
166 | PERL_STATIC_INLINE UV | |
167 | S_invlist_highest_range_start(SV* const invlist) | |
168 | { | |
80c22fb4 JK |
169 | /* Returns the lowest code point of the highest range in the inversion |
170 | * list parameter. This API has an ambiguity: it returns 0 either when | |
171 | * the lowest such point is actually 0 or when the list is empty. If this | |
172 | * distinction matters to you, check for emptiness before calling this | |
173 | * function. */ | |
bcdc9e1e KW |
174 | |
175 | UV len = _invlist_len(invlist); | |
176 | UV *array; | |
177 | ||
178 | PERL_ARGS_ASSERT_INVLIST_HIGHEST_RANGE_START; | |
179 | ||
180 | if (len == 0) { | |
181 | return 0; | |
182 | } | |
183 | ||
184 | array = invlist_array(invlist); | |
185 | ||
186 | /* The last element in the array in the inversion list always starts a | |
187 | * range that goes to infinity. That range may be for code points that are | |
188 | * matched in the inversion list, or it may be for ones that aren't | |
189 | * matched. In the first case, the lowest code point in the matching range | |
190 | * is that the one that started the range. If the other case, the final | |
191 | * matching range begins at the next element down (which may be 0 in the | |
192 | * edge case). */ | |
193 | return (ELEMENT_RANGE_MATCHES_INVLIST(len - 1)) | |
194 | ? array[len - 1] | |
195 | : len == 1 | |
196 | ? 0 | |
197 | : array[len - 2]; | |
198 | } | |
199 | ||
200 | # endif | |
1bb4bd13 KW |
201 | #endif |
202 | #if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_OP_C) | |
203 | ||
a9269870 KW |
204 | PERL_STATIC_INLINE STRLEN* |
205 | S_get_invlist_iter_addr(SV* invlist) | |
206 | { | |
207 | /* Return the address of the UV that contains the current iteration | |
208 | * position */ | |
209 | ||
210 | PERL_ARGS_ASSERT_GET_INVLIST_ITER_ADDR; | |
211 | ||
212 | assert(is_invlist(invlist)); | |
213 | ||
214 | return &(((XINVLIST*) SvANY(invlist))->iterator); | |
215 | } | |
216 | ||
217 | PERL_STATIC_INLINE void | |
218 | S_invlist_iterinit(SV* invlist) /* Initialize iterator for invlist */ | |
219 | { | |
220 | PERL_ARGS_ASSERT_INVLIST_ITERINIT; | |
221 | ||
222 | *get_invlist_iter_addr(invlist) = 0; | |
223 | } | |
224 | ||
225 | PERL_STATIC_INLINE void | |
226 | S_invlist_iterfinish(SV* invlist) | |
227 | { | |
228 | /* Terminate iterator for invlist. This is to catch development errors. | |
229 | * Any iteration that is interrupted before completed should call this | |
230 | * function. Functions that add code points anywhere else but to the end | |
231 | * of an inversion list assert that they are not in the middle of an | |
232 | * iteration. If they were, the addition would make the iteration | |
233 | * problematical: if the iteration hadn't reached the place where things | |
234 | * were being added, it would be ok */ | |
235 | ||
236 | PERL_ARGS_ASSERT_INVLIST_ITERFINISH; | |
237 | ||
238 | *get_invlist_iter_addr(invlist) = (STRLEN) UV_MAX; | |
239 | } | |
240 | ||
241 | STATIC bool | |
242 | S_invlist_iternext(SV* invlist, UV* start, UV* end) | |
243 | { | |
244 | /* An C<invlist_iterinit> call on <invlist> must be used to set this up. | |
245 | * This call sets in <*start> and <*end>, the next range in <invlist>. | |
246 | * Returns <TRUE> if successful and the next call will return the next | |
247 | * range; <FALSE> if was already at the end of the list. If the latter, | |
248 | * <*start> and <*end> are unchanged, and the next call to this function | |
249 | * will start over at the beginning of the list */ | |
250 | ||
251 | STRLEN* pos = get_invlist_iter_addr(invlist); | |
252 | UV len = _invlist_len(invlist); | |
253 | UV *array; | |
254 | ||
255 | PERL_ARGS_ASSERT_INVLIST_ITERNEXT; | |
256 | ||
257 | if (*pos >= len) { | |
1604cfb0 MS |
258 | *pos = (STRLEN) UV_MAX; /* Force iterinit() to be required next time */ |
259 | return FALSE; | |
a9269870 KW |
260 | } |
261 | ||
262 | array = invlist_array(invlist); | |
263 | ||
264 | *start = array[(*pos)++]; | |
265 | ||
266 | if (*pos >= len) { | |
1604cfb0 | 267 | *end = UV_MAX; |
a9269870 KW |
268 | } |
269 | else { | |
1604cfb0 | 270 | *end = array[(*pos)++] - 1; |
a9269870 KW |
271 | } |
272 | ||
273 | return TRUE; | |
274 | } | |
275 | ||
276 | #endif | |
277 | ||
278 | #ifndef PERL_IN_REGCOMP_C | |
4c60406d KW |
279 | |
280 | /* These symbols are only needed later in regcomp.c */ | |
281 | # undef TO_INTERNAL_SIZE | |
282 | # undef FROM_INTERNAL_SIZE | |
81e983c1 | 283 | #endif |
3dd7db29 JK |
284 | |
285 | #endif /* PERL_INVLIST_INLINE_H_ */ |