Commit | Line | Data |
---|---|---|
b992490d | 1 | /* invlist_inline.h |
81e983c1 KW |
2 | * |
3 | * Copyright (C) 2012 by Larry Wall and others | |
4 | * | |
5 | * You may distribute under the terms of either the GNU General Public | |
6 | * License or the Artistic License, as specified in the README file. | |
81e983c1 KW |
7 | */ |
8 | ||
3dd7db29 JK |
9 | #ifndef PERL_INVLIST_INLINE_H_ |
10 | #define PERL_INVLIST_INLINE_H_ | |
11 | ||
de9382e0 KW |
12 | #if defined(PERL_IN_UTF8_C) \ |
13 | || defined(PERL_IN_REGCOMP_C) \ | |
14 | || defined(PERL_IN_REGEXEC_C) \ | |
15 | || defined(PERL_IN_TOKE_C) \ | |
58a0d047 | 16 | || defined(PERL_IN_PP_C) \ |
f34acfec KW |
17 | || defined(PERL_IN_OP_C) \ |
18 | || defined(PERL_IN_DOOP_C) | |
81e983c1 | 19 | |
9e7f4f43 KW |
20 | /* The range that starts at element <i> of an inversion list is in the set iff <i> is even: 0, 2, 4, |
21 | * etc.  PREV_RANGE_MATCHES_INVLIST(i) is simply the negation: true iff <i> is odd */ | |
22 | #define ELEMENT_RANGE_MATCHES_INVLIST(i) (! ((i) & 1)) | |
23 | #define PREV_RANGE_MATCHES_INVLIST(i) (! ELEMENT_RANGE_MATCHES_INVLIST(i)) | |
24 | ||
4c60406d KW |
25 | /* These convert between a count of UVs and what the SV code is expecting: a count of bytes. */ |
26 | #define TO_INTERNAL_SIZE(x) ((x) * sizeof(UV)) | |
27 | #define FROM_INTERNAL_SIZE(x) ((x)/ sizeof(UV)) | |
28 | ||
510ceaa0 KW |
29 | PERL_STATIC_INLINE bool |
30 | S_is_invlist(SV* const invlist) | |
31 | { /* TRUE iff <invlist> is non-NULL and its SvTYPE is SVt_INVLIST */ | |
465848b5 | 32 | return invlist != NULL && SvTYPE(invlist) == SVt_INVLIST; |
510ceaa0 KW |
33 | } |
34 | ||
4c60406d | 35 | PERL_STATIC_INLINE bool* |
dc3bf405 | 36 | S_get_invlist_offset_addr(SV* invlist) |
9e7f4f43 | 37 | { |
4c60406d KW |
38 | /* Return the address of the <is_offset> field in the XINVLIST body, which says whether the inversion list is |
39 | * offset (it contains 1) or not (contains 0).  <invlist> must be an inversion list */ | |
4c60406d | 40 | PERL_ARGS_ASSERT_GET_INVLIST_OFFSET_ADDR; |
9e7f4f43 | 41 | |
510ceaa0 | 42 | assert(is_invlist(invlist)); |
f49d8074 | 43 | |
4c60406d | 44 | return &(((XINVLIST*) SvANY(invlist))->is_offset); |
9e7f4f43 KW |
45 | } |
46 | ||
47 | PERL_STATIC_INLINE UV | |
dc3bf405 | 48 | S__invlist_len(SV* const invlist) |
9e7f4f43 KW |
49 | { |
50 | /* Returns the current number of elements stored in the inversion list's | |
51 | * array: 0 if SvCUR is 0, otherwise SvCUR converted from bytes to UVs, less the offset flag (0 or 1) */ | |
52 | ||
53 | PERL_ARGS_ASSERT__INVLIST_LEN; | |
54 | ||
510ceaa0 | 55 | assert(is_invlist(invlist)); |
f49d8074 | 56 | |
4c60406d KW |
57 | return (SvCUR(invlist) == 0) |
58 | ? 0 | |
59 | : FROM_INTERNAL_SIZE(SvCUR(invlist)) - *get_invlist_offset_addr(invlist); | |
9e7f4f43 KW |
60 | } |
61 | ||
62 | PERL_STATIC_INLINE bool | |
dc3bf405 | 63 | S__invlist_contains_cp(SV* const invlist, const UV cp) |
9e7f4f43 KW |
64 | { |
65 | /* Does <invlist> contain code point <cp> as part of the set?  It does iff _invlist_search() returns a non-negative index that is even */ | |
66 | ||
67 | IV index = _invlist_search(invlist, cp); | |
68 | ||
69 | PERL_ARGS_ASSERT__INVLIST_CONTAINS_CP; | |
70 | ||
71 | return index >= 0 && ELEMENT_RANGE_MATCHES_INVLIST(index); | |
72 | } | |
73 | ||
551cedb5 KW |
74 | PERL_STATIC_INLINE UV* |
75 | S_invlist_array(SV* const invlist) | |
76 | { | |
77 | /* Returns the pointer to the inversion list's array. Every time the | |
78 | * length changes, this needs to be called again in case malloc or realloc moved | |
79 | * it */ | |
80 | ||
81 | PERL_ARGS_ASSERT_INVLIST_ARRAY; | |
82 | ||
83 | /* Must not be empty. If this fails, you probably didn't check for <len> | |
84 | * being non-zero before trying to get the array */ | |
85 | assert(_invlist_len(invlist)); | |
86 | ||
87 | /* The very first element always contains zero. The array begins either | |
88 | * there, or if the inversion list is offset, at the element after it. | |
89 | * The offset header field determines which; it contains 0 or 1 to indicate | |
90 | * how much additionally to add. NOTE(review): the assert below reads through SvPVX() without the (UV *) cast used in the return, so it may not examine the whole first UV; confirm */ | |
91 | assert(0 == *(SvPVX(invlist))); | |
92 | return ((UV *) SvPVX(invlist) + *get_invlist_offset_addr(invlist)); | |
93 | } | |
94 | ||
a9269870 | 95 | #endif |
f34acfec | 96 | #if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_OP_C) || defined(PERL_IN_DOOP_C) |
a9269870 KW |
97 | |
98 | PERL_STATIC_INLINE void | |
99 | S_invlist_extend(pTHX_ SV* const invlist, const UV new_max) | |
100 | { | |
101 | /* Grow the maximum size of an inversion list: asks SvGROW for the byte size of <new_max> + 1 UVs */ | |
102 | ||
103 | PERL_ARGS_ASSERT_INVLIST_EXTEND; | |
104 | ||
105 | assert(SvTYPE(invlist) == SVt_INVLIST); | |
106 | ||
107 | /* Add one to account for the zero element at the beginning which may not | |
108 | * be counted by the calling parameters */ | |
109 | SvGROW((SV *)invlist, TO_INTERNAL_SIZE(new_max + 1)); | |
110 | } | |
111 | ||
112 | PERL_STATIC_INLINE void | |
113 | S_invlist_set_len(pTHX_ SV* const invlist, const UV len, const bool offset) | |
114 | { | |
115 | /* Sets the current number of elements stored in the inversion list to <len>. | |
116 | * Updates SvCUR correspondingly: 0 if <len> is 0, otherwise the byte size of <len> + <offset> UVs.  Within this function <offset> is used only for that size computation */ | |
117 | PERL_UNUSED_CONTEXT; | |
118 | PERL_ARGS_ASSERT_INVLIST_SET_LEN; | |
119 | ||
120 | assert(SvTYPE(invlist) == SVt_INVLIST); | |
121 | ||
122 | SvCUR_set(invlist, | |
123 | (len == 0) | |
124 | ? 0 | |
125 | : TO_INTERNAL_SIZE(len + offset)); | |
126 | assert(SvLEN(invlist) == 0 || SvCUR(invlist) <= SvLEN(invlist)); | |
127 | } | |
128 | ||
129 | PERL_STATIC_INLINE SV* | |
130 | S_add_cp_to_invlist(pTHX_ SV* invlist, const UV cp) { /* Thin wrapper: passes <cp> as both ends of the range to _add_range_to_invlist() and returns its result */ | |
131 | return _add_range_to_invlist(invlist, cp, cp); | |
132 | } | |
133 | ||
134 | PERL_STATIC_INLINE UV | |
135 | S_invlist_highest(SV* const invlist) | |
136 | { | |
137 | /* Returns the highest code point that matches an inversion list. This API | |
138 | * has an ambiguity, as it returns 0 both when the highest is actually | |
139 | * 0 and when the list is empty. If this distinction matters to you, check | |
140 | * for emptiness before calling this function */ | |
141 | ||
142 | UV len = _invlist_len(invlist); | |
143 | UV *array; | |
144 | ||
145 | PERL_ARGS_ASSERT_INVLIST_HIGHEST; | |
146 | ||
147 | if (len == 0) { | |
148 | return 0; | |
149 | } | |
150 | ||
151 | array = invlist_array(invlist); | |
152 | ||
153 | /* The last element in the array in the inversion list always starts a | |
154 | * range that goes to infinity. That range may be for code points that are | |
155 | * matched in the inversion list, or it may be for ones that aren't | |
156 | * matched. In the latter case, the highest code point in the set is one | |
157 | * less than the beginning of this range; otherwise it is the final element | |
158 | * of this range: infinity, which is returned as UV_MAX */ | |
159 | return (ELEMENT_RANGE_MATCHES_INVLIST(len - 1)) | |
160 | ? UV_MAX | |
161 | : array[len - 1] - 1; | |
162 | } | |
163 | ||
1bb4bd13 KW |
164 | #endif |
165 | #if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_OP_C) | |
166 | ||
a9269870 KW |
167 | PERL_STATIC_INLINE STRLEN* |
168 | S_get_invlist_iter_addr(SV* invlist) | |
169 | { | |
170 | /* Return the address of the STRLEN field (<iterator> in the XINVLIST body) that contains the current iteration | |
171 | * position.  <invlist> must be an inversion list */ | |
172 | ||
173 | PERL_ARGS_ASSERT_GET_INVLIST_ITER_ADDR; | |
174 | ||
175 | assert(is_invlist(invlist)); | |
176 | ||
177 | return &(((XINVLIST*) SvANY(invlist))->iterator); | |
178 | } | |
179 | ||
180 | PERL_STATIC_INLINE void | |
181 | S_invlist_iterinit(SV* invlist) /* Initialize iterator for invlist: resets the saved iteration position to 0 */ | |
182 | { | |
183 | PERL_ARGS_ASSERT_INVLIST_ITERINIT; | |
184 | ||
185 | *get_invlist_iter_addr(invlist) = 0; | |
186 | } | |
187 | ||
188 | PERL_STATIC_INLINE void | |
189 | S_invlist_iterfinish(SV* invlist) | |
190 | { | |
191 | /* Terminate iterator for invlist. This is to catch development errors. | |
192 | * Any iteration that is interrupted before completion should call this | |
193 | * function. Functions that add code points anywhere else but to the end | |
194 | * of an inversion list assert that they are not in the middle of an | |
195 | * iteration. If they were, the addition would make the iteration | |
196 | * problematical (though if the iteration hadn't yet reached the place where things | |
197 | * were being added, it would be ok). The finished state is recorded by setting the iteration position to (STRLEN) UV_MAX */ | |
198 | ||
199 | PERL_ARGS_ASSERT_INVLIST_ITERFINISH; | |
200 | ||
201 | *get_invlist_iter_addr(invlist) = (STRLEN) UV_MAX; | |
202 | } | |
203 | ||
204 | STATIC bool | |
205 | S_invlist_iternext(SV* invlist, UV* start, UV* end) | |
206 | { | |
207 | /* An C<invlist_iterinit> call on <invlist> must be used to set this up. | |
208 | * This call sets <*start> and <*end> to the bounds of the next range in <invlist>. | |
209 | * Returns <TRUE> if successful and the next call will return the next | |
210 | * range; <FALSE> if was already at the end of the list. If the latter, | |
211 | * <*start> and <*end> are unchanged, and the iterator is marked as finished, so | |
212 | * C<invlist_iterinit> must be called again before the list can be iterated anew */ | |
213 | ||
214 | STRLEN* pos = get_invlist_iter_addr(invlist); | |
215 | UV len = _invlist_len(invlist); | |
216 | UV *array; | |
217 | ||
218 | PERL_ARGS_ASSERT_INVLIST_ITERNEXT; | |
219 | ||
220 | if (*pos >= len) { | |
221 | *pos = (STRLEN) UV_MAX; /* Force iterinit() to be required next time */ | |
222 | return FALSE; | |
223 | } | |
224 | ||
225 | array = invlist_array(invlist); | |
226 | ||
227 | *start = array[(*pos)++]; | |
228 | ||
229 | if (*pos >= len) { | |
230 | *end = UV_MAX; /* No following element: this final range extends to infinity */ | |
231 | } | |
232 | else { | |
233 | *end = array[(*pos)++] - 1; /* The next element starts the following range, so this one ends just before it */ | |
234 | } | |
235 | ||
236 | return TRUE; | |
237 | } | |
238 | ||
239 | #endif | |
240 | ||
241 | #ifndef PERL_IN_REGCOMP_C | |
4c60406d KW |
242 | |
243 | /* These symbols are only needed later in regcomp.c */ | |
244 | # undef TO_INTERNAL_SIZE | |
245 | # undef FROM_INTERNAL_SIZE | |
81e983c1 | 246 | #endif |
3dd7db29 JK |
247 | |
248 | #endif /* PERL_INVLIST_INLINE_H_ */ |