This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
locale.c: Rmv duplicate strlen()
[perl5.git] / invlist_inline.h
CommitLineData
/*    invlist_inline.h
 *
 *    Copyright (C) 2012 by Larry Wall and others
 *
 *    You may distribute under the terms of either the GNU General Public
 *    License or the Artistic License, as specified in the README file.
 */
8
3dd7db29
JK
9#ifndef PERL_INVLIST_INLINE_H_
10#define PERL_INVLIST_INLINE_H_
11
de9382e0 12#if defined(PERL_IN_UTF8_C) \
85900e28 13 || defined(PERL_IN_REGCOMP_ANY) \
de9382e0
KW
14 || defined(PERL_IN_REGEXEC_C) \
15 || defined(PERL_IN_TOKE_C) \
58a0d047 16 || defined(PERL_IN_PP_C) \
f34acfec
KW
17 || defined(PERL_IN_OP_C) \
18 || defined(PERL_IN_DOOP_C)
81e983c1 19
9e7f4f43
KW
/* The range that begins at array index <i> is part of the set exactly when
 * <i> is even (0, 2, 4, ...).  The range just before index <i> therefore
 * matches exactly when <i> is odd. */
#define ELEMENT_RANGE_MATCHES_INVLIST(i)  (((i) & 1) == 0)
#define PREV_RANGE_MATCHES_INVLIST(i)     (((i) & 1) != 0)
24
4c60406d
KW
/* Inversion lists count their contents in UVs, whereas the SV code wants
 * byte counts.  These convert a UV count to bytes, and bytes back to a UV
 * count. */
#define TO_INTERNAL_SIZE(x)    ((sizeof(UV)) * (x))
#define FROM_INTERNAL_SIZE(x)  ((x) / (sizeof(UV)))
28
510ceaa0 29PERL_STATIC_INLINE bool
ffb1f0fd 30S_is_invlist(const SV* const invlist)
510ceaa0 31{
465848b5 32 return invlist != NULL && SvTYPE(invlist) == SVt_INVLIST;
510ceaa0
KW
33}
34
4c60406d 35PERL_STATIC_INLINE bool*
dc3bf405 36S_get_invlist_offset_addr(SV* invlist)
9e7f4f43 37{
4c60406d
KW
38 /* Return the address of the field that says whether the inversion list is
39 * offset (it contains 1) or not (contains 0) */
4c60406d 40 PERL_ARGS_ASSERT_GET_INVLIST_OFFSET_ADDR;
9e7f4f43 41
510ceaa0 42 assert(is_invlist(invlist));
f49d8074 43
4c60406d 44 return &(((XINVLIST*) SvANY(invlist))->is_offset);
9e7f4f43
KW
45}
46
47PERL_STATIC_INLINE UV
dc3bf405 48S__invlist_len(SV* const invlist)
9e7f4f43
KW
49{
50 /* Returns the current number of elements stored in the inversion list's
51 * array */
52
53 PERL_ARGS_ASSERT__INVLIST_LEN;
54
510ceaa0 55 assert(is_invlist(invlist));
f49d8074 56
4c60406d
KW
57 return (SvCUR(invlist) == 0)
58 ? 0
59 : FROM_INTERNAL_SIZE(SvCUR(invlist)) - *get_invlist_offset_addr(invlist);
9e7f4f43
KW
60}
61
62PERL_STATIC_INLINE bool
dc3bf405 63S__invlist_contains_cp(SV* const invlist, const UV cp)
9e7f4f43
KW
64{
65 /* Does <invlist> contain code point <cp> as part of the set? */
66
67 IV index = _invlist_search(invlist, cp);
68
69 PERL_ARGS_ASSERT__INVLIST_CONTAINS_CP;
70
71 return index >= 0 && ELEMENT_RANGE_MATCHES_INVLIST(index);
72}
73
551cedb5
KW
74PERL_STATIC_INLINE UV*
75S_invlist_array(SV* const invlist)
76{
77 /* Returns the pointer to the inversion list's array. Every time the
78 * length changes, this needs to be called in case malloc or realloc moved
79 * it */
80
81 PERL_ARGS_ASSERT_INVLIST_ARRAY;
82
83 /* Must not be empty. If these fail, you probably didn't check for <len>
84 * being non-zero before trying to get the array */
85 assert(_invlist_len(invlist));
86
87 /* The very first element always contains zero, The array begins either
88 * there, or if the inversion list is offset, at the element after it.
89 * The offset header field determines which; it contains 0 or 1 to indicate
90 * how much additionally to add */
91 assert(0 == *(SvPVX(invlist)));
92 return ((UV *) SvPVX(invlist) + *get_invlist_offset_addr(invlist));
93}
94
a9269870 95#endif
85900e28 96#if defined(PERL_IN_REGCOMP_ANY) || defined(PERL_IN_OP_C) || defined(PERL_IN_DOOP_C)
a9269870
KW
97
98PERL_STATIC_INLINE void
99S_invlist_extend(pTHX_ SV* const invlist, const UV new_max)
100{
101 /* Grow the maximum size of an inversion list */
102
103 PERL_ARGS_ASSERT_INVLIST_EXTEND;
104
105 assert(SvTYPE(invlist) == SVt_INVLIST);
106
107 /* Add one to account for the zero element at the beginning which may not
108 * be counted by the calling parameters */
109 SvGROW((SV *)invlist, TO_INTERNAL_SIZE(new_max + 1));
110}
111
112PERL_STATIC_INLINE void
113S_invlist_set_len(pTHX_ SV* const invlist, const UV len, const bool offset)
114{
115 /* Sets the current number of elements stored in the inversion list.
116 * Updates SvCUR correspondingly */
117 PERL_UNUSED_CONTEXT;
118 PERL_ARGS_ASSERT_INVLIST_SET_LEN;
119
120 assert(SvTYPE(invlist) == SVt_INVLIST);
121
122 SvCUR_set(invlist,
123 (len == 0)
124 ? 0
125 : TO_INTERNAL_SIZE(len + offset));
126 assert(SvLEN(invlist) == 0 || SvCUR(invlist) <= SvLEN(invlist));
127}
128
129PERL_STATIC_INLINE SV*
130S_add_cp_to_invlist(pTHX_ SV* invlist, const UV cp) {
131 return _add_range_to_invlist(invlist, cp, cp);
132}
133
134PERL_STATIC_INLINE UV
135S_invlist_highest(SV* const invlist)
136{
137 /* Returns the highest code point that matches an inversion list. This API
138 * has an ambiguity, as it returns 0 under either the highest is actually
139 * 0, or if the list is empty. If this distinction matters to you, check
140 * for emptiness before calling this function */
141
142 UV len = _invlist_len(invlist);
143 UV *array;
144
145 PERL_ARGS_ASSERT_INVLIST_HIGHEST;
146
147 if (len == 0) {
1604cfb0 148 return 0;
a9269870
KW
149 }
150
151 array = invlist_array(invlist);
152
153 /* The last element in the array in the inversion list always starts a
154 * range that goes to infinity. That range may be for code points that are
155 * matched in the inversion list, or it may be for ones that aren't
156 * matched. In the latter case, the highest code point in the set is one
157 * less than the beginning of this range; otherwise it is the final element
158 * of this range: infinity */
159 return (ELEMENT_RANGE_MATCHES_INVLIST(len - 1))
160 ? UV_MAX
161 : array[len - 1] - 1;
162}
163
85900e28 164# if defined(PERL_IN_REGCOMP_ANY)
bcdc9e1e
KW
165
166PERL_STATIC_INLINE UV
167S_invlist_highest_range_start(SV* const invlist)
168{
80c22fb4
JK
169 /* Returns the lowest code point of the highest range in the inversion
170 * list parameter. This API has an ambiguity: it returns 0 either when
171 * the lowest such point is actually 0 or when the list is empty. If this
172 * distinction matters to you, check for emptiness before calling this
173 * function. */
bcdc9e1e
KW
174
175 UV len = _invlist_len(invlist);
176 UV *array;
177
178 PERL_ARGS_ASSERT_INVLIST_HIGHEST_RANGE_START;
179
180 if (len == 0) {
181 return 0;
182 }
183
184 array = invlist_array(invlist);
185
186 /* The last element in the array in the inversion list always starts a
187 * range that goes to infinity. That range may be for code points that are
188 * matched in the inversion list, or it may be for ones that aren't
189 * matched. In the first case, the lowest code point in the matching range
190 * is that the one that started the range. If the other case, the final
191 * matching range begins at the next element down (which may be 0 in the
192 * edge case). */
193 return (ELEMENT_RANGE_MATCHES_INVLIST(len - 1))
194 ? array[len - 1]
195 : len == 1
196 ? 0
197 : array[len - 2];
198}
199
200# endif
1bb4bd13 201#endif
85900e28 202#if defined(PERL_IN_REGCOMP_ANY) || defined(PERL_IN_OP_C)
1bb4bd13 203
a9269870
KW
204PERL_STATIC_INLINE STRLEN*
205S_get_invlist_iter_addr(SV* invlist)
206{
207 /* Return the address of the UV that contains the current iteration
208 * position */
209
210 PERL_ARGS_ASSERT_GET_INVLIST_ITER_ADDR;
211
212 assert(is_invlist(invlist));
213
214 return &(((XINVLIST*) SvANY(invlist))->iterator);
215}
216
217PERL_STATIC_INLINE void
218S_invlist_iterinit(SV* invlist) /* Initialize iterator for invlist */
219{
220 PERL_ARGS_ASSERT_INVLIST_ITERINIT;
221
222 *get_invlist_iter_addr(invlist) = 0;
223}
224
225PERL_STATIC_INLINE void
226S_invlist_iterfinish(SV* invlist)
227{
228 /* Terminate iterator for invlist. This is to catch development errors.
229 * Any iteration that is interrupted before completed should call this
230 * function. Functions that add code points anywhere else but to the end
231 * of an inversion list assert that they are not in the middle of an
232 * iteration. If they were, the addition would make the iteration
233 * problematical: if the iteration hadn't reached the place where things
234 * were being added, it would be ok */
235
236 PERL_ARGS_ASSERT_INVLIST_ITERFINISH;
237
238 *get_invlist_iter_addr(invlist) = (STRLEN) UV_MAX;
239}
240
241STATIC bool
242S_invlist_iternext(SV* invlist, UV* start, UV* end)
243{
244 /* An C<invlist_iterinit> call on <invlist> must be used to set this up.
245 * This call sets in <*start> and <*end>, the next range in <invlist>.
246 * Returns <TRUE> if successful and the next call will return the next
247 * range; <FALSE> if was already at the end of the list. If the latter,
248 * <*start> and <*end> are unchanged, and the next call to this function
249 * will start over at the beginning of the list */
250
251 STRLEN* pos = get_invlist_iter_addr(invlist);
252 UV len = _invlist_len(invlist);
253 UV *array;
254
255 PERL_ARGS_ASSERT_INVLIST_ITERNEXT;
256
257 if (*pos >= len) {
1604cfb0
MS
258 *pos = (STRLEN) UV_MAX; /* Force iterinit() to be required next time */
259 return FALSE;
a9269870
KW
260 }
261
262 array = invlist_array(invlist);
263
264 *start = array[(*pos)++];
265
266 if (*pos >= len) {
1604cfb0 267 *end = UV_MAX;
a9269870
KW
268 }
269 else {
1604cfb0 270 *end = array[(*pos)++] - 1;
a9269870
KW
271 }
272
273 return TRUE;
274}
275
276#endif
277
85900e28 278#ifndef PERL_IN_REGCOMP_ANY
4c60406d
KW
279
280/* These symbols are only needed later in regcomp.c */
281# undef TO_INTERNAL_SIZE
282# undef FROM_INTERNAL_SIZE
81e983c1 283#endif
3dd7db29 284
85900e28
YO
285#ifdef PERL_IN_REGCOMP_ANY
286PERL_STATIC_INLINE
287bool
288S_invlist_is_iterating(const SV* const invlist)
289{
290 PERL_ARGS_ASSERT_INVLIST_IS_ITERATING;
291
292 /* get_invlist_iter_addr()'s sv is non-const only because it returns a
293 * value that can be used to modify the invlist, it doesn't modify the
294 * invlist itself */
295 return *(get_invlist_iter_addr((SV*)invlist)) < (STRLEN) UV_MAX;
296}
297
298PERL_STATIC_INLINE
299SV *
300S_invlist_contents(pTHX_ SV* const invlist, const bool traditional_style)
301{
302 /* Get the contents of an inversion list into a string SV so that they can
303 * be printed out. If 'traditional_style' is TRUE, it uses the format
304 * traditionally done for debug tracing; otherwise it uses a format
305 * suitable for just copying to the output, with blanks between ranges and
306 * a dash between range components */
307
308 UV start, end;
309 SV* output;
310 const char intra_range_delimiter = (traditional_style ? '\t' : '-');
311 const char inter_range_delimiter = (traditional_style ? '\n' : ' ');
312
313 if (traditional_style) {
314 output = newSVpvs("\n");
315 }
316 else {
317 output = newSVpvs("");
318 }
319
320 PERL_ARGS_ASSERT_INVLIST_CONTENTS;
321
322 assert(! invlist_is_iterating(invlist));
323
324 invlist_iterinit(invlist);
325 while (invlist_iternext(invlist, &start, &end)) {
326 if (end == UV_MAX) {
327 Perl_sv_catpvf(aTHX_ output, "%04" UVXf "%cINFTY%c",
328 start, intra_range_delimiter,
329 inter_range_delimiter);
330 }
331 else if (end != start) {
332 Perl_sv_catpvf(aTHX_ output, "%04" UVXf "%c%04" UVXf "%c",
333 start,
334 intra_range_delimiter,
335 end, inter_range_delimiter);
336 }
337 else {
338 Perl_sv_catpvf(aTHX_ output, "%04" UVXf "%c",
339 start, inter_range_delimiter);
340 }
341 }
342
343 if (SvCUR(output) && ! traditional_style) {/* Get rid of trailing blank */
344 SvCUR_set(output, SvCUR(output) - 1);
345 }
346
347 return output;
348}
349
350PERL_STATIC_INLINE
351UV
352S_invlist_lowest(SV* const invlist)
353{
354 /* Returns the lowest code point that matches an inversion list. This API
355 * has an ambiguity, as it returns 0 under either the lowest is actually
356 * 0, or if the list is empty. If this distinction matters to you, check
357 * for emptiness before calling this function */
358
359 UV len = _invlist_len(invlist);
360 UV *array;
361
362 PERL_ARGS_ASSERT_INVLIST_LOWEST;
363
364 if (len == 0) {
365 return 0;
366 }
367
368 array = invlist_array(invlist);
369
370 return array[0];
371}
372
373#endif
374
3dd7db29 375#endif /* PERL_INVLIST_INLINE_H_ */