(int)offset, RExC_precomp, RExC_precomp + offset); \
} STMT_END
-#define ckWARN2regdep(loc,m, a1) STMT_START { \
+#define ckWARN2reg_d(loc,m, a1) STMT_START { \
const IV offset = loc - RExC_precomp; \
- Perl_ck_warner_d(aTHX_ packWARN2(WARN_DEPRECATED, WARN_REGEXP), \
+ Perl_ck_warner_d(aTHX_ packWARN(WARN_REGEXP), \
m REPORT_LOCATION, \
a1, (int)offset, RExC_precomp, RExC_precomp + offset); \
} STMT_END
}
/* These two functions currently do the exact same thing */
-#define cl_init_zero S_cl_init
+#define cl_init_zero cl_init
/* 'AND' a given class with another one. Can create false positives. 'cl'
* should not be inverted. 'and_with->flags & ANYOF_CLASS' should be 0 if
/* This section of code defines the inversion list object and its methods. The
* interfaces are highly subject to change, so as much as possible is static to
- * this file. An inversion list is here implemented as a malloc'd C UV array.
- * Currently it is a SVt_PVLV, with some of the header fields from that
- * repurposed for uses here.
+ * this file. An inversion list is here implemented as a malloc'd C UV array
+ * as an SVt_INVLIST scalar.
*
* An inversion list for Unicode is an array of code points, sorted by ordinal
* number. The zeroth element is the first code point in the list. The 1th
* list.)
* Taking the complement (inverting) an inversion list is quite simple, if the
* first element is 0, remove it; otherwise add a 0 element at the beginning.
- * This implementation reserves an element (considered to be the final element
- * of the header) at the beginning of each inversion list to always contain 0;
- * there is an additional flag in the header which indicates if the list begins
- * at the 0, or is offset to begin at the next element.
+ * This implementation reserves an element at the beginning of each inversion
+ * list to always contain 0; there is an additional flag in the header which
+ * indicates if the list begins at the 0, or is offset to begin at the next
+ * element.
*
* More about inversion lists can be found in "Unicode Demystified"
* Chapter 13 by Richard Gillam, published by Addison-Wesley.
/* The header definitions are in F<inline_invlist.c> */
-/* This converts to/from our UVs to what the SV code is expecting: bytes. The
- * first element is always a 0, which may or may not currently be in the list.
- * Just assume the worst case, that it isn't, and so the length of the list
- * passed in has to add 1 to account for it */
-#define TO_INTERNAL_SIZE(x) (((x) + 1) * sizeof(UV))
-#define FROM_INTERNAL_SIZE(x) (((x)/ sizeof(UV)) - 1)
-
-
PERL_STATIC_INLINE UV*
S__invlist_array_init(pTHX_ SV* const invlist, const bool will_have_0)
{
* element is either the element reserved for 0, if TRUE, or the element
* after it, if FALSE */
- U8* offset = get_invlist_offset_addr(invlist);
+ bool* offset = get_invlist_offset_addr(invlist);
UV* zero_addr = (UV *) SvPVX(invlist);
PERL_ARGS_ASSERT__INVLIST_ARRAY_INIT;
/* Must be empty */
- assert(! *_get_invlist_len_addr(invlist));
+ assert(! _invlist_len(invlist));
*zero_addr = 0;
/* Must not be empty. If these fail, you probably didn't check for <len>
* being non-zero before trying to get the array */
- assert(*_get_invlist_len_addr(invlist));
- assert(*get_invlist_offset_addr(invlist) == 0
- || *get_invlist_offset_addr(invlist) == 1);
+ assert(_invlist_len(invlist));
/* The very first element always contains zero, The array begins either
* there, or if the inversion list is offset, at the element after it.
}
PERL_STATIC_INLINE void
-S_invlist_set_len(pTHX_ SV* const invlist, const UV len)
+S_invlist_set_len(pTHX_ SV* const invlist, const UV len, const bool offset)
{
- /* Sets the current number of elements stored in the inversion list */
+ /* Sets the current number of elements stored in the inversion list.
+ * Updates SvCUR correspondingly */
PERL_ARGS_ASSERT_INVLIST_SET_LEN;
- *_get_invlist_len_addr(invlist) = len;
-
- assert(SvLEN(invlist) == 0 || len <= SvLEN(invlist));
+ assert(SvTYPE(invlist) == SVt_INVLIST);
- SvCUR_set(invlist, TO_INTERNAL_SIZE(len));
- /* Note that when inverting, SvCUR shouldn't change */
+ SvCUR_set(invlist,
+ (len == 0)
+ ? 0
+ : TO_INTERNAL_SIZE(len + offset));
+ assert(SvLEN(invlist) == 0 || SvCUR(invlist) <= SvLEN(invlist));
}
PERL_STATIC_INLINE IV*
S_get_invlist_previous_index_addr(pTHX_ SV* invlist)
{
- /* Return the address of the UV that is reserved to hold the cached index
+ /* Return the address of the IV that is reserved to hold the cached index
* */
PERL_ARGS_ASSERT_GET_INVLIST_PREVIOUS_INDEX_ADDR;
- return &(((XPVLV*) SvANY(invlist))->xiv_u.xivu_iv);
+ assert(SvTYPE(invlist) == SVt_INVLIST);
+
+ return &(((XINVLIST*) SvANY(invlist))->prev_index);
}
PERL_STATIC_INLINE IV
PERL_ARGS_ASSERT_INVLIST_MAX;
- return SvLEN(invlist) == 0 /* This happens under _new_invlist_C_array */
- ? _invlist_len(invlist)
- : FROM_INTERNAL_SIZE(SvLEN(invlist));
-}
+ assert(SvTYPE(invlist) == SVt_INVLIST);
-PERL_STATIC_INLINE U8*
-S_get_invlist_offset_addr(pTHX_ SV* invlist)
-{
- /* Return the address of the field that says whether the inversion list is
- * offset (it contains 1) or not (contains 0) */
-
- PERL_ARGS_ASSERT_GET_INVLIST_OFFSET_ADDR;
-
- return (U8*) &(LvFLAGS(invlist));
+ /* Assumes worst case, in which the 0 element is not counted in the
+ * inversion list, so subtracts 1 for that */
+ return SvLEN(invlist) == 0 /* This happens under _new_invlist_C_array */
+ ? FROM_INTERNAL_SIZE(SvCUR(invlist)) - 1
+ : FROM_INTERNAL_SIZE(SvLEN(invlist)) - 1;
}
#ifndef PERL_IN_XSUB_RE
* system default is used instead */
SV* new_list;
- U8* offset_addr;
if (initial_size < 0) {
initial_size = 10;
}
/* Allocate the initial space */
- new_list = newSV_type(SVt_PVLV);
- SvGROW(new_list, TO_INTERNAL_SIZE(initial_size) + 1); /* 1 is for trailing
- NUL */
- invlist_set_len(new_list, 0);
+ new_list = newSV_type(SVt_INVLIST);
+
+ /* First 1 is in case the zero element isn't in the list; second 1 is for
+ * trailing NUL */
+ SvGROW(new_list, TO_INTERNAL_SIZE(initial_size + 1) + 1);
+ invlist_set_len(new_list, 0, 0);
/* Force iterinit() to be used to get iteration to work */
*get_invlist_iter_addr(new_list) = (STRLEN) UV_MAX;
- /* This should force a segfault if a method doesn't initialize this
- * properly. XXX Although now that the max is currently just 255, it might
- * not segfault. */
- offset_addr = get_invlist_offset_addr(new_list);
- *offset_addr = (U8) UV_MAX;
-
*get_invlist_previous_index_addr(new_list) = 0;
return new_list;
const STRLEN length = (STRLEN) list[0];
const UV version_id = list[1];
- const U8 offset = (U8) list[2];
+ const bool offset = cBOOL(list[2]);
#define HEADER_LENGTH 3
/* If any of the above changes in any way, you must change HEADER_LENGTH
* (if appropriate) and regenerate INVLIST_VERSION_ID by running
* perl -E 'say int(rand 2**31-1)'
*/
-#define INVLIST_VERSION_ID 1826693541/* This is a combination of a version and
+#define INVLIST_VERSION_ID 148565664 /* This is a combination of a version and
data structure type, so that one being
passed in can be validated to be an
inversion list of the correct vintage.
*/
- SV* invlist = newSV_type(SVt_PVLV);
+ SV* invlist = newSV_type(SVt_INVLIST);
PERL_ARGS_ASSERT__NEW_INVLIST_C_ARRAY;
SvLEN_set(invlist, 0); /* Means we own the contents, and the system
shouldn't touch it */
- /* The 'length' passed to us is the number of elements in the inversion
- * list. If the list doesn't include the always present 0 element, the
- * length should be adjusted upwards to include it. The TO_INTERNAL_SIZE
- * macro returns a worst case scenario, always making that adjustment, even
- * if not needed. To get the precise size, then, we have to subtract 1
- * when that adjustment wasn't needed. That happens when the offset was 0;
- * the exclusive-or flips the 0 to 1, and vice versa */
- SvCUR_set(invlist, TO_INTERNAL_SIZE(length - (offset ^ 1)));
-
- invlist_set_len(invlist, length);
+
*(get_invlist_offset_addr(invlist)) = offset;
+
+ /* The 'length' passed to us is the physical number of elements in the
+ * inversion list. But if there is an offset the logical number is one
+ * less than that */
+ invlist_set_len(invlist, length - offset, offset);
+
invlist_set_previous_index(invlist, 0);
/* Initialize the iteration pointer. */
PERL_ARGS_ASSERT_INVLIST_EXTEND;
- SvGROW((SV *)invlist, TO_INTERNAL_SIZE(new_max));
+ assert(SvTYPE(invlist) == SVt_INVLIST);
+
+ /* Add one to account for the zero element at the beginning which may not
+ * be counted by the calling parameters */
+ SvGROW((SV *)invlist, TO_INTERNAL_SIZE(new_max + 1));
}
PERL_STATIC_INLINE void
{
PERL_ARGS_ASSERT_INVLIST_TRIM;
+ assert(SvTYPE(invlist) == SVt_INVLIST);
+
/* Change the length of the inversion list to how many entries it currently
* has */
-
SvPV_shrink_to_cur((SV *) invlist);
}
UV* array;
UV max = invlist_max(invlist);
UV len = _invlist_len(invlist);
+ bool offset;
PERL_ARGS_ASSERT__APPEND_RANGE_TO_INVLIST;
if (len == 0) { /* Empty lists must be initialized */
- array = _invlist_array_init(invlist, start == 0);
+ offset = start != 0;
+ array = _invlist_array_init(invlist, ! offset);
}
else {
/* Here, the existing list is non-empty. The current max entry in the
* value not in the set, it is extending the set, so the new first
* value not in the set is one greater than the newly extended range.
* */
+ offset = *get_invlist_offset_addr(invlist);
if (array[final_element] == start) {
if (end != UV_MAX) {
array[final_element] = end + 1;
else {
/* But if the end is the maximum representable on the machine,
* just let the range that this would extend to have no end */
- invlist_set_len(invlist, len - 1);
+ invlist_set_len(invlist, len - 1, offset);
}
return;
}
len += 2; /* Includes an element each for the start and end of range */
- /* If overflows the existing space, extend, which may cause the array to be
- * moved */
+ /* If wll overflow the existing space, extend, which may cause the array to
+ * be moved */
if (max < len) {
invlist_extend(invlist, len);
- invlist_set_len(invlist, len); /* Have to set len here to avoid assert
- failure in invlist_array() */
+
+ /* Have to set len here to avoid assert failure in invlist_array() */
+ invlist_set_len(invlist, len, offset);
+
array = invlist_array(invlist);
}
else {
- invlist_set_len(invlist, len);
+ invlist_set_len(invlist, len, offset);
}
/* The next item on the list starts the range, the one after that is
else {
/* But if the end is the maximum representable on the machine, just let
* the range have no end */
- invlist_set_len(invlist, len - 1);
+ invlist_set_len(invlist, len - 1, offset);
}
}
/* Set result to final length, which can change the pointer to array_u, so
* re-find it */
if (len_u != _invlist_len(u)) {
- invlist_set_len(u, len_u);
+ invlist_set_len(u, len_u, *get_invlist_offset_addr(u));
invlist_trim(u);
array_u = invlist_array(u);
}
/* Set result to final length, which can change the pointer to array_r, so
* re-find it */
if (len_r != _invlist_len(r)) {
- invlist_set_len(r, len_r);
+ invlist_set_len(r, len_r, *get_invlist_offset_addr(r));
invlist_trim(r);
array_r = invlist_array(r);
}
* have a zero; removes it otherwise. As described above, the data
* structure is set up so that this is very efficient */
- STRLEN* len_pos = _get_invlist_len_addr(invlist);
-
PERL_ARGS_ASSERT__INVLIST_INVERT;
assert(! invlist_is_iterating(invlist));
/* The inverse of matching nothing is matching everything */
- if (*len_pos == 0) {
+ if (_invlist_len(invlist) == 0) {
_append_range_to_invlist(invlist, 0, UV_MAX);
return;
}
- /* The exclusive or complents 0 to 1; and 1 to 0. If the result is 1, the
- * zero element was a 0, so it is being removed, so the length decrements
- * by 1; and vice-versa. SvCUR is unaffected */
- if (*get_invlist_offset_addr(invlist) ^= 1) {
- (*len_pos)--;
- }
- else {
- (*len_pos)++;
- }
+ *get_invlist_offset_addr(invlist) = ! *get_invlist_offset_addr(invlist);
}
void
invlist_extend(invlist, len);
array = invlist_array(invlist);
}
- invlist_set_len(invlist, len);
+ invlist_set_len(invlist, len, *get_invlist_offset_addr(invlist));
array[len - 1] = PERL_UNICODE_MAX + 1;
}
else { /* Remove the 0x110000 */
- invlist_set_len(invlist, len - 1);
+ invlist_set_len(invlist, len - 1, *get_invlist_offset_addr(invlist));
}
}
/* Need to allocate extra space to accommodate Perl's addition of a
* trailing NUL to SvPV's, since it thinks they are always strings */
SV* new_invlist = _new_invlist(_invlist_len(invlist) + 1);
- STRLEN length = SvCUR(invlist);
+ STRLEN physical_length = SvCUR(invlist);
+ bool offset = *(get_invlist_offset_addr(invlist));
PERL_ARGS_ASSERT_INVLIST_CLONE;
- SvCUR_set(new_invlist, length); /* This isn't done automatically */
- invlist_set_len(new_invlist, _invlist_len(invlist));
- *(get_invlist_offset_addr(new_invlist))
- = *(get_invlist_offset_addr(invlist));
- Copy(SvPVX(invlist), SvPVX(new_invlist), length, char);
+ *(get_invlist_offset_addr(new_invlist)) = offset;
+ invlist_set_len(new_invlist, _invlist_len(invlist), offset);
+ Copy(SvPVX(invlist), SvPVX(new_invlist), physical_length, char);
return new_invlist;
}
PERL_ARGS_ASSERT_GET_INVLIST_ITER_ADDR;
- return &(LvTARGOFF(invlist));
+ assert(SvTYPE(invlist) == SVt_INVLIST);
+
+ return &(((XINVLIST*) SvANY(invlist))->iterator);
}
PERL_STATIC_INLINE void
* at the 0 that is always stored immediately before the array. */
array_b--;
len_b++;
- array_b[0] = 0;
}
}
default:
/* Use deprecated warning to increase the
* chances of this being output */
- ckWARN2regdep(RExC_parse, "Perl folding rules are not up-to-date for 0x%"UVXf"; please use the perlbug utility to report;", j);
+ ckWARN2reg_d(RExC_parse, "Perl folding rules are not up-to-date for 0x%"UVXf"; please use the perlbug utility to report;", j);
break;
}
}