#define FOLD cBOOL(RExC_flags & RXf_PMf_FOLD)
-#define OOB_UNICODE 12345678
#define OOB_NAMEDCLASS -1
+/* There is no code point that is out-of-bounds, so this is problematic. But
+ * its only current use is to initialize a variable that is always set before
+ * being looked at. */
+#define OOB_UNICODE 0xDEADBEEF
+
#define CHR_SVLEN(sv) (UTF ? sv_len_utf8(sv) : SvCUR(sv))
#define CHR_DIST(a,b) (UTF ? utf8_distance(a,b) : a - b)
* problematic sequences. This delta is used by the caller to adjust the
* min length of the match, and the delta between min and max, so that the
* optimizer doesn't reject these possibilities based on size constraints.
- * 2) These sequences are not currently correctly handled by the trie code
- * either, so it changes the joined node type to ops that are not handled
- * by trie's, those new ops being EXACTFU_SS and EXACTFU_TRICKYFOLD.
+ * 2) These sequences require special handling by the trie code, so it
+ * changes the joined node type to ops for the trie's benefit, those new
+ * ops being EXACTFU_SS and EXACTFU_TRICKYFOLD.
* 3) This is sufficient for the two Greek sequences (described below), but
* the one involving the Sharp s (\xDF) needs more. The node type
* EXACTFU_SS is used for an EXACTFU node that contains at least one "ss"
data->flags |= (OP(scan) == MEOL
? SF_BEFORE_MEOL
: SF_BEFORE_SEOL);
+ SCAN_COMMIT(pRExC_state, data, minlenp);
+
}
else if ( PL_regkind[OP(scan)] == BRANCHJ
/* Lookbehind, or need to calculate parens/evals/stclass: */
* the original pattern needs upgrading to utf8.
*/
-bool
+static bool
S_compile_runtime_code(pTHX_ RExC_state_t * const pRExC_state,
char *pat, STRLEN plen)
{
return TRUE;
}
+PERL_STATIC_INLINE UV
+S_invlist_highest(pTHX_ SV* const invlist)
+{
+    /* Returns the highest code point that matches an inversion list.  This
+     * API has an ambiguity, as it returns 0 either if the highest code point
+     * is actually 0, or if the list is empty.  If this distinction matters
+     * to you, check for emptiness before calling this function */
+
+    UV len;
+    UV *array;
+
+    /* Check the argument before its first use, so that the check cannot be
+     * pre-empted by the call to invlist_len() */
+    PERL_ARGS_ASSERT_INVLIST_HIGHEST;
+
+    len = invlist_len(invlist);
+    if (len == 0) {
+        return 0;
+    }
+
+    array = invlist_array(invlist);
+
+    /* The last element in the array in the inversion list always starts a
+     * range that goes to infinity.  That range may be for code points that
+     * are matched in the inversion list, or it may be for ones that aren't
+     * matched.  In the latter case, the highest code point in the set is one
+     * less than the beginning of this range; otherwise it is the final
+     * element of this range: infinity, which is returned as UV_MAX */
+    return (ELEMENT_RANGE_MATCHES_INVLIST(len - 1))
+           ? UV_MAX
+           : array[len - 1] - 1;
+}
+
#ifndef PERL_IN_XSUB_RE
SV *
Perl__invlist_contents(pTHX_ SV* const invlist)
}
/* Like DO_POSIX_LATIN1_ONLY_KNOWN, but for the complement. A combination of
- * this and DO_N_POSIX */
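+ * this and DO_N_POSIX. Sets <matches_above_unicode> to TRUE only when the
+ * complement is deferred to <run_time_list> (i.e., when not
+ * AT_LEAST_ASCII_RESTRICTED); leaves it unchanged otherwise */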
#define DO_N_POSIX_LATIN1_ONLY_KNOWN(node, class, destlist, sourcelist, \
- l1_sourcelist, Xpropertyname, run_time_list) \
+ l1_sourcelist, Xpropertyname, run_time_list, matches_above_unicode) \
if (AT_LEAST_ASCII_RESTRICTED) { \
_invlist_union_complement_2nd(destlist, sourcelist, &destlist); \
} \
else { \
Perl_sv_catpvf(aTHX_ run_time_list, "!utf8::%s\n", Xpropertyname); \
+ matches_above_unicode = TRUE; \
if (LOC) { \
- ANYOF_CLASS_SET(node, namedclass); \
+ ANYOF_CLASS_SET(node, namedclass); \
} \
else { \
SV* scratch_list = NULL; \
UV value = 0; /* XXX:dmq: needs to be referenceable (unfortunately) */
register regnode *ret;
STRLEN numlen;
- IV namedclass;
+ IV namedclass = OOB_NAMEDCLASS;
char *rangebegin = NULL;
bool need_class = 0;
bool allow_full_fold = TRUE; /* Assume wants multi-char folding */
STRLEN initial_listsv_len = 0; /* Kind of a kludge to see if it is more
than just initialized. */
SV* properties = NULL; /* Code points that match \p{} \P{} */
+ SV* posixes = NULL; /* Code points that match classes like, [:word:],
+ extended beyond the Latin1 range */
UV element_count = 0; /* Number of distinct elements in the class.
Optimizations may be possible if this is tiny */
UV n;
/* Set if a component of this character class is user-defined; just passed
* on to the engine */
- UV has_user_defined_property = 0;
+ bool has_user_defined_property = FALSE;
/* inversion list of code points this node matches only when the target
* string is in UTF-8. (Because is under /d) */
UV literal_endpoint = 0;
#endif
UV stored = 0; /* how many chars stored in the bitmap */
+ bool invert = FALSE; /* Is this class to be complemented */
+
+ /* Is there anything like \W or [:^digit:] that matches above the legal
+ * Unicode range? */
+ bool runtime_posix_matches_above_Unicode = FALSE;
regnode * const orig_emit = RExC_emit; /* Save the original RExC_emit in
case we need to change the emitted regop to an EXACT. */
if (UCHARAT(RExC_parse) == '^') { /* Complement of range. */
RExC_naughty++;
RExC_parse++;
- if (!SIZE_ONLY)
- ANYOF_FLAGS(ret) |= ANYOF_INVERT;
+ invert = TRUE;
/* We have decided to not allow multi-char folds in inverted character
* classes, due to the confusion that can happen, especially with
SV** invlistsvp;
SV* invlist;
char* name;
+
if (UCHARAT(RExC_parse) == '^') {
RExC_parse++;
n--;
Perl_sv_catpvf(aTHX_ listsv, "%cutf8::%s\n",
(value == 'p' ? '+' : '!'),
name);
- has_user_defined_property = 1;
+ has_user_defined_property = TRUE;
/* We don't know yet, so have to assume that the
* property could match something in the Latin1 range,
/* Here, did get the swash and its inversion list. If
* the swash is from a user-defined property, then this
* whole character class should be regarded as such */
- SV** user_defined_svp =
- hv_fetchs(MUTABLE_HV(SvRV(swash)),
- "USER_DEFINED", FALSE);
- if (user_defined_svp) {
- has_user_defined_property
- |= SvUV(*user_defined_svp);
- }
+ /* Only ever set the flag here, never clear it: an earlier
+ * user-defined property in this same class must not be
+ * forgotten when a later property is not user-defined */
+ if (_is_swash_user_defined(swash)) {
+ has_user_defined_property = TRUE;
+ }
/* Invert if asking for the complement */
if (value == 'P') {
- _invlist_union_complement_2nd(properties, invlist, &properties);
+ _invlist_union_complement_2nd(properties,
+ invlist,
+ &properties);
/* The swash can't be used as-is, because we've
* inverted things; delay removing it to here after
literal_endpoint++;
#endif
- if (namedclass > OOB_NAMEDCLASS) { /* this is a named class \blah */
-
- /* What matches in a locale is not known until runtime, so need to
- * (one time per class) allocate extra space to pass to regexec.
- * The space will contain a bit for each named class that is to be
- * matched against. This isn't needed for \p{} and pseudo-classes,
- * as they are not affected by locale, and hence are dealt with
- * separately */
- if (LOC && namedclass < ANYOF_MAX && ! need_class) {
+ /* What matches in a locale is not known until runtime. This
+ * includes what the Posix classes (like \w, [:space:]) match.
+ * Room must be reserved (one time per class) to store such
+ * classes, either if Perl is compiled so that locale nodes always
+ * should have this space, or if there is such class info to be
+ * stored. The space will contain a bit for each named class that
+ * is to be matched against. This isn't needed for \p{} and
+ * pseudo-classes, as they are not affected by locale, and hence
+ * are dealt with separately */
+ if (LOC
+ && ! need_class
+ && (ANYOF_LOCALE == ANYOF_CLASS
+ || (namedclass > OOB_NAMEDCLASS && namedclass < ANYOF_MAX)))
+ {
need_class = 1;
if (SIZE_ONLY) {
RExC_size += ANYOF_CLASS_SKIP - ANYOF_SKIP;
ANYOF_FLAGS(ret) |= ANYOF_CLASS;
}
+ if (namedclass > OOB_NAMEDCLASS) { /* this is a named class \blah */
+
/* a bad range like a-\d, a-[:digit:]. The '-' is taken as a
* literal, as is the character that began the false range, i.e.
* the 'a' in the examples */
switch ((I32)namedclass) {
case ANYOF_ALNUMC: /* C's alnum, in contrast to \w */
- DO_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, properties,
+ DO_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, posixes,
PL_PosixAlnum, PL_L1PosixAlnum, "XPosixAlnum", listsv);
break;
case ANYOF_NALNUMC:
- DO_N_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, properties,
- PL_PosixAlnum, PL_L1PosixAlnum, "XPosixAlnum", listsv);
+ DO_N_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, posixes,
+ PL_PosixAlnum, PL_L1PosixAlnum, "XPosixAlnum", listsv,
+ runtime_posix_matches_above_Unicode);
break;
case ANYOF_ALPHA:
- DO_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, properties,
+ DO_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, posixes,
PL_PosixAlpha, PL_L1PosixAlpha, "XPosixAlpha", listsv);
break;
case ANYOF_NALPHA:
- DO_N_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, properties,
- PL_PosixAlpha, PL_L1PosixAlpha, "XPosixAlpha", listsv);
+ DO_N_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, posixes,
+ PL_PosixAlpha, PL_L1PosixAlpha, "XPosixAlpha", listsv,
+ runtime_posix_matches_above_Unicode);
break;
case ANYOF_ASCII:
if (LOC) {
ANYOF_CLASS_SET(ret, namedclass);
}
else {
- _invlist_union(properties, PL_ASCII, &properties);
+ _invlist_union(posixes, PL_ASCII, &posixes);
}
break;
case ANYOF_NASCII:
ANYOF_CLASS_SET(ret, namedclass);
}
else {
- _invlist_union_complement_2nd(properties,
- PL_ASCII, &properties);
+ _invlist_union_complement_2nd(posixes,
+ PL_ASCII, &posixes);
if (DEPENDS_SEMANTICS) {
ANYOF_FLAGS(ret) |= ANYOF_NON_UTF8_LATIN1_ALL;
}
}
break;
case ANYOF_BLANK:
- DO_POSIX(ret, namedclass, properties,
+ DO_POSIX(ret, namedclass, posixes,
PL_PosixBlank, PL_XPosixBlank);
break;
case ANYOF_NBLANK:
- DO_N_POSIX(ret, namedclass, properties,
+ DO_N_POSIX(ret, namedclass, posixes,
PL_PosixBlank, PL_XPosixBlank);
break;
case ANYOF_CNTRL:
- DO_POSIX(ret, namedclass, properties,
+ DO_POSIX(ret, namedclass, posixes,
PL_PosixCntrl, PL_XPosixCntrl);
break;
case ANYOF_NCNTRL:
- DO_N_POSIX(ret, namedclass, properties,
+ DO_N_POSIX(ret, namedclass, posixes,
PL_PosixCntrl, PL_XPosixCntrl);
break;
case ANYOF_DIGIT:
/* There are no digits in the Latin1 range outside of
* ASCII, so call the macro that doesn't have to resolve
* them */
- DO_POSIX_LATIN1_ONLY_KNOWN_L1_RESOLVED(ret, namedclass, properties,
+ DO_POSIX_LATIN1_ONLY_KNOWN_L1_RESOLVED(ret, namedclass, posixes,
PL_PosixDigit, "XPosixDigit", listsv);
has_special_charset_op = TRUE;
break;
case ANYOF_NDIGIT:
- DO_N_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, properties,
- PL_PosixDigit, PL_PosixDigit, "XPosixDigit", listsv);
+ DO_N_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, posixes,
+ PL_PosixDigit, PL_PosixDigit, "XPosixDigit", listsv,
+ runtime_posix_matches_above_Unicode);
has_special_charset_op = TRUE;
break;
case ANYOF_GRAPH:
- DO_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, properties,
+ DO_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, posixes,
PL_PosixGraph, PL_L1PosixGraph, "XPosixGraph", listsv);
break;
case ANYOF_NGRAPH:
- DO_N_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, properties,
- PL_PosixGraph, PL_L1PosixGraph, "XPosixGraph", listsv);
+ DO_N_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, posixes,
+ PL_PosixGraph, PL_L1PosixGraph, "XPosixGraph", listsv,
+ runtime_posix_matches_above_Unicode);
break;
case ANYOF_HORIZWS:
/* For these, we use the cp_list, as /d doesn't make a
Xname = "XPosixLower";
}
if (namedclass == ANYOF_LOWER) {
- DO_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, properties,
+ DO_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, posixes,
ascii_source, l1_source, Xname, listsv);
}
else {
DO_N_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass,
- properties, ascii_source, l1_source, Xname, listsv);
+ posixes, ascii_source, l1_source, Xname, listsv,
+ runtime_posix_matches_above_Unicode);
}
break;
}
case ANYOF_PRINT:
- DO_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, properties,
+ DO_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, posixes,
PL_PosixPrint, PL_L1PosixPrint, "XPosixPrint", listsv);
break;
case ANYOF_NPRINT:
- DO_N_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, properties,
- PL_PosixPrint, PL_L1PosixPrint, "XPosixPrint", listsv);
+ DO_N_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, posixes,
+ PL_PosixPrint, PL_L1PosixPrint, "XPosixPrint", listsv,
+ runtime_posix_matches_above_Unicode);
break;
case ANYOF_PUNCT:
- DO_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, properties,
+ DO_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, posixes,
PL_PosixPunct, PL_L1PosixPunct, "XPosixPunct", listsv);
break;
case ANYOF_NPUNCT:
- DO_N_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, properties,
- PL_PosixPunct, PL_L1PosixPunct, "XPosixPunct", listsv);
+ DO_N_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, posixes,
+ PL_PosixPunct, PL_L1PosixPunct, "XPosixPunct", listsv,
+ runtime_posix_matches_above_Unicode);
break;
case ANYOF_PSXSPC:
- DO_POSIX(ret, namedclass, properties,
+ DO_POSIX(ret, namedclass, posixes,
PL_PosixSpace, PL_XPosixSpace);
break;
case ANYOF_NPSXSPC:
- DO_N_POSIX(ret, namedclass, properties,
+ DO_N_POSIX(ret, namedclass, posixes,
PL_PosixSpace, PL_XPosixSpace);
break;
case ANYOF_SPACE:
- DO_POSIX(ret, namedclass, properties,
+ DO_POSIX(ret, namedclass, posixes,
PL_PerlSpace, PL_XPerlSpace);
has_special_charset_op = TRUE;
break;
case ANYOF_NSPACE:
- DO_N_POSIX(ret, namedclass, properties,
+ DO_N_POSIX(ret, namedclass, posixes,
PL_PerlSpace, PL_XPerlSpace);
has_special_charset_op = TRUE;
break;
Xname = "XPosixUpper";
}
if (namedclass == ANYOF_UPPER) {
- DO_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, properties,
+ DO_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, posixes,
ascii_source, l1_source, Xname, listsv);
}
else {
DO_N_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass,
- properties, ascii_source, l1_source, Xname, listsv);
+ posixes, ascii_source, l1_source, Xname, listsv,
+ runtime_posix_matches_above_Unicode);
}
break;
}
case ANYOF_ALNUM: /* Really is 'Word' */
- DO_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, properties,
+ DO_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, posixes,
PL_PosixWord, PL_L1PosixWord, "XPosixWord", listsv);
has_special_charset_op = TRUE;
break;
case ANYOF_NALNUM:
- DO_N_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, properties,
- PL_PosixWord, PL_L1PosixWord, "XPosixWord", listsv);
+ DO_N_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, posixes,
+ PL_PosixWord, PL_L1PosixWord, "XPosixWord", listsv,
+ runtime_posix_matches_above_Unicode);
has_special_charset_op = TRUE;
break;
case ANYOF_VERTWS:
has_special_non_charset_op = TRUE;
break;
case ANYOF_XDIGIT:
- DO_POSIX(ret, namedclass, properties,
+ DO_POSIX(ret, namedclass, posixes,
PL_PosixXDigit, PL_XPosixXDigit);
break;
case ANYOF_NXDIGIT:
- DO_N_POSIX(ret, namedclass, properties,
+ DO_N_POSIX(ret, namedclass, posixes,
PL_PosixXDigit, PL_XPosixXDigit);
break;
case ANYOF_MAX:
|| (prevvalue == '0' && value == '9')))
{
U8 op;
- bool invert = ANYOF_FLAGS(ret) & ANYOF_INVERT;
const char * cur_parse = RExC_parse;
if (has_special_charset_op) {
SV* fold_intersection = NULL;
- const UV highest_index = invlist_len(cp_list) - 1;
-
/* In the Latin1 range, the characters that can be folded-to or -from
* are precisely the alphabetic characters. If the highest code point
* is within Latin1, we can use the compiled-in list, and not have to
- * go out to disk. If the last element in the array is in the
- * inversion list set, it starts a range that goes to infinity, so the
- * maximum of the inversion list is definitely above Latin1.
- * Otherwise, it starts a range that isn't in the set, so the max is
- * one less than it */
- if (! ELEMENT_RANGE_MATCHES_INVLIST(highest_index)
- && invlist_array(cp_list)[highest_index] <= 256)
- {
+ * go out to disk. */
+ if (invlist_highest(cp_list) < 256) {
_invlist_intersection(PL_L1PosixAlpha, cp_list, &fold_intersection);
}
else {
switch (j) {
case 'k':
case 'K':
- /* KELVIN SIGN */
cp_list =
- add_cp_to_invlist(cp_list, 0x212A);
+ add_cp_to_invlist(cp_list, KELVIN_SIGN);
break;
case 's':
case 'S':
- /* LATIN SMALL LETTER LONG S */
- cp_list =
- add_cp_to_invlist(cp_list, 0x017F);
+ cp_list = add_cp_to_invlist(cp_list,
+ LATIN_SMALL_LETTER_LONG_S);
break;
case MICRO_SIGN:
cp_list = add_cp_to_invlist(cp_list,
break;
case LATIN_CAPITAL_LETTER_A_WITH_RING_ABOVE:
case LATIN_SMALL_LETTER_A_WITH_RING_ABOVE:
- /* ANGSTROM SIGN */
cp_list =
- add_cp_to_invlist(cp_list, 0x212B);
+ add_cp_to_invlist(cp_list, ANGSTROM_SIGN);
break;
case LATIN_SMALL_LETTER_Y_WITH_DIAERESIS:
cp_list = add_cp_to_invlist(cp_list,
SvREFCNT_dec(fold_intersection);
}
- /* And combine the result (if any) with any inversion list from properties.
- * The lists are kept separate up to now because we don't want to fold the
- * properties */
- if (properties) {
+ /* And combine the result (if any) with any inversion list from posix
+ * classes. The lists are kept separate up to now because we don't want to
+ * fold the classes */
+ if (posixes) {
if (AT_LEAST_UNI_SEMANTICS) {
if (cp_list) {
- _invlist_union(cp_list, properties, &cp_list);
- SvREFCNT_dec(properties);
+ _invlist_union(cp_list, posixes, &cp_list);
+ SvREFCNT_dec(posixes);
}
else {
- cp_list = properties;
+ cp_list = posixes;
}
}
else {
- /* Under /d, we put the things that match only when the target
- * string is utf8, into a separate list */
+ /* Under /d, we put into a separate list the Latin1 things that
+ * match only when the target string is utf8 */
SV* nonascii_but_latin1_properties = NULL;
- _invlist_intersection(properties, PL_Latin1,
+ _invlist_intersection(posixes, PL_Latin1,
&nonascii_but_latin1_properties);
_invlist_subtract(nonascii_but_latin1_properties, PL_ASCII,
&nonascii_but_latin1_properties);
- _invlist_subtract(properties, nonascii_but_latin1_properties,
- &properties);
+ _invlist_subtract(posixes, nonascii_but_latin1_properties,
+ &posixes);
if (cp_list) {
- _invlist_union(cp_list, properties, &cp_list);
- SvREFCNT_dec(properties);
+ _invlist_union(cp_list, posixes, &cp_list);
+ SvREFCNT_dec(posixes);
}
else {
- cp_list = properties;
+ cp_list = posixes;
}
if (depends_list) {
}
}
+ /* And combine the result (if any) with any inversion list from properties.
+ * The lists are kept separate up to now so that we can distinguish the two
+ * in regards to matching above-Unicode. A run-time warning is generated
+ * if a Unicode property is matched against a non-Unicode code point. But,
+ * we allow user-defined properties to match anything, without any warning,
+ * and we also suppress the warning if there is a portion of the character
+ * class that isn't a Unicode property, and which matches above Unicode, \W
+ * or [\x{110000}] for example.
+ * (Note that in this case, unlike the Posix one above, there is no
+ * <depends_list>, because having a Unicode property forces Unicode
+ * semantics.) */
+ if (properties) {
+ bool warn_super = ! has_user_defined_property;
+ if (cp_list) {
+
+ /* If it matters to the final outcome, see if a non-property
+ * component of the class matches above Unicode. If so, the
+ * warning gets suppressed. This is true even if just a single
+ * such code point is specified. Although that is not strictly
+ * correct when a different above-Unicode code point is matched
+ * against, the fact that they are using above-Unicode code
+ * points indicates they should know the issues involved */
+ if (warn_super) {
+ bool non_prop_matches_above_Unicode =
+ runtime_posix_matches_above_Unicode
+ | (invlist_highest(cp_list) > PERL_UNICODE_MAX);
+ if (invert) {
+ non_prop_matches_above_Unicode =
+ ! non_prop_matches_above_Unicode;
+ }
+ warn_super = ! non_prop_matches_above_Unicode;
+ }
+
+ _invlist_union(properties, cp_list, &cp_list);
+ SvREFCNT_dec(properties);
+ }
+ else {
+ cp_list = properties;
+ }
+
+ if (warn_super) {
+ ANYOF_FLAGS(ret) |= ANYOF_WARN_SUPER;
+ }
+ }
+
/* Here, we have calculated what code points should be in the character
* class.
*
* optimize locale. Doing so perhaps could be done as long as there is
* nothing like \w in it; some thought also would have to be given to the
* interaction with above 0x100 chars */
- if ((ANYOF_FLAGS(ret) & ANYOF_INVERT)
+ if (invert
&& ! LOC
&& ! depends_list
&& ! unicode_alternate
}
/* Clear the invert flag since have just done it here */
- ANYOF_FLAGS(ret) &= ~ANYOF_INVERT;
+ invert = FALSE;
}
/* Here, <cp_list> contains all the code points we can determine at
}
}
+ if (invert) {
+ ANYOF_FLAGS(ret) |= ANYOF_INVERT;
+ }
+
/* Combine the two lists into one. */
if (depends_list) {
if (cp_list) {
* av[2] stores the multicharacter foldings, used later in
* regexec.c:S_reginclass().
* av[3] stores the cp_list inversion list for use in addition or
- * instead of av[0]; not used if av[1] isn't NULL
+ * instead of av[0]; used only if av[1] is NULL
* av[4] is set if any component of the class is from a user-defined
- * property; not used if av[1] isn't NULL */
+ * property; used only if av[1] is NULL */
AV * const av = newAV();
SV *rv;