return invlist;
}
+SV*
+Perl__setup_canned_invlist(pTHX_ const STRLEN size, const UV element0, UV** other_elements_ptr)
+{
+ /* Build and return an inversion list that the caller will finish filling
+ * in. 'size' is the total number of elements the list is to have, and
+ * 'element0' is its very first element. Only that first element is dealt
+ * with here; on return '*other_elements_ptr' points at the slot
+ * immediately following it, which is where the caller is expected to
+ * store the remaining elements.
+ *
+ * The caller is trusted to populate those remaining elements correctly;
+ * nothing here checks them.
+ *
+ * (The first element has to be passed in because the underlying code
+ * behaves differently depending on whether it is zero or non-zero) */
+
+ SV* const new_list = _new_invlist(size);
+
+ PERL_ARGS_ASSERT__SETUP_CANNED_INVLIST;
+
+ /* Go through the normal append path for the first element, so that the
+ * list's offset is whatever that code decides for 'element0' */
+ _append_range_to_invlist(new_list, element0, element0);
+
+ /* Now declare the full length, keeping the offset as it stands after the
+ * append */
+ invlist_set_len(new_list, size, *get_invlist_offset_addr(new_list));
+
+ /* Everything past the first array slot is left for the caller */
+ *other_elements_ptr = invlist_array(new_list) + 1;
+
+ return new_list;
+}
+
#endif
PERL_STATIC_INLINE SV*
*get_invlist_offset_addr(invlist) = ! *get_invlist_offset_addr(invlist);
}
-void
-Perl__invlist_invert_prop(pTHX_ SV* const invlist)
-{
- /* Complement the input inversion list (which must be a Unicode property,
- * all of which don't match above the Unicode maximum code point.) And
- * Perl has chosen to not have the inversion match above that either. This
- * adds a 0x110000 if the list didn't end with it, and removes it if it did
- *
- * The list is modified in place and nothing is returned. The steps are:
- * invert the list with _invlist_invert(), then, unless the result is
- * empty, look at its final element and either append
- * PERL_UNICODE_MAX + 1 or drop the final element, as described above.
- */
-
- UV len;
- UV* array;
-
- PERL_ARGS_ASSERT__INVLIST_INVERT_PROP;
-
- _invlist_invert(invlist);
-
- len = _invlist_len(invlist);
-
- if (len != 0) { /* If empty do nothing */
- array = invlist_array(invlist);
- if (array[len - 1] != PERL_UNICODE_MAX + 1) {
- /* Add 0x110000. First, grow if necessary */
- len++;
- if (invlist_max(invlist) < len) {
- invlist_extend(invlist, len);
- array = invlist_array(invlist); /* re-fetch; presumably extending can move the storage */
- }
- invlist_set_len(invlist, len, *get_invlist_offset_addr(invlist)); /* offset is passed through unchanged */
- array[len - 1] = PERL_UNICODE_MAX + 1;
- }
- else { /* Remove the 0x110000 */
- invlist_set_len(invlist, len - 1, *get_invlist_offset_addr(invlist)); /* shortening the length drops the final element */
- }
- }
-
- return;
-}
#endif
PERL_STATIC_INLINE SV*
* Unicode range? */
bool runtime_posix_matches_above_Unicode = FALSE;
+ bool warn_super = ALWAYS_WARN_SUPER;
+
regnode * const orig_emit = RExC_emit; /* Save the original RExC_emit in
case we need to change the emitted regop to an EXACT. */
const char * orig_parse = RExC_parse;
* would cause things in <depends_list> to match
* inappropriately, except that any \p{}, including
* this one forces Unicode semantics, which means there
- * is <no depends_list> */
+ * is no <depends_list> */
ANYOF_FLAGS(ret) |= ANYOF_NONBITMAP_NON_UTF8;
}
else {
/* Here, did get the swash and its inversion list. If
* the swash is from a user-defined property, then this
* whole character class should be regarded as such */
- has_user_defined_property =
- (swash_init_flags
- & _CORE_SWASH_INIT_USER_DEFINED_PROPERTY);
+ if (swash_init_flags
+ & _CORE_SWASH_INIT_USER_DEFINED_PROPERTY)
+ {
+ has_user_defined_property = TRUE;
+ }
+ else if
+ /* We warn on matching an above-Unicode code point
+ * if the match would return true, except don't
+ * warn for \p{All}, which has exactly one element
+ * = 0 */
+ (_invlist_contains_cp(invlist, 0x110000)
+ && (! (_invlist_len(invlist) == 1
+ && *invlist_array(invlist) == 0)))
+ {
+ warn_super = TRUE;
+ }
+
/* Invert if asking for the complement */
if (value == 'P') {
return ret;
}
- /* If the character class contains only a single element, it may be
- * optimizable into another node type which is smaller and runs faster.
- * Check if this is the case for this class */
+ /* Here, we've gone through the entire class and dealt with multi-char
+ * folds. We are now in a position that we can do some checks to see if we
+ * can optimize this ANYOF node into a simpler one, even in Pass 1.
+ * Currently we only do two checks:
+ * 1) in the unlikely event that the user has specified both of a pair of
+ * complements, e.g., \w and \W under /l, the class matches everything.
+ * (This optimization is done only so that the optimizer code run later works.)
+ * 2) if the character class contains only a single element (including a
+ * single range), we see if there is an equivalent node for it.
+ * Other checks are possible */
if (! ret_invlist /* Can't optimize if returning the constructed
inversion list */
&& (UNLIKELY(posixl_matches_all) || element_count == 1))
* <depends_list>, because having a Unicode property forces Unicode
* semantics */
if (properties) {
- bool warn_super = ! has_user_defined_property;
if (cp_list) {
/* If it matters to the final outcome, see if a non-property