loopdone: /* Jumped to when encounters something that shouldn't be
in the node */
- /* Free up any over-allocated space */
- change_engine_size(pRExC_state, - (initial_size - STR_SZ(len)));
+ /* Free up any over-allocated space; cast is to silence bogus
+ * warning in MS VC */
+ change_engine_size(pRExC_state,
+ - (Ptrdiff_t) (initial_size - STR_SZ(len)));
/* I (khw) don't know if you can get here with zero length, but the
* old code handled this situation by creating a zero-length EXACT
FALSE, /* Require return to be an ANYOF */
¤t))
{
- FAIL2("panic: regclass returned failure to handle_sets, "
- "flags=%#" UVxf, (UV) *flagp);
+ goto regclass_failed;
}
/* regclass() will return with parsing just the \ sequence,
FALSE, /* Require return to be an ANYOF */
¤t))
{
- FAIL2("panic: regclass returned failure to handle_sets, "
- "flags=%#" UVxf, (UV) *flagp);
+ goto regclass_failed;
}
if (! current) {
}
if (!node)
- FAIL2("panic: regclass returned failure to handle_sets, flags=%#" UVxf,
- PTR2UV(flagp));
+ goto regclass_failed;
/* Fix up the node type if we are in locale. (We have pretended we are
* under /u for the purposes of regclass(), as this construct will only
nextchar(pRExC_state);
Set_Node_Length(REGNODE_p(node), RExC_parse - oregcomp_parse + 1); /* MJD */
return node;
+
+ regclass_failed:
+ FAIL2("panic: regclass returned failure to handle_sets, " "flags=%#" UVxf,
+ (UV) *flagp);
}
#ifdef ENABLE_REGEX_SETS_DEBUGGING
S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
const bool stop_at_1, /* Just parse the next thing, don't
look for a full character class */
- bool allow_multi_folds,
+ bool allow_mutiple_chars,
const bool silence_non_portable, /* Don't output warnings
about too large
characters */
#if UNICODE_MAJOR_VERSION < 3 /* no multifolds in early Unicode */ \
|| (UNICODE_MAJOR_VERSION == 3 && UNICODE_DOT_VERSION == 0 \
&& UNICODE_DOT_DOT_VERSION == 0)
- allow_multi_folds = FALSE;
+ allow_mutiple_chars = FALSE;
#endif
/* We include the /i status at the beginning of this so that we can
if (UCHARAT(RExC_parse) == '^') { /* Complement the class */
RExC_parse++;
invert = TRUE;
- allow_multi_folds = FALSE;
+ allow_mutiple_chars = FALSE;
MARK_NAUGHTY(1);
SKIP_BRACKETED_WHITE_SPACE(skip_white, RExC_parse);
}
else { /* cp_count > 1 */
assert(cp_count > 1);
if (! RExC_in_multi_char_class) {
- if (invert || range || *RExC_parse == '-') {
+ if ( ! allow_mutiple_chars
+ || invert
+ || range
+ || *RExC_parse == '-')
+ {
if (strict) {
RExC_parse--;
vFAIL("\\N{} in inverted character class or as a range end-point is restricted to one character");
) {
SV* scratch_list = NULL;
- /* What the Posix classes (like \w, [:space:]) match in locale
- * isn't knowable under locale until actual match time. A
+ /* What the Posix classes (like \w, [:space:]) match isn't
+ * generally knowable under locale until actual match time. A
* special node is used for these which has extra space for a
* bitmap, with a bit reserved for each named class that is to
- * be matched against. This isn't needed for \p{} and
+ * be matched against. (This isn't needed for \p{} and
* pseudo-classes, as they are not affected by locale, and
- * hence are dealt with separately */
- POSIXL_SET(posixl, namedclass);
- has_runtime_dependency |= HAS_L_RUNTIME_DEPENDENCY;
- anyof_flags |= ANYOF_MATCHES_POSIXL;
-
- /* The above-Latin1 characters are not subject to locale rules.
- * Just add them to the unconditionally-matched list */
-
- /* Get the list of the above-Latin1 code points this matches */
- _invlist_intersection_maybe_complement_2nd(PL_AboveLatin1,
- PL_XPosix_ptrs[classnum],
-
- /* Odd numbers are complements, like
- * NDIGIT, NASCII, ... */
- namedclass % 2 != 0,
- &scratch_list);
- /* Checking if 'cp_list' is NULL first saves an extra clone.
- * Its reference count will be decremented at the next union,
- * etc, or if this is the only instance, at the end of the
- * routine */
- if (! cp_list) {
- cp_list = scratch_list;
- }
- else {
- _invlist_union(cp_list, scratch_list, &cp_list);
- SvREFCNT_dec_NN(scratch_list);
+ * hence are dealt with separately.) However, if a named class
+ * and its complement are both present, then it matches
+ * everything, and there is no runtime dependency. Odd numbers
+ * are the complements of the next lower number, so xor works.
+ * (Note that something like [\w\D] should match everything,
+ * because \d should be a proper subset of \w. But rather than
+ * trust that the locale is well behaved, we leave this to
+ * runtime to sort out) */
+ if (POSIXL_TEST(posixl, namedclass ^ 1)) {
+ cp_list = _add_range_to_invlist(cp_list, 0, UV_MAX);
+ POSIXL_ZERO(posixl);
+ has_runtime_dependency &= ~HAS_L_RUNTIME_DEPENDENCY;
+ anyof_flags &= ~ANYOF_MATCHES_POSIXL;
+ continue; /* We could ignore the rest of the class, but
+ best to parse it for any errors */
+ }
+ else { /* Here, isn't the complement of any already parsed
+ class */
+ POSIXL_SET(posixl, namedclass);
+ has_runtime_dependency |= HAS_L_RUNTIME_DEPENDENCY;
+ anyof_flags |= ANYOF_MATCHES_POSIXL;
+
+ /* The above-Latin1 characters are not subject to locale
+ * rules. Just add them to the unconditionally-matched
+ * list */
+
+ /* Get the list of the above-Latin1 code points this
+ * matches */
+ _invlist_intersection_maybe_complement_2nd(PL_AboveLatin1,
+ PL_XPosix_ptrs[classnum],
+
+ /* Odd numbers are complements,
+ * like NDIGIT, NASCII, ... */
+ namedclass % 2 != 0,
+ &scratch_list);
+ /* Checking if 'cp_list' is NULL first saves an extra
+ * clone. Its reference count will be decremented at the
+ * next union, etc, or if this is the only instance, at the
+ * end of the routine */
+ if (! cp_list) {
+ cp_list = scratch_list;
+ }
+ else {
+ _invlist_union(cp_list, scratch_list, &cp_list);
+ SvREFCNT_dec_NN(scratch_list);
+ }
+ continue; /* Go get next character */
}
- continue; /* Go get next character */
}
else {
* "ss" =~ /^[^\xDF]+$/i => N
*
* See [perl #89750] */
- if (FOLD && allow_multi_folds && value == prevvalue) {
+ if (FOLD && allow_mutiple_chars && value == prevvalue) {
if ( value == LATIN_SMALL_LETTER_SHARP_S
|| (value > 255 && _invlist_contains_cp(PL_HasMultiCharFold,
value)))
RESTORE_CONTEXT;
}
+STATIC SV *
+S_get_fq_name(pTHX_
+ const char * const name, /* The first non-blank in the \p{}, \P{} */
+ const Size_t name_len, /* Its length in bytes, not including any trailing space */
+ const bool is_utf8, /* ? Is 'name' encoded in UTF-8 */
+ const bool has_colon_colon
+ )
+{
+ /* Returns a mortal SV containing the fully qualified version of the input
+ * name */
+
+ SV * fq_name;
+
+ fq_name = newSVpvs_flags("", SVs_TEMP);
+
+ /* Use the current package if it wasn't included in our input */
+ if (! has_colon_colon) {
+ const HV * pkg = (IN_PERL_COMPILETIME)
+ ? PL_curstash
+ : CopSTASH(PL_curcop);
+ const char* pkgname = HvNAME(pkg);
+
+ Perl_sv_catpvf(aTHX_ fq_name, "%" UTF8f,
+ UTF8fARG(is_utf8, strlen(pkgname), pkgname));
+ sv_catpvs(fq_name, "::");
+ }
+
+ Perl_sv_catpvf(aTHX_ fq_name, "%" UTF8f,
+ UTF8fARG(is_utf8, name_len, name));
+ return fq_name;
+}
+
SV *
Perl_parse_uniprop_string(pTHX_
it is the definition. Otherwise it is a
string containing the fully qualified sub
name of 'name' */
+ SV * fq_name = NULL; /* For user-defined properties, the fully
+ qualified name */
bool invert_return = FALSE; /* ? Do we need to complement the result before
returning it */
dSP;
SV * user_sub_sv = MUTABLE_SV(user_sub);
SV * error; /* Any error returned by calling 'user_sub' */
- SV * fq_name; /* Fully qualified property name */
+ SV * key; /* The key into the hash of user defined sub names
+ */
SV * placeholder;
- char to_fold_string[] = "0:"; /* The 0 gets overwritten with the
- actual value */
SV ** saved_user_prop_ptr; /* Hash entry for this property */
/* How many times to retry when another thread is in the middle of
* should the need arise, passing the /i status as a parameter.
*
* We start by constructing the hash key name, consisting of the
- * fully qualified subroutine name */
- fq_name = sv_2mortal(newSV(10)); /* 10 is just a guess */
- (void) cv_name(user_sub, fq_name, 0);
-
- /* But precede the sub name in the key with the /i status, so that
- * there is a key for /i and a different key for non-/i */
- to_fold_string[0] = to_fold + '0';
- sv_insert(fq_name, 0, 0, to_fold_string, 2);
+ * fully qualified subroutine name, preceded by the /i status, so
+ * that there is a key for /i and a different key for non-/i */
+ key = newSVpvn(((to_fold) ? "1" : "0"), 1);
+ fq_name = S_get_fq_name(aTHX_ name, name_len, is_utf8,
+ non_pkg_begin != 0);
+ sv_catsv(key, fq_name);
+ sv_2mortal(key);
/* We only call the sub once throughout the life of the program
* (with the /i, non-/i exception noted above). That means the
/* If we have an entry for this key, the subroutine has already
* been called once with this /i status. */
saved_user_prop_ptr = hv_fetch(PL_user_def_props,
- SvPVX(fq_name), SvCUR(fq_name), 0);
+ SvPVX(key), SvCUR(key), 0);
if (saved_user_prop_ptr) {
/* If the saved result is an inversion list, it is the valid
* */
SWITCH_TO_GLOBAL_CONTEXT;
placeholder= newSVuv(PTR2IV(ORIGINAL_CONTEXT));
- (void) hv_store_ent(PL_user_def_props, fq_name, placeholder, 0);
+ (void) hv_store_ent(PL_user_def_props, key, placeholder, 0);
RESTORE_CONTEXT;
/* Now that we have a placeholder, we can let other threads
USER_PROP_MUTEX_UNLOCK;
/* Make sure the placeholder always gets destroyed */
- SAVEDESTRUCTOR_X(S_delete_recursion_entry, SvPVX(fq_name));
+ SAVEDESTRUCTOR_X(S_delete_recursion_entry, SvPVX(key));
PUSHMARK(SP);
SAVETMPS;
* and add the permanent entry */
USER_PROP_MUTEX_LOCK;
- S_delete_recursion_entry(aTHX_ SvPVX(fq_name));
+ S_delete_recursion_entry(aTHX_ SvPVX(key));
if (! prop_definition || is_invlist(prop_definition)) {
* property; otherwise use the error message */
SWITCH_TO_GLOBAL_CONTEXT;
(void) hv_store_ent(PL_user_def_props,
- fq_name,
+ key,
((prop_definition)
? newSVsv(prop_definition)
: newSVsv(msg)),
definition_deferred:
/* Here it could yet to be defined, so defer evaluation of this
- * until its needed at runtime. */
- prop_definition = newSVpvs_flags("", SVs_TEMP);
-
- /* To avoid any ambiguity, the package is always specified.
- * Use the current one if it wasn't included in our input */
- if (non_pkg_begin == 0) {
- const HV * pkg = (IN_PERL_COMPILETIME)
- ? PL_curstash
- : CopSTASH(PL_curcop);
- const char* pkgname = HvNAME(pkg);
-
- Perl_sv_catpvf(aTHX_ prop_definition, "%" UTF8f,
- UTF8fARG(is_utf8, strlen(pkgname), pkgname));
- sv_catpvs(prop_definition, "::");
+ * until its needed at runtime. We need the fully qualified property name
+ * to avoid ambiguity, and a trailing newline */
+ if (! fq_name) {
+ fq_name = S_get_fq_name(aTHX_ name, name_len, is_utf8,
+ non_pkg_begin != 0 /* If has "::" */
+ );
}
-
- Perl_sv_catpvf(aTHX_ prop_definition, "%" UTF8f,
- UTF8fARG(is_utf8, name_len, name));
- sv_catpvs(prop_definition, "\n");
+ sv_catpvs(fq_name, "\n");
*user_defined_ptr = TRUE;
- return prop_definition;
+ return fq_name;
}
#endif