char *parse; /* Input-scan pointer. */
char *copy_start; /* start of copy of input within
constructed parse string */
+ char *save_copy_start; /* Provides one level of saving
+ and restoring 'copy_start' */
char *copy_start_in_input; /* Position in input string
corresponding to copy_start */
SSize_t whilem_seen; /* number of WHILEM in this expr */
#define RExC_precomp (pRExC_state->precomp)
#define RExC_copy_start_in_input (pRExC_state->copy_start_in_input)
#define RExC_copy_start_in_constructed (pRExC_state->copy_start)
+#define RExC_save_copy_start_in_constructed (pRExC_state->save_copy_start)
#define RExC_precomp_end (pRExC_state->precomp_end)
#define RExC_rx_sv (pRExC_state->rx_sv)
#define RExC_rx (pRExC_state->rx)
} STMT_END
/* Setting this to NULL is a signal to not output warnings */
-#define TURN_OFF_WARNINGS_IN_SUBSTITUTE_PARSE RExC_copy_start_in_constructed = NULL
-#define RESTORE_WARNINGS RExC_copy_start_in_constructed = RExC_precomp
+#define TURN_OFF_WARNINGS_IN_SUBSTITUTE_PARSE \
+ STMT_START { \
+ RExC_save_copy_start_in_constructed = RExC_copy_start_in_constructed;\
+ RExC_copy_start_in_constructed = NULL; \
+ } STMT_END
+#define RESTORE_WARNINGS \
+ RExC_copy_start_in_constructed = RExC_save_copy_start_in_constructed
/* Since a warning can be generated multiple times as the input is reparsed, we
* output it the first time we come to that point in the parse, but suppress it
unsigned int i;
const U32 n = ARG(node);
bool new_node_has_latin1 = FALSE;
- const U8 flags = OP(node) == ANYOFH ? 0 : ANYOF_FLAGS(node);
+ const U8 flags = OP(node) == ANYOFHb ? 0 : ANYOF_FLAGS(node);
PERL_ARGS_ASSERT_GET_ANYOF_CP_LIST_FOR_SSC;
}
/* Add in the points from the bit map */
- if (OP(node) != ANYOFH) {
+ if (OP(node) != ANYOFH && OP(node) != ANYOFHb) {
for (i = 0; i < NUM_ANYOF_CODE_POINTS; i++) {
if (ANYOF_BITMAP_TEST(node, i)) {
unsigned int start = i++;
* another SSC or a regular ANYOF class. Can create false positives. */
SV* anded_cp_list;
- U8 and_with_flags = (OP(and_with) == ANYOFH) ? 0 : ANYOF_FLAGS(and_with);
+ U8 and_with_flags = (OP(and_with) == ANYOFHb) ? 0 : ANYOF_FLAGS(and_with);
U8 anded_flags;
PERL_ARGS_ASSERT_SSC_AND;
SV* ored_cp_list;
U8 ored_flags;
- U8 or_with_flags = (OP(or_with) == ANYOFH) ? 0 : ANYOF_FLAGS(or_with);
+ U8 or_with_flags = (OP(or_with) == ANYOFHb) ? 0 : ANYOF_FLAGS(or_with);
PERL_ARGS_ASSERT_SSC_OR;
case ANYOFL:
case ANYOFPOSIXL:
case ANYOFH:
+ case ANYOFHb:
case ANYOF:
if (flags & SCF_DO_STCLASS_AND)
ssc_and(pRExC_state, data->start_class,
const char* name;
name = get_regex_charset_name(RExC_rx->extflags, &len);
- if strEQ(name, DEPENDS_PAT_MODS) { /* /d under UTF-8 => /u */
+ if (strEQ(name, DEPENDS_PAT_MODS)) { /* /d under UTF-8 => /u */
assert(RExC_utf8);
name = UNICODE_PAT_MODS;
len = sizeof(UNICODE_PAT_MODS) - 1;
RExC_sawback = 1;
ret = reganode(pRExC_state,
((! FOLD)
- ? NREF
+ ? REFN
: (ASCII_FOLD_RESTRICTED)
- ? NREFFA
+ ? REFFAN
: (AT_LEAST_UNI_SEMANTICS)
- ? NREFFU
+ ? REFFUN
: (LOC)
- ? NREFFL
- : NREFF),
+ ? REFFLN
+ : REFFN),
num);
*flagp |= HASWIDTH;
if (!SvIOK(max_open)) {
sv_setiv(max_open, RE_COMPILE_RECURSION_INIT);
}
- if (depth > 4 * SvIV(max_open)) { /* We increase depth by 4 for each open
- paren */
+ if (depth > 4 * (UV) SvIV(max_open)) { /* We increase depth by 4 for each
+ open paren */
vFAIL("Too many nested open parens");
}
RExC_rxi->data->data[num]=(void*)sv_dat;
SvREFCNT_inc_simple_void_NN(sv_dat);
}
- ret = reganode(pRExC_state, NGROUPP, num);
+ ret = reganode(pRExC_state, GROUPPN, num);
goto insert_if_check_paren;
}
else if (memBEGINs(RExC_parse,
value = (U8 *) SvPV(value_sv, value_len);
/* See if the result is one code point vs 0 or multiple */
- if (value_len > 0 && value_len <= ((SvUTF8(value_sv))
- ? UTF8SKIP(value)
- : 1))
+ if (value_len > 0 && value_len <= (UV) ((SvUTF8(value_sv))
+ ? UTF8SKIP(value)
+ : 1))
{
/* Here, exactly one code point. If that isn't what is wanted,
* fail */
has_micro_sign = TRUE;
}
- *(s++) = (char) (DEPENDS_SEMANTICS)
- ? toFOLD(ender)
-
- /* Under /u, the fold of any
- * character in the 0-255 range
- * happens to be its lowercase
- * equivalent, except for LATIN SMALL
- * LETTER SHARP S, which was handled
- * above, and the MICRO SIGN, whose
- * fold requires UTF-8 to represent.
- * */
- : toLOWER_L1(ender);
+ *(s++) = (DEPENDS_SEMANTICS)
+ ? (char) toFOLD(ender)
+
+ /* Under /u, the fold of any character in
+ * the 0-255 range happens to be its
+ * lowercase equivalent, except for LATIN
+ * SMALL LETTER SHARP S, which was handled
+ * above, and the MICRO SIGN, whose fold
+ * requires UTF-8 to represent. */
+ : (char) toLOWER_L1(ender);
}
} /* End of adding current character to the node */
loopdone: /* Jumped to when encounters something that shouldn't be
in the node */
- /* Free up any over-allocated space */
- change_engine_size(pRExC_state, - (initial_size - STR_SZ(len)));
+ /* Free up any over-allocated space; cast is to silence bogus
+ * warning in MS VC */
+ change_engine_size(pRExC_state,
+ - (Ptrdiff_t) (initial_size - STR_SZ(len)));
/* I (khw) don't know if you can get here with zero length, but the
* old code handled this situation by creating a zero-length EXACT
RExC_emit += STR_SZ(len);
/* If the node isn't a single character, it can't be SIMPLE */
- if (len > ((UTF) ? UVCHR_SKIP(ender) : 1)) {
+ if (len > (Size_t) ((UTF) ? UVCHR_SKIP(ender) : 1)) {
maybe_SIMPLE = 0;
}
assert(PL_regkind[OP(node)] == ANYOF);
/* There is no bitmap for this node type */
- if (OP(node) == ANYOFH) {
+ if (OP(node) == ANYOFH || OP(node) == ANYOFHb) {
return;
}
FALSE, /* Require return to be an ANYOF */
¤t))
{
- FAIL2("panic: regclass returned failure to handle_sets, "
- "flags=%#" UVxf, (UV) *flagp);
+ goto regclass_failed;
}
/* regclass() will return with parsing just the \ sequence,
FALSE, /* Require return to be an ANYOF */
¤t))
{
- FAIL2("panic: regclass returned failure to handle_sets, "
- "flags=%#" UVxf, (UV) *flagp);
+ goto regclass_failed;
}
if (! current) {
}
if (!node)
- FAIL2("panic: regclass returned failure to handle_sets, flags=%#" UVxf,
- PTR2UV(flagp));
+ goto regclass_failed;
/* Fix up the node type if we are in locale. (We have pretended we are
* under /u for the purposes of regclass(), as this construct will only
nextchar(pRExC_state);
Set_Node_Length(REGNODE_p(node), RExC_parse - oregcomp_parse + 1); /* MJD */
return node;
+
+ regclass_failed:
+ FAIL2("panic: regclass returned failure to handle_sets, " "flags=%#" UVxf,
+ (UV) *flagp);
}
#ifdef ENABLE_REGEX_SETS_DEBUGGING
S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
const bool stop_at_1, /* Just parse the next thing, don't
look for a full character class */
- bool allow_multi_folds,
+ bool allow_mutiple_chars,
const bool silence_non_portable, /* Don't output warnings
about too large
characters */
#if UNICODE_MAJOR_VERSION < 3 /* no multifolds in early Unicode */ \
|| (UNICODE_MAJOR_VERSION == 3 && UNICODE_DOT_VERSION == 0 \
&& UNICODE_DOT_DOT_VERSION == 0)
- allow_multi_folds = FALSE;
+ allow_mutiple_chars = FALSE;
#endif
/* We include the /i status at the beginning of this so that we can
if (UCHARAT(RExC_parse) == '^') { /* Complement the class */
RExC_parse++;
invert = TRUE;
- allow_multi_folds = FALSE;
+ allow_mutiple_chars = FALSE;
MARK_NAUGHTY(1);
SKIP_BRACKETED_WHITE_SPACE(skip_white, RExC_parse);
}
else { /* cp_count > 1 */
assert(cp_count > 1);
if (! RExC_in_multi_char_class) {
- if (invert || range || *RExC_parse == '-') {
+ if ( ! allow_mutiple_chars
+ || invert
+ || range
+ || *RExC_parse == '-')
+ {
if (strict) {
RExC_parse--;
vFAIL("\\N{} in inverted character class or as a range end-point is restricted to one character");
) {
SV* scratch_list = NULL;
- /* What the Posix classes (like \w, [:space:]) match in locale
- * isn't knowable under locale until actual match time. A
+ /* What the Posix classes (like \w, [:space:]) match isn't
+ * generally knowable under locale until actual match time. A
* special node is used for these which has extra space for a
* bitmap, with a bit reserved for each named class that is to
- * be matched against. This isn't needed for \p{} and
+ * be matched against. (This isn't needed for \p{} and
* pseudo-classes, as they are not affected by locale, and
- * hence are dealt with separately */
- POSIXL_SET(posixl, namedclass);
- has_runtime_dependency |= HAS_L_RUNTIME_DEPENDENCY;
- anyof_flags |= ANYOF_MATCHES_POSIXL;
-
- /* The above-Latin1 characters are not subject to locale rules.
- * Just add them to the unconditionally-matched list */
-
- /* Get the list of the above-Latin1 code points this matches */
- _invlist_intersection_maybe_complement_2nd(PL_AboveLatin1,
- PL_XPosix_ptrs[classnum],
-
- /* Odd numbers are complements, like
- * NDIGIT, NASCII, ... */
- namedclass % 2 != 0,
- &scratch_list);
- /* Checking if 'cp_list' is NULL first saves an extra clone.
- * Its reference count will be decremented at the next union,
- * etc, or if this is the only instance, at the end of the
- * routine */
- if (! cp_list) {
- cp_list = scratch_list;
- }
- else {
- _invlist_union(cp_list, scratch_list, &cp_list);
- SvREFCNT_dec_NN(scratch_list);
+ * hence are dealt with separately.) However, if a named class
+ * and its complement are both present, then it matches
+ * everything, and there is no runtime dependency. Odd numbers
+ * are the complements of the next lower number, so xor works.
+ * (Note that something like [\w\D] should match everything,
+ * because \d should be a proper subset of \w. But rather than
+ * trust that the locale is well behaved, we leave this to
+ * runtime to sort out) */
+ if (POSIXL_TEST(posixl, namedclass ^ 1)) {
+ cp_list = _add_range_to_invlist(cp_list, 0, UV_MAX);
+ POSIXL_ZERO(posixl);
+ has_runtime_dependency &= ~HAS_L_RUNTIME_DEPENDENCY;
+ anyof_flags &= ~ANYOF_MATCHES_POSIXL;
+ continue; /* We could ignore the rest of the class, but
+ best to parse it for any errors */
+ }
+ else { /* Here, isn't the complement of any already parsed
+ class */
+ POSIXL_SET(posixl, namedclass);
+ has_runtime_dependency |= HAS_L_RUNTIME_DEPENDENCY;
+ anyof_flags |= ANYOF_MATCHES_POSIXL;
+
+ /* The above-Latin1 characters are not subject to locale
+ * rules. Just add them to the unconditionally-matched
+ * list */
+
+ /* Get the list of the above-Latin1 code points this
+ * matches */
+ _invlist_intersection_maybe_complement_2nd(PL_AboveLatin1,
+ PL_XPosix_ptrs[classnum],
+
+ /* Odd numbers are complements,
+ * like NDIGIT, NASCII, ... */
+ namedclass % 2 != 0,
+ &scratch_list);
+ /* Checking if 'cp_list' is NULL first saves an extra
+ * clone. Its reference count will be decremented at the
+ * next union, etc, or if this is the only instance, at the
+ * end of the routine */
+ if (! cp_list) {
+ cp_list = scratch_list;
+ }
+ else {
+ _invlist_union(cp_list, scratch_list, &cp_list);
+ SvREFCNT_dec_NN(scratch_list);
+ }
+ continue; /* Go get next character */
}
- continue; /* Go get next character */
}
else {
* "ss" =~ /^[^\xDF]+$/i => N
*
* See [perl #89750] */
- if (FOLD && allow_multi_folds && value == prevvalue) {
+ if (FOLD && allow_mutiple_chars && value == prevvalue) {
if ( value == LATIN_SMALL_LETTER_SHARP_S
|| (value > 255 && _invlist_contains_cp(PL_HasMultiCharFold,
value)))
literal
);
}
- else if isMNEMONIC_CNTRL(value) {
+ else if (isMNEMONIC_CNTRL(value)) {
vWARN4(RExC_parse,
"\"%.*s\" is more clearly written simply as \"%s\"",
(int) (RExC_parse - rangebegin),
RExC_emit += 1 + STR_SZ(len);
STR_LEN(REGNODE_p(ret)) = len;
if (len == 1) {
- *STRING(REGNODE_p(ret)) = value;
+ *STRING(REGNODE_p(ret)) = (U8) value;
}
else {
uvchr_to_utf8((U8 *) STRING(REGNODE_p(ret)), value);
/* If the lowest and highest code point in the class have the same
* UTF-8 first byte, then all do, and we can store that byte for
* regexec.c to use so that it can more quickly scan the target
- * string for potential matches for this class. We co-opt the the
- * flags field for this. Zero means, they don't have the same
- * first byte. We do accept here very large code points (for
- * future use), but don't bother with this optimization for them,
- * as it would cause other complications */
- if (highest_cp > IV_MAX) {
- anyof_flags = 0;
- }
- else {
+ * string for potential matches for this class. We co-opt the
+ * flags field for this, and make the node ANYOFb. We do accept
+ * here very large code points (for future use), but don't do
+ * this optimization for them, as it would cause other
+ * complications */
+ op = ANYOFH;
+ if (highest_cp <= IV_MAX) {
U8 low_utf8[UTF8_MAXBYTES+1];
U8 high_utf8[UTF8_MAXBYTES+1];
(void) uvchr_to_utf8(low_utf8, start[0]);
(void) uvchr_to_utf8(high_utf8, invlist_highest(cp_list));
- anyof_flags = (low_utf8[0] == high_utf8[0])
- ? low_utf8[0]
- : 0;
+ if (low_utf8[0] == high_utf8[0]) {
+ anyof_flags = low_utf8[0];
+ op = ANYOFHb;
+ }
}
- op = ANYOFH;
+ goto done_finding_op;
}
} /* End of seeing if can optimize it into a different node */
is_anyof: /* It's going to be an ANYOF node. */
- if (op != ANYOFH) {
- op = (has_runtime_dependency & HAS_D_RUNTIME_DEPENDENCY)
- ? ANYOFD
- : ((posixl)
- ? ANYOFPOSIXL
- : ((LOC)
- ? ANYOFL
- : ANYOF));
- }
+ op = (has_runtime_dependency & HAS_D_RUNTIME_DEPENDENCY)
+ ? ANYOFD
+ : ((posixl)
+ ? ANYOFPOSIXL
+ : ((LOC)
+ ? ANYOFL
+ : ANYOF));
+
+ done_finding_op:
ret = regnode_guts(pRExC_state, op, regarglen[op], "anyof");
FILL_NODE(ret, op); /* We set the argument later */
}
if (reg_off_by_arg[OP(REGNODE_p(scan))]) {
- assert(val - scan <= U32_MAX);
+ assert((UV) (val - scan) <= U32_MAX);
ARG_SET(REGNODE_p(scan), val - scan);
}
else {
);
});
if (reg_off_by_arg[OP(REGNODE_p(scan))]) {
- assert(val - scan <= U32_MAX);
+ assert((UV) (val - scan) <= U32_MAX);
ARG_SET(REGNODE_p(scan), val - scan);
}
else {
name_list= RExC_paren_name_list;
}
if (name_list) {
- if ( k != REF || (OP(o) < NREF)) {
+ if ( k != REF || (OP(o) < REFN)) {
SV **name= av_fetch(name_list, parno, 0 );
if (name)
Perl_sv_catpvf(aTHX_ sv, " '%" SVf "'", SVfARG(*name));
/* 2: embedded, otherwise 1 */
Perl_sv_catpvf(aTHX_ sv, "[%d]", o->flags);
else if (k == ANYOF) {
- const U8 flags = (OP(o) == ANYOFH) ? 0 : ANYOF_FLAGS(o);
+ const U8 flags = (OP(o) == ANYOFHb) ? 0 : ANYOF_FLAGS(o);
bool do_sep = FALSE; /* Do we need to separate various components of
the output? */
/* Set if there is still an unresolved user-defined property */
/* Ready to start outputting. First, the initial left bracket */
Perl_sv_catpvf(aTHX_ sv, "[%s", PL_colors[0]);
- if (OP(o) != ANYOFH) {
+ if (OP(o) != ANYOFH && OP(o) != ANYOFHb) {
/* Then all the things that could fit in the bitmap */
do_sep = put_charclass_bitmap_innards(sv,
ANYOF_BITMAP(o),
/* And finally the matching, closing ']' */
Perl_sv_catpvf(aTHX_ sv, "%s]", PL_colors[1]);
- if (OP(o) == ANYOFH && FLAGS(o) != 0) {
+ if (OP(o) == ANYOFHb) {
Perl_sv_catpvf(aTHX_ sv, " (First UTF-8 byte=\\x%02x)", FLAGS(o));
}
RESTORE_CONTEXT;
}
+STATIC SV *
+S_get_fq_name(pTHX_
+ const char * const name, /* The first non-blank in the \p{}, \P{} */
+ const Size_t name_len, /* Its length in bytes, not including any trailing space */
+ const bool is_utf8, /* ? Is 'name' encoded in UTF-8 */
+ const bool has_colon_colon
+ )
+{
+ /* Returns a mortal SV containing the fully qualified version of the input
+ * name */
+
+ SV * fq_name;
+
+ fq_name = newSVpvs_flags("", SVs_TEMP);
+
+ /* Use the current package if it wasn't included in our input */
+ if (! has_colon_colon) {
+ const HV * pkg = (IN_PERL_COMPILETIME)
+ ? PL_curstash
+ : CopSTASH(PL_curcop);
+ const char* pkgname = HvNAME(pkg);
+
+ Perl_sv_catpvf(aTHX_ fq_name, "%" UTF8f,
+ UTF8fARG(is_utf8, strlen(pkgname), pkgname));
+ sv_catpvs(fq_name, "::");
+ }
+
+ Perl_sv_catpvf(aTHX_ fq_name, "%" UTF8f,
+ UTF8fARG(is_utf8, name_len, name));
+ return fq_name;
+}
+
SV *
Perl_parse_uniprop_string(pTHX_
int slash_pos = -1; /* Where the '/' is found, or negative if none */
int table_index = 0; /* The entry number for this property in the table
of all Unicode property names */
- bool starts_with_In_or_Is = FALSE; /* ? Does the name start with 'In' or
- 'Is' */
+ bool starts_with_Is = FALSE; /* ? Does the name start with 'Is' */
Size_t lookup_offset = 0; /* Used to ignore the first few characters of
the normalized name in certain situations */
Size_t non_pkg_begin = 0; /* Offset of first byte in 'name' that isn't
it is the definition. Otherwise it is a
string containing the fully qualified sub
name of 'name' */
+ SV * fq_name = NULL; /* For user-defined properties, the fully
+ qualified name */
bool invert_return = FALSE; /* ? Do we need to complement the result before
returning it */
pos_in_brackets = strchr("([<)]>)]>", open);
close = (pos_in_brackets) ? pos_in_brackets[3] : open;
- if ( name[name_len-1] != close
+ if ( i >= name_len
+ || name[name_len-1] != close
|| (escaped && name[name_len-2] != '\\'))
{
sv_catpvs(msg, "Unicode property wildcard not terminated");
&& name[non_pkg_begin+0] == 'I'
&& (name[non_pkg_begin+1] == 'n' || name[non_pkg_begin+1] == 's'))
{
- starts_with_In_or_Is = TRUE;
+ /* Names that start with In have different characterstics than those
+ * that start with Is */
+ if (name[non_pkg_begin+1] == 's') {
+ starts_with_Is = TRUE;
+ }
}
else {
could_be_user_defined = FALSE;
if (could_be_user_defined) {
CV* user_sub;
+ /* If the user defined property returns the empty string, it could
+ * easily be because the pattern is being compiled before the data it
+ * actually needs to compile is available. This could be argued to be
+ * a bug in the perl code, but this is a change of behavior for Perl,
+ * so we handle it. This means that intentionally returning nothing
+ * will not be resolved until runtime */
+ bool empty_return = FALSE;
+
/* Here, the name could be for a user defined property, which are
* implemented as subs. */
user_sub = get_cvn_flags(name, name_len, 0);
if (user_sub) {
+ const char insecure[] = "Insecure user-defined property";
/* Here, there is a sub by the correct name. Normally we call it
* to get the property definition */
dSP;
SV * user_sub_sv = MUTABLE_SV(user_sub);
SV * error; /* Any error returned by calling 'user_sub' */
- SV * fq_name; /* Fully qualified property name */
+ SV * key; /* The key into the hash of user defined sub names
+ */
SV * placeholder;
- char to_fold_string[] = "0:"; /* The 0 gets overwritten with the
- actual value */
SV ** saved_user_prop_ptr; /* Hash entry for this property */
/* How many times to retry when another thread is in the middle of
/* If we get here, we know this property is user-defined */
*user_defined_ptr = TRUE;
- /* We refuse to call a tainted subroutine; returning an error
- * instead */
+ /* We refuse to call a potentially tainted subroutine; returning an
+ * error instead */
if (TAINT_get) {
if (SvCUR(msg) > 0) sv_catpvs(msg, "; ");
- sv_catpvs(msg, "Insecure user-defined property");
+ sv_catpvn(msg, insecure, sizeof(insecure) - 1);
goto append_name_to_msg;
}
* should the need arise, passing the /i status as a parameter.
*
* We start by constructing the hash key name, consisting of the
- * fully qualified subroutine name */
- fq_name = sv_2mortal(newSV(10)); /* 10 is just a guess */
- (void) cv_name(user_sub, fq_name, 0);
-
- /* But precede the sub name in the key with the /i status, so that
- * there is a key for /i and a different key for non-/i */
- to_fold_string[0] = to_fold + '0';
- sv_insert(fq_name, 0, 0, to_fold_string, 2);
+ * fully qualified subroutine name, preceded by the /i status, so
+ * that there is a key for /i and a different key for non-/i */
+ key = newSVpvn(((to_fold) ? "1" : "0"), 1);
+ fq_name = S_get_fq_name(aTHX_ name, name_len, is_utf8,
+ non_pkg_begin != 0);
+ sv_catsv(key, fq_name);
+ sv_2mortal(key);
/* We only call the sub once throughout the life of the program
* (with the /i, non-/i exception noted above). That means the
/* If we have an entry for this key, the subroutine has already
* been called once with this /i status. */
saved_user_prop_ptr = hv_fetch(PL_user_def_props,
- SvPVX(fq_name), SvCUR(fq_name), 0);
+ SvPVX(key), SvCUR(key), 0);
if (saved_user_prop_ptr) {
/* If the saved result is an inversion list, it is the valid
* for this property in the hash. So we have the go ahead to
* expand the definition ourselves. */
+ PUSHSTACKi(PERLSI_MAGIC);
ENTER;
/* Create a temporary placeholder in the hash to detect recursion
* */
SWITCH_TO_GLOBAL_CONTEXT;
placeholder= newSVuv(PTR2IV(ORIGINAL_CONTEXT));
- (void) hv_store_ent(PL_user_def_props, fq_name, placeholder, 0);
+ (void) hv_store_ent(PL_user_def_props, key, placeholder, 0);
RESTORE_CONTEXT;
/* Now that we have a placeholder, we can let other threads
USER_PROP_MUTEX_UNLOCK;
/* Make sure the placeholder always gets destroyed */
- SAVEDESTRUCTOR_X(S_delete_recursion_entry, SvPVX(fq_name));
+ SAVEDESTRUCTOR_X(S_delete_recursion_entry, SvPVX(key));
PUSHMARK(SP);
SAVETMPS;
XPUSHs(boolSV(to_fold));
PUTBACK;
+ /* The following block was taken from swash_init(). Presumably
+ * they apply to here as well, though we no longer use a swash --
+ * khw */
+ SAVEHINTS();
+ save_re_context();
+ /* We might get here via a subroutine signature which uses a utf8
+ * parameter name, at which point PL_subname will have been set
+ * but not yet used. */
+ save_item(PL_subname);
+
(void) call_sv(user_sub_sv, G_EVAL|G_SCALAR);
SPAGAIN;
error = ERRSV;
- if (SvTRUE(error)) {
+ if (TAINT_get || SvTRUE(error)) {
if (SvCUR(msg) > 0) sv_catpvs(msg, "; ");
- sv_catpvs(msg, "Error \"");
- sv_catsv(msg, error);
- sv_catpvs(msg, "\"");
+ if (SvTRUE(error)) {
+ sv_catpvs(msg, "Error \"");
+ sv_catsv(msg, error);
+ sv_catpvs(msg, "\"");
+ }
+ if (TAINT_get) {
+ if (SvTRUE(error)) sv_catpvs(msg, "; ");
+ sv_catpvn(msg, insecure, sizeof(insecure) - 1);
+ }
+
if (name_len > 0) {
sv_catpvs(msg, " in expansion of ");
Perl_sv_catpvf(aTHX_ msg, "%" UTF8f, UTF8fARG(is_utf8,
prop_definition = NULL;
}
else { /* G_SCALAR guarantees a single return value */
+ SV * contents = POPs;
/* The contents is supposed to be the expansion of the property
- * definition. Call a function to check for valid syntax and
- * handle it */
- prop_definition = handle_user_defined_property(name, name_len,
+ * definition. If the definition is deferrable, and we got an
+ * empty string back, set a flag to later defer it (after clean
+ * up below). */
+ if ( deferrable
+ && (! SvPOK(contents) || SvCUR(contents) == 0))
+ {
+ empty_return = TRUE;
+ }
+ else { /* Otherwise, call a function to check for valid syntax,
+ and handle it */
+
+ prop_definition = handle_user_defined_property(
+ name, name_len,
is_utf8, to_fold, runtime,
deferrable,
- POPs, user_defined_ptr,
+ contents, user_defined_ptr,
msg,
level);
+ }
}
/* Here, we have the results of the expansion. Delete the
* and add the permanent entry */
USER_PROP_MUTEX_LOCK;
- S_delete_recursion_entry(aTHX_ SvPVX(fq_name));
-
- if (! prop_definition || is_invlist(prop_definition)) {
+ S_delete_recursion_entry(aTHX_ SvPVX(key));
+ if ( ! empty_return
+ && (! prop_definition || is_invlist(prop_definition)))
+ {
/* If we got success we use the inversion list defining the
* property; otherwise use the error message */
SWITCH_TO_GLOBAL_CONTEXT;
(void) hv_store_ent(PL_user_def_props,
- fq_name,
+ key,
((prop_definition)
? newSVsv(prop_definition)
: newSVsv(msg)),
FREETMPS;
LEAVE;
+ POPSTACK;
+
+ if (empty_return) {
+ goto definition_deferred;
+ }
if (prop_definition) {
/* If it didn't find the property ... */
if (table_index == 0) {
- /* Try again stripping off any initial 'In' or 'Is' */
- if (starts_with_In_or_Is) {
+ /* Try again stripping off any initial 'Is'. This is because we
+ * promise that an initial Is is optional. The same isn't true of
+ * names that start with 'In'. Those can match only blocks, and the
+ * lookup table already has those accounted for. */
+ if (starts_with_Is) {
lookup_name += 2;
lookup_len -= 2;
equals_pos -= 2;
* NV. */
NV value;
+ SSize_t value_len = lookup_len - equals_pos;
/* Get the value */
- if (my_atof3(lookup_name + equals_pos, &value,
- lookup_len - equals_pos)
+ if ( value_len <= 0
+ || my_atof3(lookup_name + equals_pos, &value,
+ value_len)
!= lookup_name + lookup_len)
{
goto failed;
definition_deferred:
/* Here it could yet to be defined, so defer evaluation of this
- * until its needed at runtime. */
- prop_definition = newSVpvs_flags("", SVs_TEMP);
-
- /* To avoid any ambiguity, the package is always specified.
- * Use the current one if it wasn't included in our input */
- if (non_pkg_begin == 0) {
- const HV * pkg = (IN_PERL_COMPILETIME)
- ? PL_curstash
- : CopSTASH(PL_curcop);
- const char* pkgname = HvNAME(pkg);
-
- Perl_sv_catpvf(aTHX_ prop_definition, "%" UTF8f,
- UTF8fARG(is_utf8, strlen(pkgname), pkgname));
- sv_catpvs(prop_definition, "::");
+ * until its needed at runtime. We need the fully qualified property name
+ * to avoid ambiguity, and a trailing newline */
+ if (! fq_name) {
+ fq_name = S_get_fq_name(aTHX_ name, name_len, is_utf8,
+ non_pkg_begin != 0 /* If has "::" */
+ );
}
-
- Perl_sv_catpvf(aTHX_ prop_definition, "%" UTF8f,
- UTF8fARG(is_utf8, name_len, name));
- sv_catpvs(prop_definition, "\n");
+ sv_catpvs(fq_name, "\n");
*user_defined_ptr = TRUE;
- return prop_definition;
+ return fq_name;
}
#endif