#define WORST 0 /* Worst case. */
#define HASWIDTH 0x01 /* Known to match non-null strings. */
-/* Simple enough to be STAR/PLUS operand; in an EXACT node must be a single
- * character. Note that this is not the same thing as REGNODE_SIMPLE */
+/* Simple enough to be STAR/PLUS operand; in an EXACTish node must be a single
+ * character. (There needs to be a case: in the switch statement in regexec.c
+ * for any node marked SIMPLE.) Note that this is not the same thing as
+ * REGNODE_SIMPLE */
#define SIMPLE 0x02
-#define SPSTART 0x04 /* Starts with * or +. */
+#define SPSTART 0x04 /* Starts with * or + */
#define TRYAGAIN 0x08 /* Weeded out a declaration. */
#define POSTPONED 0x10 /* (?1),(?&name), (??{...}) or similar */
IV len = RExC_end - RExC_precomp; \
\
if (!SIZE_ONLY) \
- SAVEDESTRUCTOR_X(clear_re,(void*)RExC_rx_sv); \
+ SAVEFREESV(RExC_rx_sv); \
if (len > RegexLengthToShowInErrorMessages) { \
/* chop 10 shorter than the max, to ensure meaning of "..." */ \
len = RegexLengthToShowInErrorMessages - 10; \
*/
#define vFAIL(m) STMT_START { \
if (!SIZE_ONLY) \
- SAVEDESTRUCTOR_X(clear_re,(void*)RExC_rx_sv); \
+ SAVEFREESV(RExC_rx_sv); \
Simple_vFAIL(m); \
} STMT_END
*/
#define vFAIL2(m,a1) STMT_START { \
if (!SIZE_ONLY) \
- SAVEDESTRUCTOR_X(clear_re,(void*)RExC_rx_sv); \
+ SAVEFREESV(RExC_rx_sv); \
Simple_vFAIL2(m, a1); \
} STMT_END
*/
#define vFAIL3(m,a1,a2) STMT_START { \
if (!SIZE_ONLY) \
- SAVEDESTRUCTOR_X(clear_re,(void*)RExC_rx_sv); \
+ SAVEFREESV(RExC_rx_sv); \
Simple_vFAIL3(m, a1, a2); \
} STMT_END
PerlIO_printf(Perl_debug_log,"\n"); \
});
-static void clear_re(pTHX_ void *r);
-
/* Mark that we cannot extend a found fixed substring at this point.
Update the longest found anchored substring and the longest found
floating substrings if needed. */
ANYOF_BITMAP_SETALL(cl);
cl->flags = ANYOF_CLASS|ANYOF_EOS|ANYOF_UNICODE_ALL
- |ANYOF_LOC_NONBITMAP_FOLD|ANYOF_NON_UTF8_LATIN1_ALL;
+ |ANYOF_NON_UTF8_LATIN1_ALL;
/* If any portion of the regex is to operate under locale rules,
* initialization includes it. The reason this isn't done for all regexes
* necessary. */
if (RExC_contains_locale) {
ANYOF_CLASS_SETALL(cl); /* /l uses class */
- cl->flags |= ANYOF_LOCALE;
+ cl->flags |= ANYOF_LOCALE|ANYOF_LOC_FOLD;
}
else {
ANYOF_CLASS_ZERO(cl); /* Only /l uses class now */
if (!(ANYOF_CLASS_TEST_ANY_SET(and_with))
&& !(ANYOF_CLASS_TEST_ANY_SET(cl))
&& (and_with->flags & ANYOF_LOCALE) == (cl->flags & ANYOF_LOCALE)
- && !(and_with->flags & ANYOF_LOC_NONBITMAP_FOLD)
- && !(cl->flags & ANYOF_LOC_NONBITMAP_FOLD)) {
+ && !(and_with->flags & ANYOF_LOC_FOLD)
+ && !(cl->flags & ANYOF_LOC_FOLD)) {
int i;
if (and_with->flags & ANYOF_INVERT)
* (OK1(i) | OK1(i')) | (!OK1(i) & !OK1(i'))
*/
else if ( (or_with->flags & ANYOF_LOCALE) == (cl->flags & ANYOF_LOCALE)
- && !(or_with->flags & ANYOF_LOC_NONBITMAP_FOLD)
- && !(cl->flags & ANYOF_LOC_NONBITMAP_FOLD) ) {
+ && !(or_with->flags & ANYOF_LOC_FOLD)
+ && !(cl->flags & ANYOF_LOC_FOLD) ) {
int i;
for (i = 0; i < ANYOF_BITMAP_SIZE; i++)
} else { /* 'or_with' is not inverted */
/* (B1 | CL1) | (B2 | CL2) = (B1 | B2) | (CL1 | CL2)) */
if ( (or_with->flags & ANYOF_LOCALE) == (cl->flags & ANYOF_LOCALE)
- && (!(or_with->flags & ANYOF_LOC_NONBITMAP_FOLD)
- || (cl->flags & ANYOF_LOC_NONBITMAP_FOLD)) ) {
+ && (!(or_with->flags & ANYOF_LOC_FOLD)
+ || (cl->flags & ANYOF_LOC_FOLD)) ) {
int i;
/* OR char bitmap and class bitmap separately */
for (i = 0; i < ANYOF_BITMAP_SIZE; i++)
cl->bitmap[i] |= or_with->bitmap[i];
- if (ANYOF_CLASS_TEST_ANY_SET(or_with)) {
- for (i = 0; i < ANYOF_CLASSBITMAP_SIZE; i++)
- cl->classflags[i] |= or_with->classflags[i];
- cl->flags |= ANYOF_CLASS;
- }
+ ANYOF_CLASS_OR(or_with, cl);
}
else { /* XXXX: logic is complicated, leave it along for a moment. */
cl_anything(pRExC_state, cl);
/* Here, the pattern is not UTF-8. Look for the multi-char folds
* that are all ASCII. As in the above case, EXACTFL and EXACTFA
* nodes can't have multi-char folds to this range (and there are
- * no existing ones to the upper latin1 range). In the EXACTF
+ * no existing ones in the upper latin1 range). In the EXACTF
* case we look also for the sharp s, which can be in the final
* position. Otherwise we can stop looking 1 byte earlier because
* have to find at least two characters for a multi-fold */
const U8 s_masked = 's' & S_or_s_mask;
while (s < upper) {
- int len = is_MULTI_CHAR_FOLD_low_safe(s, s_end);
+ int len = is_MULTI_CHAR_FOLD_latin1_safe(s, s_end);
if (! len) { /* Not a multi-char fold. */
if (*s == LATIN_SMALL_LETTER_SHARP_S && OP(scan) == EXACTF)
{
if (uc >= 0x100 ||
(!(data->start_class->flags & (ANYOF_CLASS | ANYOF_LOCALE))
&& !ANYOF_BITMAP_TEST(data->start_class, uc)
- && (!(data->start_class->flags & ANYOF_LOC_NONBITMAP_FOLD)
+ && (!(data->start_class->flags & ANYOF_LOC_FOLD)
|| !ANYOF_BITMAP_TEST(data->start_class, PL_fold_latin1[uc])))
)
{
if (compat) {
ANYOF_BITMAP_SET(data->start_class, uc);
data->start_class->flags &= ~ANYOF_EOS;
- data->start_class->flags |= ANYOF_LOC_NONBITMAP_FOLD;
if (OP(scan) == EXACTFL) {
/* XXX This set is probably no longer necessary, and
* probably wrong as LOCALE now is on in the initial
* state */
- data->start_class->flags |= ANYOF_LOCALE;
+ data->start_class->flags |= ANYOF_LOCALE|ANYOF_LOC_FOLD;
}
else {
}
}
else if (flags & SCF_DO_STCLASS_OR) {
- if (data->start_class->flags & ANYOF_LOC_NONBITMAP_FOLD) {
+ if (data->start_class->flags & ANYOF_LOC_FOLD) {
/* false positive possible if the class is case-folded.
Assume that the locale settings are the same... */
if (uc < 0x100) {
&& data && !(data->flags & (SF_HAS_PAR|SF_IN_PAR))
&& maxcount <= REG_INFTY/3) /* Complement check for big count */
{
+ /* Fatal warnings may leak the regexp without this: */
+ SAVEFREESV(RExC_rx_sv);
ckWARNreg(RExC_parse,
"Quantifier unexpected on zero-length expression");
+ (void)ReREFCNT_inc(RExC_rx_sv);
}
min += minnext * mincount;
#ifdef TRIE_STUDY_OPT
-#define CHECK_RESTUDY_GOTO \
+#define CHECK_RESTUDY_GOTO_butfirst(dOsomething) \
+ STMT_START { \
if ( \
(data.flags & SCF_TRIE_RESTUDY) \
&& ! restudied++ \
- ) goto reStudy
+ ) { \
+ dOsomething; \
+ goto reStudy; \
+ } \
+ } STMT_END
#else
-#define CHECK_RESTUDY_GOTO
+#define CHECK_RESTUDY_GOTO_butfirst
#endif
/*
*
* becomes
*
- * qr'a\\bc def\'ghi\\\\jkl(?{"this is runtime"})mno'
+ * qr'a\\bc_______________________def\'ghi\\\\jkl(?{"this is runtime"})mno'
*
* After eval_sv()-ing that, grab any new code blocks from the returned qr
* and merge them with any code blocks of the original regexp.
/* blank out literal code block */
assert(pat[s] == '(');
while (s <= pRExC_state->code_blocks[n].end) {
- *p++ = ' ';
+ *p++ = '_';
s++;
}
s--;
SPAGAIN;
qr_ref = POPs;
PUTBACK;
- if (SvTRUE(ERRSV))
- Perl_croak(aTHX_ "%s", SvPVx_nolen_const(ERRSV));
+ {
+ SV * const errsv = ERRSV;
+ if (SvTRUE_NN(errsv))
+ {
+ Safefree(pRExC_state->code_blocks);
+ /* use croak_sv ? */
+ Perl_croak_nocontext("%s", SvPV_nolen_const(errsv));
+ }
+ }
assert(SvROK(qr_ref));
qr = SvRV(qr_ref);
assert(SvTYPE(qr) == SVt_REGEXP && RX_ENGINE((REGEXP*)qr)->op_comp);
/* merge the main (r1) and run-time (r2) code blocks into one */
{
- RXi_GET_DECL(((struct regexp*)SvANY(qr)), r2);
+ RXi_GET_DECL(ReANY((REGEXP *)qr), r2);
struct reg_code_block *new_block, *dst;
RExC_state_t * const r1 = pRExC_state; /* convenient alias */
int i1 = 0, i2 = 0;
if (!r2->num_code_blocks) /* we guessed wrong */
+ {
+ SvREFCNT_dec(qr);
return 1;
+ }
Newx(new_block,
r1->num_code_blocks + r2->num_code_blocks,
I32 minlen = 0;
U32 rx_flags;
SV * VOL pat;
+ SV * VOL code_blocksv = NULL;
/* these are all flags - maybe they should be turned
* into a single int with different bit masks */
if (pRExC_state->num_code_blocks) {
o = cLISTOPx(expr)->op_first;
- assert(o->op_type == OP_PUSHMARK);
+ assert( o->op_type == OP_PUSHMARK
+ || (o->op_type == OP_NULL && o->op_targ == OP_PUSHMARK)
+ || o->op_type == OP_PADRANGE);
o = o->op_sibling;
}
SV *sv, *msv = *svp;
SV *rx;
bool code = 0;
+ /* we make the assumption here that each op in the list of
+ * op_siblings maps to one SV pushed onto the stack,
+ * except for code blocks, with have both an OP_NULL and
+ * and OP_CONST.
+ * This allows us to match up the list of SVs against the
+ * list of OPs to find the next code block.
+ *
+ * Note that PUSHMARK PADSV PADSV ..
+ * is optimised to
+ * PADRANGE NULL NULL ..
+ * so the alignment still works. */
if (o) {
if (o->op_type == OP_NULL && (o->op_flags & OPf_SPECIAL)) {
assert(n < pRExC_state->num_code_blocks);
&& RX_ENGINE((REGEXP*)rx)->op_comp)
{
- RXi_GET_DECL(((struct regexp*)SvANY(rx)), ri);
+ RXi_GET_DECL(ReANY((REGEXP *)rx), ri);
if (ri->num_code_blocks) {
int i;
/* the presence of an embedded qr// with code means
for (i=0; i < ri->num_code_blocks; i++) {
struct reg_code_block *src, *dst;
STRLEN offset = orig_patlen
- + ((struct regexp *)SvANY(rx))->pre_prefix;
+ + ReANY((REGEXP *)rx)->pre_prefix;
assert(n < pRExC_state->num_code_blocks);
src = &ri->code_blocks[i];
dst = &pRExC_state->code_blocks[n];
RExC_pm_flags = pm_flags;
if (runtime_code) {
- if (PL_tainting && PL_tainted)
+ if (TAINTING_get && TAINT_get)
Perl_croak(aTHX_ "Eval-group in insecure regular expression");
if (!S_compile_runtime_code(aTHX_ pRExC_state, exp, plen)) {
RExC_lastnum=0;
RExC_lastparse=NULL;
);
+ /* reg may croak on us, not giving us a chance to free
+ pRExC_state->code_blocks. We cannot SAVEFREEPV it now, as we may
+ need it to survive as long as the regexp (qr/(?{})/).
+ We must check that code_blocksv is not already set, because we may
+ have longjmped back. */
+ if (pRExC_state->code_blocks && !code_blocksv) {
+ code_blocksv = newSV_type(SVt_PV);
+ SAVEFREESV(code_blocksv);
+ SvPV_set(code_blocksv, (char *)pRExC_state->code_blocks);
+ SvLEN_set(code_blocksv, 1); /*sufficient to make sv_clear free it*/
+ }
if (reg(pRExC_state, 0, &flags,1) == NULL) {
RExC_precomp = NULL;
- Safefree(pRExC_state->code_blocks);
return(NULL);
}
+ if (code_blocksv)
+ SvLEN_set(code_blocksv,0); /* no you can't have it, sv_clear */
/* Here, finished first pass. Get rid of any added setjmp */
if (used_setjump) {
of zeroing when in debug mode, thus anything assigned has to
happen after that */
rx = (REGEXP*) newSV_type(SVt_REGEXP);
- r = (struct regexp*)SvANY(rx);
+ r = ReANY(rx);
Newxc(ri, sizeof(regexp_internal) + (unsigned)RExC_size * sizeof(regnode),
char, regexp_internal);
if ( r == NULL || ri == NULL )
ri->num_code_blocks = pRExC_state->num_code_blocks;
}
else
+ {
+ int n;
+ for (n = 0; n < pRExC_state->num_code_blocks; n++)
+ if (pRExC_state->code_blocks[n].src_regex)
+ SAVEFREESV(pRExC_state->code_blocks[n].src_regex);
SAVEFREEPV(pRExC_state->code_blocks);
+ }
{
bool has_p = ((r->extflags & RXf_PMf_KEEPCOPY) == RXf_PMf_KEEPCOPY);
+ (sizeof(STD_PAT_MODS) - 1)
+ (sizeof("(?:)") - 1);
- p = sv_grow(MUTABLE_SV(rx), wraplen + 1); /* +1 for the ending NUL */
- SvPOK_on(rx);
+ Newx(p, wraplen + 1, char); /* +1 for the ending NUL */
+ r->xpv_len_u.xpvlenu_pv = p;
if (RExC_utf8)
SvFLAGS(rx) |= SVf_UTF8;
*p++='('; *p++='?';
*p++ = '\n';
*p++ = ')';
*p = 0;
- SvCUR_set(rx, p - SvPVX_const(rx));
+ SvCUR_set(rx, p - RX_WRAPPED(rx));
}
r->intflags = 0;
RExC_seen |= REG_TOP_LEVEL_BRANCHES;
else
RExC_seen &= ~REG_TOP_LEVEL_BRANCHES;
- if (data.last_found) {
- SvREFCNT_dec(data.longest_fixed);
- SvREFCNT_dec(data.longest_float);
- SvREFCNT_dec(data.last_found);
- }
StructCopy(&zero_scan_data, &data, scan_data_t);
}
#else
data.longest_float = newSVpvs("");
data.last_found = newSVpvs("");
data.longest = &(data.longest_fixed);
+ ENTER_with_name("study_chunk");
+ SAVEFREESV(data.longest_fixed);
+ SAVEFREESV(data.longest_float);
+ SAVEFREESV(data.last_found);
first = scan;
if (!ri->regstclass) {
cl_init(pRExC_state, &ch_class);
SCF_DO_SUBSTR | SCF_WHILEM_VISITED_POS | stclass_flag,0);
- CHECK_RESTUDY_GOTO;
+ CHECK_RESTUDY_GOTO_butfirst(LEAVE_with_name("study_chunk"));
if ( RExC_npar == 1 && data.longest == &(data.longest_fixed)
&& (!(RExC_seen & REG_SEEN_GPOS) || (r->extflags & RXf_ANCH_GPOS)))
r->extflags |= RXf_CHECK_ALL;
scan_commit(pRExC_state, &data,&minlen,0);
- SvREFCNT_dec(data.last_found);
longest_float_length = CHR_SVLEN(data.longest_float);
r->float_max_offset = data.offset_float_max;
if (data.offset_float_max < I32_MAX) /* Don't offset infinity */
r->float_max_offset -= data.lookbehind_float;
+ SvREFCNT_inc_simple_void_NN(data.longest_float);
}
else {
r->float_substr = r->float_utf8 = NULL;
- SvREFCNT_dec(data.longest_float);
longest_float_length = 0;
}
data.flags & SF_FIX_BEFORE_MEOL))
{
r->anchored_offset = data.offset_fixed - data.lookbehind_fixed;
+ SvREFCNT_inc_simple_void_NN(data.longest_fixed);
}
else {
r->anchored_substr = r->anchored_utf8 = NULL;
- SvREFCNT_dec(data.longest_fixed);
longest_fixed_length = 0;
}
+ LEAVE_with_name("study_chunk");
if (ri->regstclass
&& (OP(ri->regstclass) == REG_ANY || OP(ri->regstclass) == SANY))
minlen = study_chunk(pRExC_state, &scan, &minlen, &fake, scan + RExC_size,
&data, -1, NULL, NULL, SCF_DO_STCLASS_AND|SCF_WHILEM_VISITED_POS,0);
- CHECK_RESTUDY_GOTO;
+ CHECK_RESTUDY_GOTO_butfirst(NOOP);
r->check_substr = r->check_utf8 = r->anchored_substr = r->anchored_utf8
= r->float_substr = r->float_utf8 = NULL;
if (flags & RXapif_FETCH) {
return reg_named_buff_fetch(rx, key, flags);
} else if (flags & (RXapif_STORE | RXapif_DELETE | RXapif_CLEAR)) {
- Perl_croak_no_modify(aTHX);
+ Perl_croak_no_modify();
return NULL;
} else if (flags & RXapif_EXISTS) {
return reg_named_buff_exists(rx, key, flags)
{
AV *retarray = NULL;
SV *ret;
- struct regexp *const rx = (struct regexp *)SvANY(r);
+ struct regexp *const rx = ReANY(r);
PERL_ARGS_ASSERT_REG_NAMED_BUFF_FETCH;
Perl_reg_named_buff_exists(pTHX_ REGEXP * const r, SV * const key,
const U32 flags)
{
- struct regexp *const rx = (struct regexp *)SvANY(r);
+ struct regexp *const rx = ReANY(r);
PERL_ARGS_ASSERT_REG_NAMED_BUFF_EXISTS;
SV*
Perl_reg_named_buff_firstkey(pTHX_ REGEXP * const r, const U32 flags)
{
- struct regexp *const rx = (struct regexp *)SvANY(r);
+ struct regexp *const rx = ReANY(r);
PERL_ARGS_ASSERT_REG_NAMED_BUFF_FIRSTKEY;
SV*
Perl_reg_named_buff_nextkey(pTHX_ REGEXP * const r, const U32 flags)
{
- struct regexp *const rx = (struct regexp *)SvANY(r);
+ struct regexp *const rx = ReANY(r);
GET_RE_DEBUG_FLAGS_DECL;
PERL_ARGS_ASSERT_REG_NAMED_BUFF_NEXTKEY;
SV *ret;
AV *av;
I32 length;
- struct regexp *const rx = (struct regexp *)SvANY(r);
+ struct regexp *const rx = ReANY(r);
PERL_ARGS_ASSERT_REG_NAMED_BUFF_SCALAR;
SV*
Perl_reg_named_buff_all(pTHX_ REGEXP * const r, const U32 flags)
{
- struct regexp *const rx = (struct regexp *)SvANY(r);
+ struct regexp *const rx = ReANY(r);
AV *av = newAV();
PERL_ARGS_ASSERT_REG_NAMED_BUFF_ALL;
Perl_reg_numbered_buff_fetch(pTHX_ REGEXP * const r, const I32 paren,
SV * const sv)
{
- struct regexp *const rx = (struct regexp *)SvANY(r);
+ struct regexp *const rx = ReANY(r);
char *s = NULL;
I32 i = 0;
I32 s1, t1;
assert(s >= rx->subbeg);
assert(rx->sublen >= (s - rx->subbeg) + i );
if (i >= 0) {
- const int oldtainted = PL_tainted;
+#if NO_TAINT_SUPPORT
+ sv_setpvn(sv, s, i);
+#else
+ const int oldtainted = TAINT_get;
TAINT_NOT;
sv_setpvn(sv, s, i);
- PL_tainted = oldtainted;
+ TAINT_set(oldtainted);
+#endif
if ( (rx->extflags & RXf_CANY_SEEN)
? (RXp_MATCH_UTF8(rx)
&& (!i || is_utf8_string((U8*)s, i)))
}
else
SvUTF8_off(sv);
- if (PL_tainting) {
+ if (TAINTING_get) {
if (RXp_MATCH_TAINTED(rx)) {
if (SvTYPE(sv) >= SVt_PVMG) {
MAGIC* const mg = SvMAGIC(sv);
MAGIC* mgt;
- PL_tainted = 1;
+ TAINT;
SvMAGIC_set(sv, mg->mg_moremagic);
SvTAINT(sv);
if ((mgt = SvMAGIC(sv))) {
SvMAGIC_set(sv, mg);
}
} else {
- PL_tainted = 1;
+ TAINT;
SvTAINT(sv);
}
} else
PERL_UNUSED_ARG(value);
if (!PL_localizing)
- Perl_croak_no_modify(aTHX);
+ Perl_croak_no_modify();
}
I32
Perl_reg_numbered_buff_length(pTHX_ REGEXP * const r, const SV * const sv,
const I32 paren)
{
- struct regexp *const rx = (struct regexp *)SvANY(r);
+ struct regexp *const rx = ReANY(r);
I32 i;
I32 s1, t1;
* And benchmarks show that caching gives better results. We also test
* here if the code point is within the bounds of the list. These tests
* replace others that would have had to be made anyway to make sure that
- * the array bounds were not exceeded, and give us extra information at the
- * same time */
+ * the array bounds were not exceeded, and these give us extra information
+ * at the same time */
if (cp >= array[mid]) {
if (cp >= array[highest_element]) {
return highest_element;
}
#endif
-#if 0
+#ifdef PERL_ARGS_ASSERT__INVLIST_DUMP
void
-S_invlist_dump(pTHX_ SV* const invlist, const char * const header)
+Perl__invlist_dump(pTHX_ SV* const invlist, const char * const header)
{
/* Dumps out the ranges in an inversion list. The string 'header'
* if present is output on a line before the first range */
UV start, end;
+ PERL_ARGS_ASSERT__INVLIST_DUMP;
+
if (header && strlen(header)) {
PerlIO_printf(Perl_debug_log, "%s\n", header);
}
if (end == UV_MAX) {
PerlIO_printf(Perl_debug_log, "0x%04"UVXf" .. INFINITY\n", start);
}
+ else if (end != start) {
+ PerlIO_printf(Perl_debug_log, "0x%04"UVXf" .. 0x%04"UVXf"\n",
+ start, end);
+ }
else {
- PerlIO_printf(Perl_debug_log, "0x%04"UVXf" .. 0x%04"UVXf"\n", start, end);
+ PerlIO_printf(Perl_debug_log, "0x%04"UVXf"\n", start);
}
}
}
}
nest_check:
if (!SIZE_ONLY && !(flags&(HASWIDTH|POSTPONED)) && max > REG_INFTY/3) {
+ SAVEFREESV(RExC_rx_sv); /* in case of fatal warnings */
ckWARN3reg(RExC_parse,
"%.*s matches null string many times",
(int)(RExC_parse >= origparse ? RExC_parse - origparse : 0),
origparse);
+ (void)ReREFCNT_inc(RExC_rx_sv);
}
if (RExC_parse < RExC_end && *RExC_parse == '?') {
if (in_char_class && has_multiple_chars) {
ckWARNreg(endchar, "Using just the first character returned by \\N{} in character class");
}
+
RExC_parse = endbrace + 1;
}
else if (! node_p || ! has_multiple_chars) {
#define POSIXCC_NOTYET(c) ((c) == '=' || (c) == '.')
#define POSIXCC(c) (POSIXCC_DONE(c) || POSIXCC_NOTYET(c))
-STATIC I32
-S_regpposixcc(pTHX_ RExC_state_t *pRExC_state, I32 value)
+PERL_STATIC_INLINE I32
+S_regpposixcc(pTHX_ RExC_state_t *pRExC_state, I32 value, SV *free_me)
{
dVAR;
I32 namedclass = OOB_NAMEDCLASS;
the class closes */
while (UCHARAT(RExC_parse) && UCHARAT(RExC_parse) != ']')
RExC_parse++;
- Simple_vFAIL3("POSIX syntax [%c %c] is reserved for future extensions", c, c);
+ SvREFCNT_dec(free_me);
+ vFAIL3("POSIX syntax [%c %c] is reserved for future extensions", c, c);
}
} else {
/* Maternal grandfather:
return namedclass;
}
-STATIC void
-S_checkposixcc(pTHX_ RExC_state_t *pRExC_state)
-{
- dVAR;
-
- PERL_ARGS_ASSERT_CHECKPOSIXCC;
-
- if (POSIXCC(UCHARAT(RExC_parse))) {
- const char *s = RExC_parse;
- const char c = *s++;
-
- while (isALNUM(*s))
- s++;
- if (*s && c == *s && s[1] == ']') {
- ckWARN3reg(s+2,
- "POSIX syntax [%c %c] belongs inside character classes",
- c, c);
-
- /* [[=foo=]] and [[.foo.]] are still future. */
- if (POSIXCC_NOTYET(c)) {
- /* adjust RExC_parse so the error shows after
- the class closes */
- while (UCHARAT(RExC_parse) && UCHARAT(RExC_parse++) != ']')
- NOOP;
- Simple_vFAIL3("POSIX syntax [%c %c] is reserved for future extensions", c, c);
- }
- }
- }
-}
-
/* Generate the code to add a full posix character <class> to the bracketed
* character class given by <node>. (<node> is needed only under locale rules)
* destlist is the inversion list for non-locale rules that this class is
* reg() gets called (recursively) on the rewritten version, and this
* function will return what it constructs. (Actually the <multi-fold>s
* aren't physically removed from the [abcdefghi], it's just that they are
- * ignored in the recursion by means of a a flag:
+ * ignored in the recursion by means of a flag:
* <RExC_in_multi_char_class>.)
*
* ANYOF nodes contain a bit map for the first 256 characters, with the
dVAR;
UV nextvalue;
- UV prevvalue, save_prevvalue = OOB_UNICODE;
+ UV prevvalue = OOB_UNICODE, save_prevvalue = OOB_UNICODE;
IV range = 0;
- UV value, save_value = 0;
+ UV value = OOB_UNICODE, save_value = OOB_UNICODE;
regnode *ret;
STRLEN numlen;
IV namedclass = OOB_NAMEDCLASS;
char *rangebegin = NULL;
bool need_class = 0;
- bool allow_full_fold = TRUE; /* Assume wants multi-char folding */
SV *listsv = NULL;
STRLEN initial_listsv_len = 0; /* Kind of a kludge to see if it is more
than just initialized. */
/* Assume we are going to generate an ANYOF node. */
ret = reganode(pRExC_state, ANYOF, 0);
-
if (!SIZE_ONLY) {
ANYOF_FLAGS(ret) = 0;
}
if (UCHARAT(RExC_parse) == '^') { /* Complement of range. */
RExC_parse++;
- if (! RExC_in_multi_char_class) {
- invert = TRUE;
- RExC_naughty++;
-
- /* We have decided to not allow multi-char folds in inverted
- * character classes, due to the confusion that can happen,
- * especially with classes that are designed for a non-Unicode
- * world: You have the peculiar case that:
- "s s" =~ /^[^\xDF]+$/i => Y
- "ss" =~ /^[^\xDF]+$/i => N
- *
- * See [perl #89750] */
- allow_full_fold = FALSE;
- }
+ invert = TRUE;
+ RExC_naughty++;
}
if (SIZE_ONLY) {
nextvalue = RExC_parse < RExC_end ? UCHARAT(RExC_parse) : 0;
if (!SIZE_ONLY && POSIXCC(nextvalue))
- checkposixcc(pRExC_state);
+ {
+ const char *s = RExC_parse;
+ const char c = *s++;
+
+ while (isALNUM(*s))
+ s++;
+ if (*s && c == *s && s[1] == ']') {
+ SAVEFREESV(RExC_rx_sv);
+ SAVEFREESV(listsv);
+ ckWARN3reg(s+2,
+ "POSIX syntax [%c %c] belongs inside character classes",
+ c, c);
+ (void)ReREFCNT_inc(RExC_rx_sv);
+ SvREFCNT_inc_simple_void_NN(listsv);
+ }
+ }
/* allow 1st char to be ] (allowing it to be - is dealt with later) */
if (UCHARAT(RExC_parse) == ']')
nextvalue = RExC_parse < RExC_end ? UCHARAT(RExC_parse) : 0;
if (value == '[' && POSIXCC(nextvalue))
- namedclass = regpposixcc(pRExC_state, value);
+ namedclass = regpposixcc(pRExC_state, value, listsv);
else if (value == '\\') {
if (UTF) {
value = utf8n_to_uvchr((U8*)RExC_parse,
Safefree(name);
}
RExC_parse = e + 1;
- namedclass = ANYOF_MAX; /* no official name, but it's named */
+ namedclass = ANYOF_UNIPROP; /* no official name, but it's named */
/* \p means they want Unicode semantics */
RExC_uni_semantics = 1;
default:
/* Allow \_ to not give an error */
if (!SIZE_ONLY && isALNUM(value) && value != '_') {
+ SAVEFREESV(RExC_rx_sv);
+ SAVEFREESV(listsv);
ckWARN2reg(RExC_parse,
"Unrecognized escape \\%c in character class passed through",
(int)value);
+ (void)ReREFCNT_inc(RExC_rx_sv);
+ SvREFCNT_inc_simple_void_NN(listsv);
}
break;
}
const int w =
RExC_parse >= rangebegin ?
RExC_parse - rangebegin : 0;
+ SAVEFREESV(RExC_rx_sv); /* in case of fatal warnings */
+ SAVEFREESV(listsv);
ckWARN4reg(RExC_parse,
"False [] range \"%*.*s\"",
w, w, rangebegin);
+ (void)ReREFCNT_inc(RExC_rx_sv);
+ SvREFCNT_inc_simple_void_NN(listsv);
cp_list = add_cp_to_invlist(cp_list, '-');
cp_list = add_cp_to_invlist(cp_list, prevvalue);
}
SV* scratch_list = NULL;
/* Include all above-Latin1 non-blanks */
- _invlist_subtract(PL_AboveLatin1, PL_XPosixBlank, &scratch_list);
+ _invlist_subtract(PL_AboveLatin1, PL_XPosixBlank,
+ &scratch_list);
/* Add them to the running total of posix classes */
- _invlist_subtract(PL_AboveLatin1, PL_XPosixBlank, &scratch_list);
+ _invlist_subtract(PL_AboveLatin1, PL_XPosixBlank,
+ &scratch_list);
if (! posixes) {
posixes = scratch_list;
}
/* Get the list of all non-ASCII-blanks in Latin 1, and
* add them to the running total */
- _invlist_subtract(PL_Latin1, PL_PosixBlank, &scratch_list);
+ _invlist_subtract(PL_Latin1, PL_PosixBlank,
+ &scratch_list);
_invlist_union(posixes, scratch_list, &posixes);
SvREFCNT_dec(scratch_list);
}
DO_N_POSIX(ret, namedclass, posixes,
PL_PosixXDigit, PL_XPosixXDigit);
break;
- case ANYOF_MAX:
- /* this is to handle \p and \P */
+ case ANYOF_UNIPROP: /* this is to handle \p and \P */
break;
default:
vFAIL("Invalid [::] class");
* For single-valued non-inverted ranges, we consider the possibility
* of multi-char folds. (We made a conscious decision to not do this
* for the other cases because it can often lead to non-intuitive
- * results) */
+ * results. For example, you have the peculiar case that:
+ * "s s" =~ /^[^\xDF]+$/i => Y
+ * "ss" =~ /^[^\xDF]+$/i => N
+ *
+ * See [perl #89750] */
if (FOLD && ! invert && value == prevvalue) {
if (value == LATIN_SMALL_LETTER_SHARP_S
|| (value > 255 && _invlist_contains_cp(PL_HasMultiCharFold,
if (! RExC_in_multi_char_class) {
AV** this_array_ptr;
AV* this_array;
- STRLEN cp_count = utf8_length(foldbuf, foldbuf + foldlen);
+ STRLEN cp_count = utf8_length(foldbuf,
+ foldbuf + foldlen);
SV* multi_fold = sv_2mortal(newSVpvn("", 0));
Perl_sv_catpvf(aTHX_ multi_fold, "\\x{%"UVXf"}", value);
* ok. This makes the test for the ligature 'ffi' come
* before the test for 'ff' */
if (av_exists(multi_char_matches, cp_count)) {
- this_array_ptr = (AV**) av_fetch(multi_char_matches, cp_count, FALSE);
+ this_array_ptr = (AV**) av_fetch(multi_char_matches,
+ cp_count, FALSE);
this_array = *this_array_ptr;
}
else {
this_array = newAV();
- av_store(multi_char_matches, cp_count, (SV*) this_array);
+ av_store(multi_char_matches, cp_count,
+ (SV*) this_array);
}
av_push(this_array, multi_fold);
}
- /* This element should not be processed further in this class */
+ /* This element should not be processed further in this
+ * class */
element_count--;
value = save_value;
prevvalue = save_prevvalue;
AV** this_array_ptr;
SV* this_sequence;
- this_array_ptr = (AV**) av_fetch(multi_char_matches, cp_count, FALSE);
- while ((this_sequence = av_pop(*this_array_ptr)) != &PL_sv_undef) {
+ this_array_ptr = (AV**) av_fetch(multi_char_matches,
+ cp_count, FALSE);
+ while ((this_sequence = av_pop(*this_array_ptr)) !=
+ &PL_sv_undef)
+ {
if (! first_time) {
sv_catpv(substitute_parse, "|");
}
ret = reg(pRExC_state, 1, ®_flags, depth+1);
- *flagp |= reg_flags&(HASWIDTH|SPSTART|POSTPONED);
+ *flagp |= reg_flags&(HASWIDTH|SIMPLE|SPSTART|POSTPONED);
RExC_parse = save_parse;
RExC_end = save_end;
RExC_in_multi_char_class = 0;
SvREFCNT_dec(multi_char_matches);
+ SvREFCNT_dec(listsv);
return ret;
}
*flagp |= HASWIDTH|SIMPLE;
break;
- case ANYOF_MAX:
+ case ANYOF_UNIPROP:
break;
case ANYOF_NBLANK:
ret = reg_node(pRExC_state, op);
- if (PL_regkind[op] == POSIXD) {
+ if (PL_regkind[op] == POSIXD || PL_regkind[op] == NPOSIXD) {
if (! SIZE_ONLY) {
FLAGS(ret) = arg;
}
RExC_parse = (char *) cur_parse;
+ SvREFCNT_dec(posixes);
SvREFCNT_dec(listsv);
+ SvREFCNT_dec(cp_list);
return ret;
}
}
/* If the highest code point is within Latin1, we can use the
* compiled-in Alphas list, and not have to go out to disk. This
- * yields two false positives, the masculine and feminine oridinal
+ * yields two false positives, the masculine and feminine ordinal
* indicators, which are weeded out below using the
* IS_IN_SOME_FOLD_L1() macro */
if (invlist_highest(cp_list) < 256) {
* to force that */
if (! PL_utf8_tofold) {
U8 dummy[UTF8_MAXBYTES+1];
- STRLEN dummy_len;
/* This string is just a short named one above \xff */
- to_utf8_fold((U8*) HYPHEN_UTF8, dummy, &dummy_len);
+ to_utf8_fold((U8*) HYPHEN_UTF8, dummy, NULL);
assert(PL_utf8_tofold); /* Verify that worked */
}
PL_utf8_foldclosures =
- _swash_inversion_hash(PL_utf8_tofold);
+ _swash_inversion_hash(PL_utf8_tofold);
}
}
U8 foldbuf[UTF8_MAXBYTES_CASE+1];
STRLEN foldlen;
- UV f;
SV** listp;
if (j < 256) {
* hard-coded for it. First, get its fold. This is the simple
* fold, as the multi-character folds have been handled earlier
* and separated out */
- f = _to_uni_fold_flags(j, foldbuf, &foldlen,
- ((LOC)
- ? FOLD_FLAGS_LOCALE
- : (ASCII_FOLD_RESTRICTED)
- ? FOLD_FLAGS_NOMIX_ASCII
- : 0));
-
- /* Single character fold of above Latin1. Add everything
- * in its fold closure to the list that this node should
- * match */
- /* The fold closures data structure is a hash with the keys
- * being every character that is folded to, like 'k', and
- * the values each an array of everything that folds to its
- * key. e.g. [ 'k', 'K', KELVIN_SIGN ] */
- if ((listp = hv_fetch(PL_utf8_foldclosures,
- (char *) foldbuf, foldlen, FALSE)))
- {
- AV* list = (AV*) *listp;
- IV k;
- for (k = 0; k <= av_len(list); k++) {
- SV** c_p = av_fetch(list, k, FALSE);
- UV c;
- if (c_p == NULL) {
- Perl_croak(aTHX_ "panic: invalid PL_utf8_foldclosures structure");
- }
- c = SvUV(*c_p);
-
- /* /aa doesn't allow folds between ASCII and non-;
- * /l doesn't allow them between above and below
- * 256 */
- if ((ASCII_FOLD_RESTRICTED
- && (isASCII(c) != isASCII(j)))
- || (LOC && ((c < 256) != (j < 256))))
- {
- continue;
- }
+ _to_uni_fold_flags(j, foldbuf, &foldlen,
+ ((LOC)
+ ? FOLD_FLAGS_LOCALE
+ : (ASCII_FOLD_RESTRICTED)
+ ? FOLD_FLAGS_NOMIX_ASCII
+ : 0));
+
+ /* Single character fold of above Latin1. Add everything in
+ * its fold closure to the list that this node should match.
+ * The fold closures data structure is a hash with the keys
+ * being the UTF-8 of every character that is folded to, like
+ * 'k', and the values each an array of all code points that
+ * fold to its key. e.g. [ 'k', 'K', KELVIN_SIGN ].
+ * Multi-character folds are not included */
+ if ((listp = hv_fetch(PL_utf8_foldclosures,
+ (char *) foldbuf, foldlen, FALSE)))
+ {
+ AV* list = (AV*) *listp;
+ IV k;
+ for (k = 0; k <= av_len(list); k++) {
+ SV** c_p = av_fetch(list, k, FALSE);
+ UV c;
+ if (c_p == NULL) {
+ Perl_croak(aTHX_ "panic: invalid PL_utf8_foldclosures structure");
+ }
+ c = SvUV(*c_p);
- /* Folds involving non-ascii Latin1 characters
- * under /d are added to a separate list */
- if (isASCII(c) || c > 255 || AT_LEAST_UNI_SEMANTICS)
- {
- cp_list = add_cp_to_invlist(cp_list, c);
- }
- else {
- depends_list = add_cp_to_invlist(depends_list, c);
- }
- }
- }
+ /* /aa doesn't allow folds between ASCII and non-; /l
+ * doesn't allow them between above and below 256 */
+ if ((ASCII_FOLD_RESTRICTED
+ && (isASCII(c) != isASCII(j)))
+ || (LOC && ((c < 256) != (j < 256))))
+ {
+ continue;
+ }
+
+ /* Folds involving non-ascii Latin1 characters
+ * under /d are added to a separate list */
+ if (isASCII(c) || c > 255 || AT_LEAST_UNI_SEMANTICS)
+ {
+ cp_list = add_cp_to_invlist(cp_list, c);
+ }
+ else {
+ depends_list = add_cp_to_invlist(depends_list, c);
+ }
+ }
+ }
}
}
SvREFCNT_dec(fold_intersection);
* folded until runtime */
/* Optimize inverted simple patterns (e.g. [^a-z]) when everything is known
- * at compile time. Besides not inverting folded locale now, we can't invert
- * if there are things such as \w, which aren't known until runtime */
+ * at compile time. Besides not inverting folded locale now, we can't
+ * invert if there are things such as \w, which aren't known until runtime
+ * */
if (invert
&& ! (LOC && (FOLD || (ANYOF_FLAGS(ret) & ANYOF_CLASS)))
&& ! depends_list
* fetching) */
if (FOLD && LOC)
{
- ANYOF_FLAGS(ret) |= ANYOF_LOC_NONBITMAP_FOLD;
+ ANYOF_FLAGS(ret) |= ANYOF_LOC_FOLD;
}
/* Some character classes are equivalent to other nodes. Such nodes take
alloc_maybe_populate_EXACT(pRExC_state, ret, flagp, 0, value);
}
+ SvREFCNT_dec(cp_list);
SvREFCNT_dec(listsv);
return ret;
}
* av[1] if NULL, is a placeholder to later contain the swash computed
* from av[0]. But if no further computation need be done, the
* swash is stored there now.
- * av[2] is always NULL
- * av[3] stores the cp_list inversion list for use in addition or
+ * av[2] stores the cp_list inversion list for use in addition or
* instead of av[0]; used only if av[1] is NULL
- * av[4] is set if any component of the class is from a user-defined
+ * av[3] is set if any component of the class is from a user-defined
* property; used only if av[1] is NULL */
AV * const av = newAV();
SV *rv;
av_store(av, 0, (HAS_NONLOCALE_RUNTIME_PROPERTY_DEFINITION)
? listsv
- : &PL_sv_undef);
+ : (SvREFCNT_dec(listsv), &PL_sv_undef));
if (swash) {
av_store(av, 1, swash);
SvREFCNT_dec(cp_list);
else {
av_store(av, 1, NULL);
if (cp_list) {
- av_store(av, 3, cp_list);
- av_store(av, 4, newSVuv(has_user_defined_property));
+ av_store(av, 2, cp_list);
+ av_store(av, 3, newSVuv(has_user_defined_property));
}
}
- av_store(av, 2, NULL);
rv = newRV_noinc(MUTABLE_SV(av));
n = add_data(pRExC_state, 1, "s");
RExC_rxi->data->data[n] = (void*)rv;
/* Should be synchronized with * ANYOF_ #xdefines in regcomp.h */
static const char * const anyofs[] = {
- "\\w",
- "\\W",
- "\\s",
- "\\S",
- "\\d",
- "\\D",
- "[:alnum:]",
- "[:^alnum:]",
+#if _CC_WORDCHAR != 0 || _CC_DIGIT != 1 || _CC_ALPHA != 2 || _CC_LOWER != 3 \
+ || _CC_UPPER != 4 || _CC_PUNCT != 5 || _CC_PRINT != 6 || _CC_ALNUMC != 7 \
+ || _CC_GRAPH != 8 || _CC_SPACE != 9 || _CC_BLANK != 10 \
+ || _CC_XDIGIT != 11 || _CC_PSXSPC != 12 || _CC_CNTRL != 13 \
+ || _CC_ASCII != 14 || _CC_VERTSPACE != 15
+ #error Need to adjust order of anyofs[]
+#endif
+ "[\\w]",
+ "[\\W]",
+ "[\\d]",
+ "[\\D]",
"[:alpha:]",
"[:^alpha:]",
- "[:ascii:]",
- "[:^ascii:]",
- "[:cntrl:]",
- "[:^cntrl:]",
- "[:graph:]",
- "[:^graph:]",
"[:lower:]",
"[:^lower:]",
- "[:print:]",
- "[:^print:]",
- "[:punct:]",
- "[:^punct:]",
"[:upper:]",
"[:^upper:]",
+ "[:punct:]",
+ "[:^punct:]",
+ "[:print:]",
+ "[:^print:]",
+ "[:alnum:]",
+ "[:^alnum:]",
+ "[:graph:]",
+ "[:^graph:]",
+ "[\\s]",
+ "[\\S]",
+ "[:blank:]",
+ "[:^blank:]",
"[:xdigit:]",
"[:^xdigit:]",
"[:space:]",
"[:^space:]",
- "[:blank:]",
- "[:^blank:]"
+ "[:cntrl:]",
+ "[:^cntrl:]",
+ "[:ascii:]",
+ "[:^ascii:]",
+ "[\\v]",
+ "[\\V]"
};
RXi_GET_DECL(prog,progi);
GET_RE_DEBUG_FLAGS_DECL;
if (flags & ANYOF_LOCALE)
sv_catpvs(sv, "{loc}");
- if (flags & ANYOF_LOC_NONBITMAP_FOLD)
+ if (flags & ANYOF_LOC_FOLD)
sv_catpvs(sv, "{i}");
Perl_sv_catpvf(aTHX_ sv, "[%s", PL_colors[0]);
if (flags & ANYOF_INVERT)
Perl_sv_catpvf(aTHX_ sv, "%s]", PL_colors[1]);
}
- else if (k == POSIXD) {
+ else if (k == POSIXD || k == NPOSIXD) {
U8 index = FLAGS(o) * 2;
if (index > (sizeof(anyofs) / sizeof(anyofs[0]))) {
Perl_sv_catpvf(aTHX_ sv, "[illegal type=%d])", index);
Perl_re_intuit_string(pTHX_ REGEXP * const r)
{ /* Assume that RE_INTUIT is set */
dVAR;
- struct regexp *const prog = (struct regexp *)SvANY(r);
+ struct regexp *const prog = ReANY(r);
GET_RE_DEBUG_FLAGS_DECL;
PERL_ARGS_ASSERT_RE_INTUIT_STRING;
Perl_pregfree2(pTHX_ REGEXP *rx)
{
dVAR;
- struct regexp *const r = (struct regexp *)SvANY(rx);
+ struct regexp *const r = ReANY(rx);
GET_RE_DEBUG_FLAGS_DECL;
PERL_ARGS_ASSERT_PREGFREE2;
} else {
CALLREGFREE_PVT(rx); /* free the private data */
SvREFCNT_dec(RXp_PAREN_NAMES(r));
+ Safefree(r->xpv_len_u.xpvlenu_pv);
}
if (r->substrs) {
SvREFCNT_dec(r->anchored_substr);
Safefree(r->substrs);
}
RX_MATCH_COPY_FREE(rx);
-#ifdef PERL_OLD_COPY_ON_WRITE
+#ifdef PERL_ANY_COW
SvREFCNT_dec(r->saved_copy);
#endif
Safefree(r->offs);
SvREFCNT_dec(r->qr_anoncv);
+ rx->sv_u.svu_rx = 0;
}
/* reg_temp_copy()
Perl_reg_temp_copy (pTHX_ REGEXP *ret_x, REGEXP *rx)
{
struct regexp *ret;
- struct regexp *const r = (struct regexp *)SvANY(rx);
+ struct regexp *const r = ReANY(rx);
+ const bool islv = ret_x && SvTYPE(ret_x) == SVt_PVLV;
PERL_ARGS_ASSERT_REG_TEMP_COPY;
if (!ret_x)
ret_x = (REGEXP*) newSV_type(SVt_REGEXP);
- ret = (struct regexp *)SvANY(ret_x);
+ else {
+ SvOK_off((SV *)ret_x);
+ if (islv) {
+ /* For PVLVs, SvANY points to the xpvlv body while sv_u points
+ to the regexp. (For SVt_REGEXPs, sv_upgrade has already
+ made both spots point to the same regexp body.) */
+ REGEXP *temp = (REGEXP *)newSV_type(SVt_REGEXP);
+ assert(!SvPVX(ret_x));
+ ret_x->sv_u.svu_rx = temp->sv_any;
+ temp->sv_any = NULL;
+ SvFLAGS(temp) = (SvFLAGS(temp) & ~SVTYPEMASK) | SVt_NULL;
+ SvREFCNT_dec(temp);
+ /* SvCUR still resides in the xpvlv struct, so the regexp copy-
+ ing below will not set it. */
+ SvCUR_set(ret_x, SvCUR(rx));
+ }
+ }
+ /* This ensures that SvTHINKFIRST(sv) is true, and hence that
+ sv_force_normal(sv) is called. */
+ SvFAKE_on(ret_x);
+ ret = ReANY(ret_x);
- (void)ReREFCNT_inc(rx);
- /* We can take advantage of the existing "copied buffer" mechanism in SVs
- by pointing directly at the buffer, but flagging that the allocated
- space in the copy is zero. As we've just done a struct copy, it's now
- a case of zero-ing that, rather than copying the current length. */
- SvPV_set(ret_x, RX_WRAPPED(rx));
- SvFLAGS(ret_x) |= SvFLAGS(rx) & (SVf_POK|SVp_POK|SVf_UTF8);
+ SvFLAGS(ret_x) |= SvUTF8(rx);
+ /* We share the same string buffer as the original regexp, on which we
+ hold a reference count, incremented when mother_re is set below.
+ The string pointer is copied here, being part of the regexp struct.
+ */
memcpy(&(ret->xpv_cur), &(r->xpv_cur),
sizeof(regexp) - STRUCT_OFFSET(regexp, xpv_cur));
- SvLEN_set(ret_x, 0);
- SvSTASH_set(ret_x, NULL);
- SvMAGIC_set(ret_x, NULL);
if (r->offs) {
const I32 npar = r->nparens+1;
Newx(ret->offs, npar, regexp_paren_pair);
anchored or float namesakes, and don't hold a second reference. */
}
RX_MATCH_COPIED_off(ret_x);
-#ifdef PERL_OLD_COPY_ON_WRITE
+#ifdef PERL_ANY_COW
ret->saved_copy = NULL;
#endif
- ret->mother_re = rx;
+ ret->mother_re = ReREFCNT_inc(r->mother_re ? r->mother_re : rx);
SvREFCNT_inc_void(ret->qr_anoncv);
return ret_x;
Perl_regfree_internal(pTHX_ REGEXP * const rx)
{
dVAR;
- struct regexp *const r = (struct regexp *)SvANY(rx);
+ struct regexp *const r = ReANY(rx);
RXi_GET_DECL(r,ri);
GET_RE_DEBUG_FLAGS_DECL;
{
dVAR;
I32 npar;
- const struct regexp *r = (const struct regexp *)SvANY(sstr);
- struct regexp *ret = (struct regexp *)SvANY(dstr);
+ const struct regexp *r = ReANY(sstr);
+ struct regexp *ret = ReANY(dstr);
PERL_ARGS_ASSERT_RE_DUP_GUTS;
ret->subbeg = SAVEPVN(ret->subbeg, ret->sublen);
else
ret->subbeg = NULL;
-#ifdef PERL_OLD_COPY_ON_WRITE
+#ifdef PERL_ANY_COW
ret->saved_copy = NULL;
#endif
- if (ret->mother_re) {
- if (SvPVX_const(dstr) == SvPVX_const(ret->mother_re)) {
- /* Our storage points directly to our mother regexp, but that's
+ /* Whether mother_re be set or no, we need to copy the string. We
+ cannot refrain from copying it when the storage points directly to
+ our mother regexp, because that's
1: a buffer in a different thread
2: something we no longer hold a reference on
so we need to copy it locally. */
- /* Note we need to use SvCUR(), rather than
- SvLEN(), on our mother_re, because it, in
- turn, may well be pointing to its own mother_re. */
- SvPV_set(dstr, SAVEPVN(SvPVX_const(ret->mother_re),
- SvCUR(ret->mother_re)+1));
- SvLEN_set(dstr, SvCUR(ret->mother_re)+1);
- }
- ret->mother_re = NULL;
- }
+ RX_WRAPPED(dstr) = SAVEPVN(RX_WRAPPED(sstr), SvCUR(sstr)+1);
+ ret->mother_re = NULL;
ret->gofs = 0;
}
#endif /* PERL_IN_XSUB_RE */
Perl_regdupe_internal(pTHX_ REGEXP * const rx, CLONE_PARAMS *param)
{
dVAR;
- struct regexp *const r = (struct regexp *)SvANY(rx);
+ struct regexp *const r = ReANY(rx);
regexp_internal *reti;
int len;
RXi_GET_DECL(r,ri);
- regnext - dig the "next" pointer out of a node
*/
regnode *
-Perl_regnext(pTHX_ register regnode *p)
+Perl_regnext(pTHX_ regnode *p)
{
dVAR;
I32 offset;
PL_reg_leftiter = 0;
PL_reg_poscache = NULL;
PL_reg_poscache_size = 0;
-#ifdef PERL_OLD_COPY_ON_WRITE
+#ifdef PERL_ANY_COW
PL_nrs = NULL;
#endif
}
#endif
-static void
-clear_re(pTHX_ void *r)
-{
- dVAR;
- ReREFCNT_dec((REGEXP *)r);
-}
-
#ifdef DEBUGGING
STATIC void