SSize_t last_end; /* min value, <0 unless valid. */
SSize_t last_start_min;
SSize_t last_start_max;
- U8 longest; /* 0 = fixed is longest, 1 = float is longest */
+ U8 cur_is_floating; /* whether the last_* values should be set as
+ * the next fixed (0) or floating (1)
+ * substring */
/* [0] is longest fixed substring so far, [1] is longest float so far */
struct scan_data_substrs substrs[2];
);
if (data->last_found) {
+ int i;
Perl_re_printf(aTHX_
"Last:'%s' %" IVdf ":%" IVdf "/%" IVdf,
SvPVX_const(data->last_found),
(IV)data->last_start_max
);
- Perl_re_printf(aTHX_
- " %sFixed:'%s' @ %" IVdf,
- data->longest == 0 ? "*" : "",
- SvPVX_const(data->substrs[0].str),
- (IV)data->substrs[0].min_offset
- );
- S_debug_show_study_flags(aTHX_ data->substrs[0].flags," [","]");
-
- Perl_re_printf(aTHX_
- " %sFloat: '%s' @ %" IVdf "/%" IVdf,
- data->longest == 1 ? "*" : "",
- SvPVX_const(data->substrs[1].str),
- (IV)data->substrs[1].min_offset,
- (IV)data->substrs[1].max_offset
- );
- S_debug_show_study_flags(aTHX_ data->substrs[1].flags," [","]");
+ for (i = 0; i < 2; i++) {
+ Perl_re_printf(aTHX_
+ " %s%s: '%s' @ %" IVdf "/%" IVdf,
+ data->cur_is_floating == i ? "*" : "",
+ i ? "Float" : "Fixed",
+ SvPVX_const(data->substrs[i].str),
+ (IV)data->substrs[i].min_offset,
+ (IV)data->substrs[i].max_offset
+ );
+ S_debug_show_study_flags(aTHX_ data->substrs[i].flags," [","]");
+ }
}
Perl_re_printf( aTHX_ "\n");
}
/* Mark that we cannot extend a found fixed substring at this point.
- Update the longest found anchored substring and the longest found
+ Update the longest found anchored substring or the longest found
floating substrings if needed. */
STATIC void
SSize_t *minlenp, int is_inf)
{
const STRLEN l = CHR_SVLEN(data->last_found);
- SV * const longest_sv = data->substrs[data->longest].str;
+ SV * const longest_sv = data->substrs[data->cur_is_floating].str;
const STRLEN old_l = CHR_SVLEN(longest_sv);
GET_RE_DEBUG_FLAGS_DECL;
PERL_ARGS_ASSERT_SCAN_COMMIT;
if ((l >= old_l) && ((l > old_l) || (data->flags & SF_BEFORE_EOL))) {
+ const U8 i = data->cur_is_floating;
SvSetMagicSV(longest_sv, data->last_found);
- if (data->longest == 0) { /* fixed */
- data->substrs[0].min_offset = l ? data->last_start_min : data->pos_min;
+ data->substrs[i].min_offset = l ? data->last_start_min : data->pos_min;
+
+ if (!i) /* fixed */
data->substrs[0].max_offset = data->substrs[0].min_offset;
- if (data->flags & SF_BEFORE_EOL)
- data->substrs[0].flags |= (data->flags & SF_BEFORE_EOL);
- else
- data->substrs[0].flags &= ~SF_BEFORE_EOL;
- data->substrs[0].minlenp = minlenp;
- data->substrs[0].lookbehind = 0;
- }
else { /* float */
- data->substrs[1].min_offset = l ? data->last_start_min : data->pos_min;
data->substrs[1].max_offset = (l
? data->last_start_max
: (data->pos_delta > SSize_t_MAX - data->pos_min
if (is_inf
|| (STRLEN)data->substrs[1].max_offset > (STRLEN)SSize_t_MAX)
data->substrs[1].max_offset = SSize_t_MAX;
+ }
- if (data->flags & SF_BEFORE_EOL)
- data->substrs[1].flags |= (data->flags & SF_BEFORE_EOL);
- else
- data->substrs[1].flags &= ~SF_BEFORE_EOL;
- data->substrs[1].minlenp = minlenp;
- data->substrs[1].lookbehind = 0;
- }
+ if (data->flags & SF_BEFORE_EOL)
+ data->substrs[i].flags |= (data->flags & SF_BEFORE_EOL);
+ else
+ data->substrs[i].flags &= ~SF_BEFORE_EOL;
+ data->substrs[i].minlenp = minlenp;
+ data->substrs[i].lookbehind = 0;
}
+
SvCUR_set(data->last_found, 0);
{
SV * const sv = data->last_found;
else
data->pos_delta += max1 - min1;
if (max1 != min1 || is_inf)
- data->longest = 1 /*float*/;
+ data->cur_is_floating = 1;
}
min += min1;
if (delta == SSize_t_MAX
* */
if (flags & SCF_DO_SUBSTR) {
scan_commit(pRExC_state, data, minlenp, is_inf);
- data->longest = 1; /* float */
+ data->cur_is_floating = 1;
}
is_inf = is_inf_internal = 1;
if (flags & SCF_DO_STCLASS_OR) /* Allow everything */
}
data->pos_delta += min_subtract;
if (min_subtract) {
- data->longest = 1; /* float */
+ data->cur_is_floating = 1; /* float */
}
}
if (flags & SCF_DO_SUBSTR) {
scan_commit(pRExC_state, data, minlenp, is_inf);
/* Cannot extend fixed substrings */
- data->longest = 1; /* float */
+ data->cur_is_floating = 1; /* float */
}
is_inf = is_inf_internal = 1;
scan = regnext(scan);
}
if (!scan) /* It was not CURLYX, but CURLY. */
scan = next;
- if (!(flags & SCF_TRIE_DOING_RESTUDY)
+ if (((flags & (SCF_TRIE_DOING_RESTUDY|SCF_DO_SUBSTR))==SCF_DO_SUBSTR)
/* ? quantifier ok, except for (?{ ... }) */
&& (next_is_eval || !(mincount == 0 && maxcount == 1))
&& (minnext == 0) && (deltanext == 0)
? SSize_t_MAX
: data->pos_min + data->pos_delta - last_chrs;
}
- data->longest = 1; /* float */
+ data->cur_is_floating = 1; /* float */
}
SvREFCNT_dec(last_str);
}
if (flags & SCF_DO_SUBSTR) {
/* Cannot expect anything... */
scan_commit(pRExC_state, data, minlenp, is_inf);
- data->longest = 1; /* float */
+ data->cur_is_floating = 1; /* float */
}
is_inf = is_inf_internal = 1;
if (flags & SCF_DO_STCLASS_OR) {
scan_commit(pRExC_state, data, minlenp, is_inf);
data->pos_min += 1;
data->pos_delta += 1;
- data->longest = 1; /* float */
+ data->cur_is_floating = 1; /* float */
}
}
else if (REGNODE_SIMPLE(OP(scan))) {
data->flags |= SF_HAS_EVAL;
data->whilem_c = data_fake.whilem_c;
if ((flags & SCF_DO_SUBSTR) && data_fake.last_found) {
+ int i;
if (RExC_rx->minlen<*minnextp)
RExC_rx->minlen=*minnextp;
scan_commit(pRExC_state, &data_fake, minnextp, is_inf);
SvREFCNT_dec_NN(data_fake.last_found);
- if (data_fake.substrs[0].minlenp != minlenp) {
- data->substrs[0].min_offset = data_fake.substrs[0].min_offset;
- data->substrs[0].max_offset = data_fake.substrs[0].max_offset;
- data->substrs[0].minlenp = data_fake.substrs[0].minlenp;
- data->substrs[0].lookbehind += scan->flags;
- }
-
- if (data_fake.substrs[1].minlenp != minlenp) {
- data->substrs[1].minlenp = data_fake.substrs[1].minlenp;
- data->substrs[1].min_offset = data_fake.substrs[1].min_offset;
- data->substrs[1].max_offset = data_fake.substrs[1].max_offset;
- data->substrs[1].lookbehind += scan->flags;
+ for (i = 0; i < 2; i++) {
+ if (data_fake.substrs[i].minlenp != minlenp) {
+ data->substrs[i].min_offset =
+ data_fake.substrs[i].min_offset;
+ data->substrs[i].max_offset =
+ data_fake.substrs[i].max_offset;
+ data->substrs[i].minlenp =
+ data_fake.substrs[i].minlenp;
+ data->substrs[i].lookbehind += scan->flags;
+ }
}
}
}
}
#endif
}
+
else if (OP(scan) == OPEN) {
if (stopparen != (I32)ARG(scan))
pars++;
{
if (flags & SCF_DO_SUBSTR) {
scan_commit(pRExC_state, data, minlenp, is_inf);
- data->longest = 1; /* float */
+ data->cur_is_floating = 1; /* float */
}
is_inf = is_inf_internal = 1;
if (flags & SCF_DO_STCLASS_OR) /* Allow everything */
data->pos_min += min1;
data->pos_delta += max1 - min1;
if (max1 != min1 || is_inf)
- data->longest = 1; /* float */
+ data->cur_is_floating = 1; /* float */
}
min += min1;
if (delta != SSize_t_MAX)
data->pos_min += trie->minlen;
data->pos_delta += (trie->maxlen - trie->minlen);
if (trie->maxlen != trie->minlen)
- data->longest = 1; /* float */
+ data->cur_is_floating = 1; /* float */
}
if (trie->jump) /* no more substrings -- for now /grr*/
flags &= ~SCF_DO_SUBSTR;
STATIC bool
S_setup_longest(pTHX_ RExC_state_t *pRExC_state,
- SV** rx_utf8, SV** rx_substr, SSize_t* rx_end_shift,
+ struct reg_substr_datum *rsd,
struct scan_data_substrs *sub,
STRLEN longest_length)
{
/* copy the information about the longest from the reg_scan_data
over to the program. */
if (SvUTF8(sub->str)) {
- *rx_utf8 = sub->str;
- *rx_substr = NULL;
+ rsd->substr = NULL;
+ rsd->utf8_substr = sub->str;
} else {
- *rx_substr = sub->str;
- *rx_utf8 = NULL;
+ rsd->substr = sub->str;
+ rsd->utf8_substr = NULL;
}
/* end_shift is how many chars that must be matched that
follow this item. We calculate it ahead of time as once the
lookbehind offset is added in we lose the ability to correctly
calculate it.*/
ml = sub->minlenp ? *(sub->minlenp) : (SSize_t)longest_length;
- *rx_end_shift = ml - sub->min_offset
+ rsd->end_shift = ml - sub->min_offset
- longest_length
/* XXX SvTAIL is always false here - did you mean FBMcf_TAIL
* intead? - DAPM
/* make sure PL_bitcount bounds not exceeded */
assert(sizeof(STD_PAT_MODS) <= 8);
- Newx(p, wraplen + 1, char); /* +1 for the ending NUL */
- r->xpv_len_u.xpvlenu_pv = p;
+ p = sv_grow(MUTABLE_SV(rx), wraplen + 1); /* +1 for the ending NUL */
+ SvPOK_on(rx);
if (RExC_utf8)
SvFLAGS(rx) |= SVf_UTF8;
*p++='('; *p++='?';
if (!(RExC_seen & REG_TOP_LEVEL_BRANCHES_SEEN)) { /* Only one top-level choice.
*/
SSize_t fake;
- STRLEN longest_float_length, longest_fixed_length;
+ STRLEN longest_length[2];
regnode_ssc ch_class; /* pointed to by data */
int stclass_flag;
SSize_t last_close = 0; /* pointed to by data */
regnode *first= scan;
regnode *first_next= regnext(first);
+ int i;
+
/*
* Skip introductions and multiplicators >= 1
* so that we can extract the 'meat' of the pattern that must
data.substrs[0].str = newSVpvs("");
data.substrs[1].str = newSVpvs("");
data.last_found = newSVpvs("");
- data.longest = 0; /* fixed */
+ data.cur_is_floating = 0; /* initially any found substring is fixed */
ENTER_with_name("study_chunk");
SAVEFREESV(data.substrs[0].str);
SAVEFREESV(data.substrs[1].str);
CHECK_RESTUDY_GOTO_butfirst(LEAVE_with_name("study_chunk"));
- if ( RExC_npar == 1 && data.longest == 0 /*fixed */
+ if ( RExC_npar == 1 && !data.cur_is_floating
&& data.last_start_min == 0 && data.last_end > 0
&& !RExC_seen_zerolen
&& !(RExC_seen & REG_VERBARG_SEEN)
}
scan_commit(pRExC_state, &data,&minlen,0);
- longest_float_length = CHR_SVLEN(data.substrs[1].str);
-
- if (! ((SvCUR(data.substrs[0].str) /* ok to leave SvCUR */
- && data.substrs[0].min_offset == data.substrs[1].min_offset
- && SvCUR(data.substrs[0].str) == SvCUR(data.substrs[1].str)))
- && S_setup_longest (aTHX_ pRExC_state,
- &(r->float_utf8),
- &(r->float_substr),
- &(r->float_end_shift),
- &(data.substrs[1]),
- longest_float_length))
- {
- r->substrs->data[1].min_offset =
- data.substrs[1].min_offset - data.substrs[1].lookbehind;
-
- r->substrs->data[1].max_offset = data.substrs[1].max_offset;
- if (data.substrs[1].max_offset < SSize_t_MAX) /* Don't offset infinity */
- r->substrs->data[1].max_offset -= data.substrs[1].lookbehind;
-
- SvREFCNT_inc_simple_void_NN(data.substrs[1].str);
- }
- else {
- r->float_substr = r->float_utf8 = NULL;
- longest_float_length = 0;
- }
- longest_fixed_length = CHR_SVLEN(data.substrs[0].str);
-
- if (S_setup_longest (aTHX_ pRExC_state,
- &(r->anchored_utf8),
- &(r->anchored_substr),
- &(r->anchored_end_shift),
- &(data.substrs[0]),
- longest_fixed_length))
- {
- r->substrs->data[0].min_offset =
- data.substrs[0].min_offset - data.substrs[0].lookbehind;
- /* XXX this calc isn't necessary for anchored, but is done
- * for consistency with float code path */
- r->substrs->data[0].max_offset = data.substrs[0].max_offset;
+ /* XXX this is done in reverse order because that's the way the
+ * code was before it was parameterised. Don't know whether it
+ * actually needs doing in reverse order. DAPM */
+ for (i = 1; i >= 0; i--) {
+ longest_length[i] = CHR_SVLEN(data.substrs[i].str);
- if (data.substrs[0].max_offset < SSize_t_MAX) /* Don't offset infinity */
- r->substrs->data[0].max_offset -= data.substrs[0].lookbehind;
+ if ( !( i
+ && SvCUR(data.substrs[0].str) /* ok to leave SvCUR */
+ && data.substrs[0].min_offset
+ == data.substrs[1].min_offset
+ && SvCUR(data.substrs[0].str)
+ == SvCUR(data.substrs[1].str)
+ )
+ && S_setup_longest (aTHX_ pRExC_state,
+ &(r->substrs->data[i]),
+ &(data.substrs[i]),
+ longest_length[i]))
+ {
+ r->substrs->data[i].min_offset =
+ data.substrs[i].min_offset - data.substrs[i].lookbehind;
+
+ r->substrs->data[i].max_offset = data.substrs[i].max_offset;
+ /* Don't offset infinity */
+ if (data.substrs[i].max_offset < SSize_t_MAX)
+ r->substrs->data[i].max_offset -= data.substrs[i].lookbehind;
+ SvREFCNT_inc_simple_void_NN(data.substrs[i].str);
+ }
+ else {
+ r->substrs->data[i].substr = NULL;
+ r->substrs->data[i].utf8_substr = NULL;
+ longest_length[i] = 0;
+ }
+ }
- SvREFCNT_inc_simple_void_NN(data.substrs[0].str);
- }
- else {
- r->anchored_substr = r->anchored_utf8 = NULL;
- longest_fixed_length = 0;
- }
LEAVE_with_name("study_chunk");
if (ri->regstclass
&& (OP(ri->regstclass) == REG_ANY || OP(ri->regstclass) == SANY))
ri->regstclass = NULL;
- if ((!(r->anchored_substr || r->anchored_utf8) || r->anchored_offset)
+ if ((!(r->substrs->data[0].substr || r->substrs->data[0].utf8_substr)
+ || r->substrs->data[0].min_offset)
&& stclass_flag
&& ! (ANYOF_FLAGS(data.start_class) & SSC_MATCHES_EMPTY_STRING)
&& is_ssc_worth_it(pRExC_state, data.start_class))
data.start_class = NULL;
}
- /* A temporary algorithm prefers floated substr to fixed one to dig
- * more info. */
- if (longest_fixed_length > longest_float_length) {
- r->substrs->check_ix = 0;
- r->check_end_shift = r->anchored_end_shift;
- r->check_substr = r->anchored_substr;
- r->check_utf8 = r->anchored_utf8;
- r->check_offset_min = r->substrs->data[0].min_offset;
- r->check_offset_max = r->substrs->data[0].max_offset;
- if (r->intflags & (PREGf_ANCH_SBOL|PREGf_ANCH_GPOS))
- r->intflags |= PREGf_NOSCAN;
- }
- else {
- r->substrs->check_ix = 1;
- r->check_end_shift = r->float_end_shift;
- r->check_substr = r->float_substr;
- r->check_utf8 = r->float_utf8;
- r->check_offset_min = r->substrs->data[1].min_offset;
- r->check_offset_max = r->substrs->data[1].max_offset;
- }
+ /* A temporary algorithm prefers floated substr to fixed one of
+ * same length to dig more info. */
+ i = (longest_length[0] <= longest_length[1]);
+ r->substrs->check_ix = i;
+ r->check_end_shift = r->substrs->data[i].end_shift;
+ r->check_substr = r->substrs->data[i].substr;
+ r->check_utf8 = r->substrs->data[i].utf8_substr;
+ r->check_offset_min = r->substrs->data[i].min_offset;
+ r->check_offset_max = r->substrs->data[i].max_offset;
+ if (!i && (r->intflags & (PREGf_ANCH_SBOL|PREGf_ANCH_GPOS)))
+ r->intflags |= PREGf_NOSCAN;
if ((r->check_substr || r->check_utf8) ) {
r->extflags |= RXf_USE_INTUIT;
}
/* XXX Unneeded? dmq (shouldn't as this is handled elsewhere)
- if ( (STRLEN)minlen < longest_float_length )
- minlen= longest_float_length;
- if ( (STRLEN)minlen < longest_fixed_length )
- minlen= longest_fixed_length;
+ if ( (STRLEN)minlen < longest_length[1] )
+ minlen= longest_length[1];
+ if ( (STRLEN)minlen < longest_length[0] )
+ minlen= longest_length[0];
*/
}
else {
CHECK_RESTUDY_GOTO_butfirst(NOOP);
- r->check_substr = r->check_utf8 = r->anchored_substr = r->anchored_utf8
- = r->float_substr = r->float_utf8 = NULL;
+ r->check_substr = NULL;
+ r->check_utf8 = NULL;
+ r->substrs->data[0].substr = NULL;
+ r->substrs->data[0].utf8_substr = NULL;
+ r->substrs->data[1].substr = NULL;
+ r->substrs->data[1].utf8_substr = NULL;
if (! (ANYOF_FLAGS(data.start_class) & SSC_MATCHES_EMPTY_STRING)
&& is_ssc_worth_it(pRExC_state, data.start_class))
if ((flags&SIMPLE)) {
if (min == 0 && max == REG_INFTY) {
reginsert(pRExC_state, STAR, ret, depth+1);
- ret->flags = 0;
MARK_NAUGHTY(4);
RExC_seen |= REG_UNBOUNDED_QUANTIFIER_SEEN;
goto nest_check;
}
if (min == 1 && max == REG_INFTY) {
reginsert(pRExC_state, PLUS, ret, depth+1);
- ret->flags = 0;
MARK_NAUGHTY(3);
RExC_seen |= REG_UNBOUNDED_QUANTIFIER_SEEN;
goto nest_check;
ender = reg_node(pRExC_state, SUCCEED);
REGTAIL(pRExC_state, ret, ender);
reginsert(pRExC_state, SUSPEND, ret, depth+1);
- ret->flags = 0;
ender = reg_node(pRExC_state, TAIL);
REGTAIL(pRExC_state, ret, ender);
}
}
sv_catpv(substitute_parse, ")");
- RExC_parse = RExC_start = RExC_adjusted_start = SvPV(substitute_parse,
- len);
+ len = SvCUR(substitute_parse);
/* Don't allow empty number */
if (len < (STRLEN) 8) {
RExC_parse = endbrace;
vFAIL("Invalid hexadecimal number in \\N{U+...}");
}
+
+ RExC_parse = RExC_start = RExC_adjusted_start
+ = SvPV_nolen(substitute_parse);
RExC_end = RExC_parse + len;
/* The values are Unicode, and therefore not subject to recoding, but
goto loopdone;
}
p = RExC_parse;
+ RExC_parse = parse_start;
if (ender > 0xff) {
REQUIRE_UTF8(flagp);
}
* if (PASS2)
* NEXT_OFF(orig_emit) = regarglen[OPFAIL] + NODE_STEP_REGNODE;
*
+* ALSO NOTE - operand->flags will be set to 0 as well.
*/
STATIC void
S_reginsert(pTHX_ RExC_state_t *pRExC_state, U8 op, regnode *operand, U32 depth)
#endif
}
-
place = operand; /* Op node, where operand used to be. */
#ifdef RE_TRACK_PATTERN_OFFSETS
if (RExC_offsets) { /* MJD */
}
#endif
src = NEXTOPER(place);
+ place->flags = 0;
FILL_ADVANCE_NODE(place, op);
Zero(src, offset, regnode);
}
Perl_regdump(pTHX_ const regexp *r)
{
#ifdef DEBUGGING
+ int i;
SV * const sv = sv_newmortal();
SV *dsv= sv_newmortal();
RXi_GET_DECL(r,ri);
(void)dumpuntil(r, ri->program, ri->program + 1, NULL, NULL, sv, 0, 0);
/* Header fields of interest. */
- if (r->anchored_substr) {
- RE_PV_QUOTED_DECL(s, 0, dsv, SvPVX_const(r->anchored_substr),
- RE_SV_DUMPLEN(r->anchored_substr), 30);
- Perl_re_printf( aTHX_
- "anchored %s%s at %" IVdf " ",
- s, RE_SV_TAIL(r->anchored_substr),
- (IV)r->anchored_offset);
- } else if (r->anchored_utf8) {
- RE_PV_QUOTED_DECL(s, 1, dsv, SvPVX_const(r->anchored_utf8),
- RE_SV_DUMPLEN(r->anchored_utf8), 30);
- Perl_re_printf( aTHX_
- "anchored utf8 %s%s at %" IVdf " ",
- s, RE_SV_TAIL(r->anchored_utf8),
- (IV)r->anchored_offset);
- }
- if (r->float_substr) {
- RE_PV_QUOTED_DECL(s, 0, dsv, SvPVX_const(r->float_substr),
- RE_SV_DUMPLEN(r->float_substr), 30);
- Perl_re_printf( aTHX_
- "floating %s%s at %" IVdf "..%" UVuf " ",
- s, RE_SV_TAIL(r->float_substr),
- (IV)r->float_min_offset, (UV)r->float_max_offset);
- } else if (r->float_utf8) {
- RE_PV_QUOTED_DECL(s, 1, dsv, SvPVX_const(r->float_utf8),
- RE_SV_DUMPLEN(r->float_utf8), 30);
- Perl_re_printf( aTHX_
- "floating utf8 %s%s at %" IVdf "..%" UVuf " ",
- s, RE_SV_TAIL(r->float_utf8),
- (IV)r->float_min_offset, (UV)r->float_max_offset);
+ for (i = 0; i < 2; i++) {
+ if (r->substrs->data[i].substr) {
+ RE_PV_QUOTED_DECL(s, 0, dsv,
+ SvPVX_const(r->substrs->data[i].substr),
+ RE_SV_DUMPLEN(r->substrs->data[i].substr),
+ 30);
+ Perl_re_printf( aTHX_
+ "%s %s%s at %" IVdf "..%" UVuf " ",
+ i ? "floating" : "anchored",
+ s,
+ RE_SV_TAIL(r->substrs->data[i].substr),
+ (IV)r->substrs->data[i].min_offset,
+ (UV)r->substrs->data[i].max_offset);
+ }
+ else if (r->substrs->data[i].utf8_substr) {
+ RE_PV_QUOTED_DECL(s, 1, dsv,
+ SvPVX_const(r->substrs->data[i].utf8_substr),
+ RE_SV_DUMPLEN(r->substrs->data[i].utf8_substr),
+ 30);
+ Perl_re_printf( aTHX_
+ "%s utf8 %s%s at %" IVdf "..%" UVuf " ",
+ i ? "floating" : "anchored",
+ s,
+ RE_SV_TAIL(r->substrs->data[i].utf8_substr),
+ (IV)r->substrs->data[i].min_offset,
+ (UV)r->substrs->data[i].max_offset);
+ }
}
+
if (r->check_substr || r->check_utf8)
Perl_re_printf( aTHX_
(const char *)
- (r->check_substr == r->float_substr
- && r->check_utf8 == r->float_utf8
+ ( r->check_substr == r->substrs->data[1].substr
+ && r->check_utf8 == r->substrs->data[1].utf8_substr
? "(checking floating" : "(checking anchored"));
if (r->intflags & PREGf_NOSCAN)
Perl_re_printf( aTHX_ " noscan");
/* add on the verb argument if there is one */
if ( ( k == VERB || OP(o) == ACCEPT || OP(o) == OPFAIL ) && o->flags) {
- Perl_sv_catpvf(aTHX_ sv, ":%" SVf,
+ if ( ARG(o) )
+ Perl_sv_catpvf(aTHX_ sv, ":%" SVf,
SVfARG((MUTABLE_SV(progi->data->data[ ARG( o ) ]))));
+ else
+ sv_catpvs(sv, ":NULL");
}
#else
PERL_UNUSED_CONTEXT;
} else {
CALLREGFREE_PVT(rx); /* free the private data */
SvREFCNT_dec(RXp_PAREN_NAMES(r));
- Safefree(r->xpv_len_u.xpvlenu_pv);
}
if (r->substrs) {
- SvREFCNT_dec(r->anchored_substr);
- SvREFCNT_dec(r->anchored_utf8);
- SvREFCNT_dec(r->float_substr);
- SvREFCNT_dec(r->float_utf8);
+ int i;
+ for (i = 0; i < 2; i++) {
+ SvREFCNT_dec(r->substrs->data[i].substr);
+ SvREFCNT_dec(r->substrs->data[i].utf8_substr);
+ }
Safefree(r->substrs);
}
RX_MATCH_COPY_FREE(rx);
SvREFCNT_dec(r->qr_anoncv);
if (r->recurse_locinput)
Safefree(r->recurse_locinput);
- rx->sv_u.svu_rx = 0;
}
+
/* reg_temp_copy()
- This is a hacky workaround to the structural issue of match results
+ Copy ssv to dsv, both of which should be of type SVt_REGEXP or SVt_PVLV,
+ except that dsv will be created if NULL.
+
+ This function is used in two main ways. First to implement
+ $r = qr/....; $s = $$r;
+
+ Secondly, it is used as a hacky workaround to the structural issue of
+ match results
being stored in the regexp structure which is in turn stored in
PL_curpm/PL_reg_curpm. The problem is that due to qr// the pattern
could be PL_curpm in multiple contexts, and could require multiple
REGEXP *
-Perl_reg_temp_copy (pTHX_ REGEXP *ret_x, REGEXP *rx)
+Perl_reg_temp_copy(pTHX_ REGEXP *dsv, REGEXP *ssv)
{
- struct regexp *ret;
- struct regexp *const r = ReANY(rx);
- const bool islv = ret_x && SvTYPE(ret_x) == SVt_PVLV;
+ struct regexp *drx;
+ struct regexp *const srx = ReANY(ssv);
+ const bool islv = dsv && SvTYPE(dsv) == SVt_PVLV;
PERL_ARGS_ASSERT_REG_TEMP_COPY;
- if (!ret_x)
- ret_x = (REGEXP*) newSV_type(SVt_REGEXP);
+ if (!dsv)
+ dsv = (REGEXP*) newSV_type(SVt_REGEXP);
else {
- SvOK_off((SV *)ret_x);
+ SvOK_off((SV *)dsv);
if (islv) {
- /* For PVLVs, SvANY points to the xpvlv body while sv_u points
- to the regexp. (For SVt_REGEXPs, sv_upgrade has already
- made both spots point to the same regexp body.) */
+ /* For PVLVs, the head (sv_any) points to an XPVLV, while
+ * the LV's xpvlenu_rx will point to a regexp body, which
+ * we allocate here */
REGEXP *temp = (REGEXP *)newSV_type(SVt_REGEXP);
- assert(!SvPVX(ret_x));
- ret_x->sv_u.svu_rx = temp->sv_any;
+ assert(!SvPVX(dsv));
+ ((XPV*)SvANY(dsv))->xpv_len_u.xpvlenu_rx = temp->sv_any;
temp->sv_any = NULL;
SvFLAGS(temp) = (SvFLAGS(temp) & ~SVTYPEMASK) | SVt_NULL;
SvREFCNT_dec_NN(temp);
/* SvCUR still resides in the xpvlv struct, so the regexp copy-
ing below will not set it. */
- SvCUR_set(ret_x, SvCUR(rx));
+ SvCUR_set(dsv, SvCUR(ssv));
}
}
/* This ensures that SvTHINKFIRST(sv) is true, and hence that
sv_force_normal(sv) is called. */
- SvFAKE_on(ret_x);
- ret = ReANY(ret_x);
+ SvFAKE_on(dsv);
+ drx = ReANY(dsv);
- SvFLAGS(ret_x) |= SvUTF8(rx);
+ SvFLAGS(dsv) |= SvFLAGS(ssv) & (SVf_POK|SVp_POK|SVf_UTF8);
+ SvPV_set(dsv, RX_WRAPPED(ssv));
/* We share the same string buffer as the original regexp, on which we
hold a reference count, incremented when mother_re is set below.
The string pointer is copied here, being part of the regexp struct.
*/
- memcpy(&(ret->xpv_cur), &(r->xpv_cur),
+ memcpy(&(drx->xpv_cur), &(srx->xpv_cur),
sizeof(regexp) - STRUCT_OFFSET(regexp, xpv_cur));
- if (r->offs) {
- const I32 npar = r->nparens+1;
- Newx(ret->offs, npar, regexp_paren_pair);
- Copy(r->offs, ret->offs, npar, regexp_paren_pair);
- }
- if (r->substrs) {
- Newx(ret->substrs, 1, struct reg_substr_data);
- StructCopy(r->substrs, ret->substrs, struct reg_substr_data);
+ if (!islv)
+ SvLEN_set(dsv, 0);
+ if (srx->offs) {
+ const I32 npar = srx->nparens+1;
+ Newx(drx->offs, npar, regexp_paren_pair);
+ Copy(srx->offs, drx->offs, npar, regexp_paren_pair);
+ }
+ if (srx->substrs) {
+ int i;
+ Newx(drx->substrs, 1, struct reg_substr_data);
+ StructCopy(srx->substrs, drx->substrs, struct reg_substr_data);
- SvREFCNT_inc_void(ret->anchored_substr);
- SvREFCNT_inc_void(ret->anchored_utf8);
- SvREFCNT_inc_void(ret->float_substr);
- SvREFCNT_inc_void(ret->float_utf8);
+ for (i = 0; i < 2; i++) {
+ SvREFCNT_inc_void(drx->substrs->data[i].substr);
+ SvREFCNT_inc_void(drx->substrs->data[i].utf8_substr);
+ }
/* check_substr and check_utf8, if non-NULL, point to either their
anchored or float namesakes, and don't hold a second reference. */
}
- RX_MATCH_COPIED_off(ret_x);
+ RX_MATCH_COPIED_off(dsv);
#ifdef PERL_ANY_COW
- ret->saved_copy = NULL;
+ drx->saved_copy = NULL;
#endif
- ret->mother_re = ReREFCNT_inc(r->mother_re ? r->mother_re : rx);
- SvREFCNT_inc_void(ret->qr_anoncv);
- if (r->recurse_locinput)
- Newxz(ret->recurse_locinput,r->nparens + 1,char *);
+ drx->mother_re = ReREFCNT_inc(srx->mother_re ? srx->mother_re : ssv);
+ SvREFCNT_inc_void(drx->qr_anoncv);
+ if (srx->recurse_locinput)
+ Newxz(drx->recurse_locinput,srx->nparens + 1,char *);
- return ret_x;
+ return dsv;
}
#endif
+
/* regfree_internal()
Free the private data in a regexp. This is overloadable by
/* Do it this way to avoid reading from *r after the StructCopy().
That way, if any of the sv_dup_inc()s dislodge *r from the L1
cache, it doesn't matter. */
+ int i;
const bool anchored = r->check_substr
- ? r->check_substr == r->anchored_substr
- : r->check_utf8 == r->anchored_utf8;
+ ? r->check_substr == r->substrs->data[0].substr
+ : r->check_utf8 == r->substrs->data[0].utf8_substr;
Newx(ret->substrs, 1, struct reg_substr_data);
StructCopy(r->substrs, ret->substrs, struct reg_substr_data);
- ret->anchored_substr = sv_dup_inc(ret->anchored_substr, param);
- ret->anchored_utf8 = sv_dup_inc(ret->anchored_utf8, param);
- ret->float_substr = sv_dup_inc(ret->float_substr, param);
- ret->float_utf8 = sv_dup_inc(ret->float_utf8, param);
+ for (i = 0; i < 2; i++) {
+ ret->substrs->data[i].substr =
+ sv_dup_inc(ret->substrs->data[i].substr, param);
+ ret->substrs->data[i].utf8_substr =
+ sv_dup_inc(ret->substrs->data[i].utf8_substr, param);
+ }
/* check_substr and check_utf8, if non-NULL, point to either their
anchored or float namesakes, and don't hold a second reference. */
if (ret->check_substr) {
if (anchored) {
- assert(r->check_utf8 == r->anchored_utf8);
- ret->check_substr = ret->anchored_substr;
- ret->check_utf8 = ret->anchored_utf8;
+ assert(r->check_utf8 == r->substrs->data[0].utf8_substr);
+
+ ret->check_substr = ret->substrs->data[0].substr;
+ ret->check_utf8 = ret->substrs->data[0].utf8_substr;
} else {
- assert(r->check_substr == r->float_substr);
- assert(r->check_utf8 == r->float_utf8);
- ret->check_substr = ret->float_substr;
- ret->check_utf8 = ret->float_utf8;
+ assert(r->check_substr == r->substrs->data[1].substr);
+ assert(r->check_utf8 == r->substrs->data[1].utf8_substr);
+
+ ret->check_substr = ret->substrs->data[1].substr;
+ ret->check_utf8 = ret->substrs->data[1].utf8_substr;
}
} else if (ret->check_utf8) {
if (anchored) {
- ret->check_utf8 = ret->anchored_utf8;
+ ret->check_utf8 = ret->substrs->data[0].utf8_substr;
} else {
- ret->check_utf8 = ret->float_utf8;
+ ret->check_utf8 = ret->substrs->data[1].utf8_substr;
}
}
}
1: a buffer in a different thread
2: something we no longer hold a reference on
so we need to copy it locally. */
- RX_WRAPPED(dstr) = SAVEPVN(RX_WRAPPED(sstr), SvCUR(sstr)+1);
+ RX_WRAPPED(dstr) = SAVEPVN(RX_WRAPPED_const(sstr), SvCUR(sstr)+1);
ret->mother_re = NULL;
}
#endif /* PERL_IN_XSUB_RE */