"Copy on write: regexp capture, type %d\n",
(int) SvTYPE(sv));
}
- /* skip creating new COW SV if a valid one already exists */
- if (! ( prog->saved_copy
- && SvIsCOW(sv)
- && SvPOKp(sv)
- && SvIsCOW(prog->saved_copy)
- && SvPOKp(prog->saved_copy)
- && SvPVX(sv) == SvPVX(prog->saved_copy)))
+ /* Create a new COW SV to share the match string and store
+ * in saved_copy, unless the current COW SV in saved_copy
+ * is valid and suitable for our purpose */
+ if (( prog->saved_copy
+ && SvIsCOW(prog->saved_copy)
+ && SvPOKp(prog->saved_copy)
+ && SvIsCOW(sv)
+ && SvPOKp(sv)
+ && SvPVX(sv) == SvPVX(prog->saved_copy)))
{
+ /* just reuse saved_copy SV */
+ if (RXp_MATCH_COPIED(prog)) {
+ Safefree(prog->subbeg);
+ RXp_MATCH_COPIED_off(prog);
+ }
+ }
+ else {
+ /* create new COW SV to share string */
RX_MATCH_COPY_FREE(rx);
prog->saved_copy = sv_setsv_cow(prog->saved_copy, sv);
- prog->subbeg = (char *)SvPVX_const(prog->saved_copy);
- assert (SvPOKp(prog->saved_copy));
}
+ prog->subbeg = (char *)SvPVX_const(prog->saved_copy);
+ assert (SvPOKp(prog->saved_copy));
prog->sublen = strend - strbeg;
prog->suboffset = 0;
prog->subcoffset = 0;
* itself is accessed via the pointers above */
/* data: May be used for some additional optimizations.
Currently unused. */
-/* nosave: For optimizations. */
+/* flags: For optimizations. See REXEC_* in regexp.h */
{
dVAR;
if (prog->extflags & RXf_GPOS_SEEN) {
MAGIC *mg;
+ /* set reginfo->ganch, the position where \G can match */
+
+ reginfo->ganch =
+ (flags & REXEC_IGNOREPOS)
+ ? stringarg /* use start pos rather than pos() */
+ : (sv && (mg = mg_find_mglob(sv)) && mg->mg_len >= 0)
+ ? strbeg + mg->mg_len /* Defined pos() */
+ : strbeg; /* pos() not defined; use start of string */
+
+ DEBUG_GPOS_r(PerlIO_printf(Perl_debug_log,
+ "GPOS ganch set to strbeg[%"IVdf"]\n", reginfo->ganch - strbeg));
+
/* in the presence of \G, we may need to start looking earlier in
* the string than the suggested start point of stringarg:
* if gofs->prog is set, then that's a known, fixed minimum
* or if the minimum offset isn't known, then we have to go back
* to the start of the string, e.g. /w+\G/
*/
- if (prog->gofs) {
+
+ if (prog->extflags & RXf_ANCH_GPOS) {
+ startpos = reginfo->ganch - prog->gofs;
+ if (startpos <
+ ((flags & REXEC_FAIL_ON_UNDERFLOW) ? stringarg : strbeg))
+ {
+ DEBUG_r(PerlIO_printf(Perl_debug_log,
+ "fail: ganch-gofs before earliest possible start\n"));
+ return 0;
+ }
+ }
+ else if (prog->gofs) {
if (startpos - prog->gofs < strbeg)
startpos = strbeg;
else
}
else if (prog->extflags & RXf_GPOS_FLOAT)
startpos = strbeg;
-
- /* set reginfo->ganch, the position where \G can match */
-
- reginfo->ganch =
- (flags & REXEC_IGNOREPOS)
- ? stringarg /* use start pos rather than pos() */
- : (sv && (mg = mg_find_mglob(sv)) && mg->mg_len >= 0)
- ? strbeg + mg->mg_len /* Defined pos() */
- : strbeg; /* pos() not defined; use start of string */
-
- DEBUG_GPOS_r(PerlIO_printf(Perl_debug_log,
- "GPOS ganch set to strbeg[%"IVdf"]\n", reginfo->ganch - strbeg));
}
minlen = prog->minlen;
return 0;
}
+ /* at the end of this function, we'll do a LEAVE_SCOPE(oldsave),
+ * which will call destuctors to reset PL_regmatch_state, free higher
+ * PL_regmatch_slabs, and clean up regmatch_info_aux and
+ * regmatch_info_aux_eval */
+
+ oldsave = PL_savestack_ix;
+
s = startpos;
if ((prog->extflags & RXf_USE_INTUIT)
}
}
-
- /* at the end of this function, we'll do a LEAVE_SCOPE(oldsave),
- * which will call destuctors to reset PL_regmatch_state, free higher
- * PL_regmatch_slabs, and clean up regmatch_info_aux and
- * regmatch_info_aux_eval */
-
- oldsave = PL_savestack_ix;
-
multiline = prog->extflags & RXf_PMf_MULTILINE;
if (strend - s < (minlen+(prog->check_offset_min<0?prog->check_offset_min:0))) {
goto phooey;
} else if (RXf_GPOS_CHECK == (prog->extflags & RXf_GPOS_CHECK))
{
- /* the warning about reginfo->ganch being used without initialization
- is bogus -- we set it above, when prog->extflags & RXf_GPOS_SEEN
- and we only enter this block when the same bit is set. */
- char *tmp_s = reginfo->ganch - prog->gofs;
-
- if (s <= tmp_s && regtry(reginfo, &tmp_s))
+ /* For anchored \G, the only position it can match from is
+ * (ganch-gofs); we already set startpos to this above; if intuit
+ * moved us on from there, we can't possibly succeed */
+ assert(startpos == reginfo->ganch - prog->gofs);
+ if (s == startpos && regtry(reginfo, &s))
goto got_it;
goto phooey;
}
S_core_regclass_swash(pTHX_ const regexp *prog, const regnode* node, bool doinit, SV** listsvp)
{
/* Returns the swash for the input 'node' in the regex 'prog'.
- * If <doinit> is true, will attempt to create the swash if not already
+ * If <doinit> is 'true', will attempt to create the swash if not already
* done.
- * If <listsvp> is non-null, will return the swash initialization string in
- * it.
+ * If <listsvp> is non-null, will return the printable contents of the
+ * swash. This can be used to get debugging information even before the
+ * swash exists, by calling this function with 'doinit' set to false, in
+ * which case the components that will be used to eventually create the
+ * swash are returned (in a printable form).
* Tied intimately to how regcomp.c sets up the data structure */
dVAR;
SV *sw = NULL;
- SV *si = NULL;
+ SV *si = NULL; /* Input swash initialization string */
SV* invlist = NULL;
RXi_GET_DECL(prog,progi);
}
}
+ /* If requested, return a printable version of what this swash matches */
if (listsvp) {
SV* matches_string = newSVpvn("", 0);
- /* Use the swash, if any, which has to have incorporated into it all
- * possibilities */
+ /* The swash should be used, if possible, to get the data, as it
+ * contains the resolved data. But this function can be called at
+ * compile-time, before everything gets resolved, in which case we
+ * return the currently best available information, which is the string
+ * that will eventually be used to do that resolving, 'si' */
if ((! sw || (invlist = _get_swash_invlist(sw)) == NULL)
&& (si && si != &PL_sv_undef))
{
-
- /* If no swash, use the input initialization string, if available */
sv_catsv(matches_string, si);
}