This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
add regmatch_eval_state struct
authorDavid Mitchell <davem@iabyn.com>
Sun, 19 May 2013 08:38:23 +0000 (09:38 +0100)
committerDavid Mitchell <davem@iabyn.com>
Sun, 2 Jun 2013 21:28:51 +0000 (22:28 +0100)
Replace several PL_reg* vars with a new struct. This is part of the
goal of removing all global regex state.

These particular vars are used in the case of a regex with (?{}) code
blocks. In this case, when the code in a block is called, various bits of
state (such as $1, pos($_)) are temporarily set up, even though the match
has not yet completed.

This involves updating the current PL_curpm to point to a fake PMOP which
points to the regex currently being executed. That regex has all its
current fields that are associated with captures (such as subbeg)
temporarily saved and overwritten with the current partial match results.
Similarly, $_ is temporarily aliased to the current match string, and any
old pos() position is saved. This saving was formerly done to the various
PL_reg* vars.

When the regex has finished executing (or if the code block croaks),
its fields are restored to the original values. Since this can happen in a
croak, it may be done using SAVEDESTRUCTOR_X() on the save stack. This
precludes just moving the PL_reg* vars into the regmatch_info struct,
since that is just allocated as a local var in regexec_flags(), and would
have already been abandoned and possibly overwritten after the croak and
longjmp, but before the SAVEDESTRUCTOR_X() action is taken.

So instead we put all the vars into new struct, and malloc that on entry to
the regex engine when we know we need to copy the various fields.
We save a pointer to that in the regmatch_info struct, as well as passing
it to SAVEDESTRUCTOR_X(). The destructor may get called up to twice in the
non-croak case: first it's called explicitly at the end of regexec_flags(),
which restores subbeg etc; then again from the savestack, which just
free()s the struct. In the croak case, it's called just once, and does
both the restoring and the freeing.

The vars / PL_reg_state fields this commit eliminates are:

    re_state_eval_setup_done
    PL_reg_oldsaved
    PL_reg_oldsavedlen
    PL_reg_oldsavedoffset
    PL_reg_oldsavedcoffset
    PL_reg_magic
    PL_reg_oldpos
    PL_nrs
    PL_reg_oldcurpm

perl.c
regcomp.c
regexec.c
regexp.h
sv.c

diff --git a/perl.c b/perl.c
index 6611a7a..06953c6 100644 (file)
--- a/perl.c
+++ b/perl.c
@@ -3625,7 +3625,7 @@ S_init_interp(pTHX)
 
     /* As these are inside a structure, PERLVARI isn't capable of initialising
        them  */
-    PL_reg_oldcurpm = PL_reg_curpm = NULL;
+    PL_reg_curpm = NULL;
     PL_reg_poscache = PL_reg_starttry = NULL;
 }
 
index ab86f4f..bccac5f 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
@@ -15469,17 +15469,10 @@ Perl_save_re_context(pTHX)
 
     Copy(&PL_reg_state, state, 1, struct re_save_state);
 
-    PL_reg_oldsaved = NULL;
-    PL_reg_oldsavedlen = 0;
-    PL_reg_oldsavedoffset = 0;
-    PL_reg_oldsavedcoffset = 0;
     PL_reg_maxiter = 0;
     PL_reg_leftiter = 0;
     PL_reg_poscache = NULL;
     PL_reg_poscache_size = 0;
-#ifdef PERL_ANY_COW
-    PL_nrs = NULL;
-#endif
 
     /* Save $1..$n (#18107: UTF-8 s/(\w+)/uc($1)/e); AMS 20021106. */
     if (PL_curpm) {
index e64cb26..b2d3951 100644 (file)
--- a/regexec.c
+++ b/regexec.c
@@ -651,6 +651,7 @@ Perl_re_intuit_start(pTHX_
        goto fail;
     }
 
+    reginfo->eval_state = NULL;
     reginfo->strbeg = strbeg;
     reginfo->strend = strend;
     reginfo->is_utf8_pat = is_utf8_pat;
@@ -2091,6 +2092,8 @@ Perl_regexec_flags(pTHX_ REGEXP * const rx, char *stringarg, char *strend,
     }
 
     multiline = prog->extflags & RXf_PMf_MULTILINE;
+
+    reginfo->eval_state = NULL;
     reginfo->prog = rx;         /* Yes, sorry that this is confusing.  */
     reginfo->intuit = 0;
 
@@ -2115,7 +2118,6 @@ Perl_regexec_flags(pTHX_ REGEXP * const rx, char *stringarg, char *strend,
     }
 
     RX_MATCH_TAINTED_off(rx);
-    PL_reg_state.re_state_eval_setup_done = FALSE;
     PL_reg_maxiter = 0;
 
     reginfo->is_utf8_pat = cBOOL(RX_UTF8(rx));
@@ -2602,8 +2604,11 @@ got_it:
     );
     Safefree(swap);
 
-    if (PL_reg_state.re_state_eval_setup_done)
-       S_restore_eval_state(aTHX_ prog);
+    if (reginfo->eval_state) {
+        reginfo->eval_state->direct = TRUE;
+       S_restore_eval_state(aTHX_ reginfo->eval_state);
+    }
+
     if (RXp_PAREN_NAMES(prog)) 
         (void)hv_iterinit(RXp_PAREN_NAMES(prog));
 
@@ -2731,8 +2736,12 @@ got_it:
 phooey:
     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch failed%s\n",
                          PL_colors[4], PL_colors[5]));
-    if (PL_reg_state.re_state_eval_setup_done)
-       S_restore_eval_state(aTHX_ prog);
+
+    if (reginfo->eval_state) {
+        reginfo->eval_state->direct = TRUE;
+       S_restore_eval_state(aTHX_ reginfo->eval_state);
+    }
+
     if (swap) {
         /* we failed :-( roll it back */
        DEBUG_BUFFERS_r(PerlIO_printf(Perl_debug_log,
@@ -2751,7 +2760,7 @@ phooey:
 /* Set which rex is pointed to by PL_reg_state, handling ref counting.
  * Do inc before dec, in case old and new rex are the same */
 #define SET_reg_curpm(Re2) \
-    if (PL_reg_state.re_state_eval_setup_done) {    \
+    if (reginfo->eval_state) {                      \
        (void)ReREFCNT_inc(Re2);                    \
        ReREFCNT_dec(PM_GETRE(PL_reg_curpm));       \
        PM_SETRE((PL_reg_curpm), (Re2));            \
@@ -2776,8 +2785,7 @@ S_regtry(pTHX_ regmatch_info *reginfo, char **startposp)
 
     reginfo->cutpoint=NULL;
 
-    if ((prog->extflags & RXf_EVAL_SEEN)
-            && !PL_reg_state.re_state_eval_setup_done)
+    if ((prog->extflags & RXf_EVAL_SEEN) && !reginfo->eval_state)
         S_setup_eval_state(aTHX_ reginfo);
 
 #ifdef DEBUGGING
@@ -4914,8 +4922,10 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
                DEBUG_STATE_r( PerlIO_printf(Perl_debug_log, 
                    "  re EVAL PL_op=0x%"UVxf"\n", PTR2UV(nop)) );
 
-               rex->offs[0].end = PL_reg_magic->mg_len =
-                                                locinput - reginfo->strbeg;
+               rex->offs[0].end = locinput - reginfo->strbeg;
+                if (reginfo->eval_state->pos_magic)
+                        reginfo->eval_state->pos_magic->mg_len
+                                        = locinput - reginfo->strbeg;
 
                 if (sv_yes_mark) {
                     SV *sv_mrk = get_sv("REGMARK", 1);
@@ -6500,7 +6510,7 @@ yes:
     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch successful!%s\n",
                          PL_colors[4], PL_colors[5]));
 
-    if (PL_reg_state.re_state_eval_setup_done) {
+    if (reginfo->eval_state) {
        /* each successfully executed (?{...}) block does the equivalent of
         *   local $^R = do {...}
         * When popping the save stack, all these locals would be undone;
@@ -7481,8 +7491,16 @@ S_setup_eval_state(pTHX_ regmatch_info *const reginfo)
 {
     MAGIC *mg;
     regexp *const rex = ReANY(reginfo->prog);
+    regmatch_eval_state *eval_state;
+
+    Newx(eval_state, 1, regmatch_eval_state);
+    assert(!reginfo->eval_state);
+    reginfo->eval_state = eval_state;
+
+    eval_state->restored = FALSE;
+    eval_state->direct   = FALSE;
+    eval_state->rex = rex;
 
-    PL_reg_state.re_state_eval_setup_done = TRUE;
     if (reginfo->sv) {
         /* Make $_ available to executed code. */
         if (reginfo->sv != DEFSV) {
@@ -7501,9 +7519,12 @@ S_setup_eval_state(pTHX_ regmatch_info *const reginfo)
                              &PL_vtbl_mglob, NULL, 0);
             mg->mg_len = -1;
         }
-        PL_reg_magic    = mg;
-        PL_reg_oldpos   = mg->mg_len;
+        eval_state->pos_magic = mg;
+        eval_state->pos       = mg->mg_len;
     }
+    else
+        eval_state->pos_magic = NULL;
+
     if (!PL_reg_curpm) {
         Newxz(PL_reg_curpm, 1, PMOP);
 #ifdef USE_ITHREADS
@@ -7518,55 +7539,65 @@ S_setup_eval_state(pTHX_ regmatch_info *const reginfo)
 #endif
     }
     SET_reg_curpm(reginfo->prog);
-    PL_reg_oldcurpm = PL_curpm;
+    eval_state->curpm = PL_curpm;
     PL_curpm = PL_reg_curpm;
     if (RXp_MATCH_COPIED(rex)) {
         /*  Here is a serious problem: we cannot rewrite subbeg,
             since it may be needed if this match fails.  Thus
             $` inside (?{}) could fail... */
-        PL_reg_oldsaved = rex->subbeg;
-        PL_reg_oldsavedlen = rex->sublen;
-        PL_reg_oldsavedoffset = rex->suboffset;
-        PL_reg_oldsavedcoffset = rex->suboffset;
+        eval_state->subbeg     = rex->subbeg;
+        eval_state->sublen     = rex->sublen;
+        eval_state->suboffset  = rex->suboffset;
+        eval_state->subcoffset = rex->suboffset;
 #ifdef PERL_ANY_COW
-        PL_nrs = rex->saved_copy;
+        eval_state->saved_copy = rex->saved_copy;
 #endif
         RXp_MATCH_COPIED_off(rex);
     }
     else
-        PL_reg_oldsaved = NULL;
+        eval_state->subbeg = NULL;
     rex->subbeg = (char *)reginfo->strbeg;
     rex->suboffset = 0;
     rex->subcoffset = 0;
     rex->sublen = reginfo->strend - reginfo->strbeg;
-    SAVEDESTRUCTOR_X(S_restore_eval_state, rex);
+    SAVEDESTRUCTOR_X(S_restore_eval_state, eval_state);
 }
 
 /* undo the effects of S_setup_eval_state() - can either be called
- * directly, or via a destructor */
+ * directly, or via a destructor. If we get called directly, we'll still
+ * get called again later from the destructor */
 
 static void
 S_restore_eval_state(pTHX_ void *arg)
 {
     dVAR;
-    regexp * const rex = (regexp *)arg;
-    if (PL_reg_state.re_state_eval_setup_done) {
-       if (PL_reg_oldsaved) {
-           rex->subbeg = PL_reg_oldsaved;
-           rex->sublen = PL_reg_oldsavedlen;
-           rex->suboffset = PL_reg_oldsavedoffset;
-           rex->subcoffset = PL_reg_oldsavedcoffset;
+    regmatch_eval_state * const eval_state = (regmatch_eval_state *)arg;
+    regexp * const rex = eval_state->rex;
+
+    if (!eval_state->restored) {
+       if (eval_state->subbeg) {
+           rex->subbeg     = eval_state->subbeg;
+           rex->sublen     = eval_state->sublen;
+           rex->suboffset  = eval_state->suboffset;
+           rex->subcoffset = eval_state->subcoffset;
 #ifdef PERL_ANY_COW
-           rex->saved_copy = PL_nrs;
+           rex->saved_copy = eval_state->saved_copy;
 #endif
            RXp_MATCH_COPIED_on(rex);
        }
-       PL_reg_magic->mg_len = PL_reg_oldpos;
-       PL_reg_state.re_state_eval_setup_done = FALSE;
-       PL_curpm = PL_reg_oldcurpm;
-    }  
+        if (eval_state->pos_magic)
+            eval_state->pos_magic->mg_len = eval_state->pos;
+       PL_curpm = eval_state->curpm;
+        eval_state->restored = TRUE;
+    }
+    if (eval_state->direct)
+        eval_state->direct = FALSE;
+    else
+        /* we're being called from a destructor rather than directly */
+        Safefree(eval_state);
 }
 
+
 STATIC void
 S_to_utf8_substr(pTHX_ regexp *prog)
 {
index d1f7a01..6ec6214 100644 (file)
--- a/regexp.h
+++ b/regexp.h
@@ -576,6 +576,29 @@ get_regex_charset_name(const U32 flags, STRLEN* const lenp)
 
 #define FBMrf_MULTILINE        1
 
+
+/* saved state when executing a regex that contains code blocks.
+ * These need restoring at the end of the match, or on croak().
+ * These fields can't be stored in regmatch_info since the latter is
+ * allocated directly on the stack in regexec_flags(), and they need to
+ * exist during a croak() after the stack has been unwound */
+
+typedef struct {
+    regexp *rex;
+    PMOP    *curpm;     /* saved PL_curpm */
+#ifdef PERL_ANY_COW
+    SV      *saved_copy; /* saved saved_copy field from rex */
+#endif
+    char    *subbeg;    /* saved subbeg     field from rex */
+    STRLEN  sublen;     /* saved sublen     field from rex */
+    STRLEN  suboffset;  /* saved suboffset  field from rex */
+    STRLEN  subcoffset; /* saved subcoffset field from rex */
+    MAGIC   *pos_magic; /* pos() magic attached to $_ */
+    I32     pos;        /* the original value of pos() in pos_magic */
+    bool    restored;   /* we have already undone the save */
+    bool    direct;     /* we are calling the destructor directly */
+} regmatch_eval_state;
+
 /* some basic information about the current match that is created by
  * Perl_regexec_flags and then passed to regtry(), regmatch() etc */
 
@@ -587,6 +610,7 @@ typedef struct {
     SV *sv;
     char *ganch;
     char *cutpoint;
+    regmatch_eval_state *eval_state; /* extra saved state for (?{}) */
     bool intuit;    /* re_intuit_start() is the top-level caller */
     bool is_utf8_pat;
     bool warned; /* we have issued a recursion warning; no need for more */
@@ -763,41 +787,22 @@ typedef struct regmatch_slab {
 } regmatch_slab;
 
 #define PL_reg_match_utf8      PL_reg_state.re_state_reg_match_utf8
-#define PL_reg_magic           PL_reg_state.re_state_reg_magic
-#define PL_reg_oldpos          PL_reg_state.re_state_reg_oldpos
-#define PL_reg_oldcurpm                PL_reg_state.re_state_reg_oldcurpm
 #define PL_reg_curpm           PL_reg_state.re_state_reg_curpm
-#define PL_reg_oldsaved                PL_reg_state.re_state_reg_oldsaved
-#define PL_reg_oldsavedlen     PL_reg_state.re_state_reg_oldsavedlen
-#define PL_reg_oldsavedoffset  PL_reg_state.re_state_reg_oldsavedoffset
-#define PL_reg_oldsavedcoffset PL_reg_state.re_state_reg_oldsavedcoffset
 #define PL_reg_maxiter         PL_reg_state.re_state_reg_maxiter
 #define PL_reg_leftiter                PL_reg_state.re_state_reg_leftiter
 #define PL_reg_poscache                PL_reg_state.re_state_reg_poscache
 #define PL_reg_poscache_size   PL_reg_state.re_state_reg_poscache_size
 #define PL_reg_starttry                PL_reg_state.re_state_reg_starttry
-#define PL_nrs                 PL_reg_state.re_state_nrs
 
 struct re_save_state {
-    bool re_state_eval_setup_done;     /* from regexec.c */
     bool re_state_reg_match_utf8;      /* from regexec.c */
     /* Space for U8 */
-    I32 re_state_reg_oldpos;           /* from regexec.c */
     I32 re_state_reg_maxiter;          /* max wait until caching pos */
     I32 re_state_reg_leftiter;         /* wait until caching pos */
-    MAGIC *re_state_reg_magic;         /* from regexec.c */
-    PMOP *re_state_reg_oldcurpm;       /* from regexec.c */
     PMOP *re_state_reg_curpm;          /* from regexec.c */
-    char *re_state_reg_oldsaved;       /* old saved substr during match */
-    STRLEN re_state_reg_oldsavedlen;   /* old length of saved substr during match */
-    STRLEN re_state_reg_oldsavedoffset;        /* old offset of saved substr during match */
-    STRLEN re_state_reg_oldsavedcoffset;/* old coffset of saved substr during match */
     STRLEN re_state_reg_poscache_size; /* size of pos cache of WHILEM */
     char *re_state_reg_poscache;       /* cache of pos of WHILEM */
     char *re_state_reg_starttry;       /* from regexec.c */
-#ifdef PERL_ANY_COW
-    SV *re_state_nrs;                  /* was placeholder: unused since 5.8.0 (5.7.2 patch #12027 for bug ID 20010815.012). Used to save rx->saved_copy */
-#endif
 };
 
 #define SAVESTACK_ALLOC_FOR_RE_SAVE_STATE \
diff --git a/sv.c b/sv.c
index 84f3115..85110ac 100644 (file)
--- a/sv.c
+++ b/sv.c
@@ -12901,21 +12901,9 @@ Perl_ss_dup(pTHX_ PerlInterpreter *proto_perl, CLONE_PARAMS* param)
                Copy(old_state, new_state, 1, struct re_save_state);
                ix -= SAVESTACK_ALLOC_FOR_RE_SAVE_STATE;
 
-#ifdef PERL_ANY_COW
-               new_state->re_state_nrs
-                   = sv_dup(old_state->re_state_nrs, param);
-#endif
-               new_state->re_state_reg_magic
-                   = (MAGIC*) any_dup(old_state->re_state_reg_magic, 
-                              proto_perl);
-               new_state->re_state_reg_oldcurpm
-                   = (PMOP*) any_dup(old_state->re_state_reg_oldcurpm, 
-                             proto_perl);
                new_state->re_state_reg_curpm
                    = (PMOP*)  any_dup(old_state->re_state_reg_curpm, 
                               proto_perl);
-               new_state->re_state_reg_oldsaved
-                   = pv_dup(old_state->re_state_reg_oldsaved);
                new_state->re_state_reg_poscache
                    = pv_dup(old_state->re_state_reg_poscache);
                new_state->re_state_reg_starttry