This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
eliminate PL_reg_start_tmp, PL_reg_start_tmpl
authorDavid Mitchell <davem@iabyn.com>
Tue, 15 May 2012 20:01:39 +0000 (21:01 +0100)
committerDavid Mitchell <davem@iabyn.com>
Wed, 13 Jun 2012 12:32:53 +0000 (13:32 +0100)
PL_reg_start_tmp is a global array of temporary parentheses start
positions. An element is set when a '(' is first encountered,
while when a ')' is seen, the per-regex offs array is updated with the
start and end position: the end derived from the position where the ')'
was encountered, and the start position derived from PL_reg_start_tmp[n].
This allows us to differentiate between pending and fully-processed
captures.

Change it so that the tmp start value becomes a third field in the offs
array (.start_tmp), along with the existing .start and .end fields. This
makes the value now per regex rather than global. Although it uses a bit
more memory (the start_tmp values aren't needed after the match has
completed), it simplifies the code, and will make it easier to make a
(??{}) switch to the new regex without having to dump everything on the
save stack.

perl.c
regcomp.c
regexec.c
regexp.h
scope.c
sv.c

diff --git a/perl.c b/perl.c
index c561799..f7f6c2b 100644 (file)
--- a/perl.c
+++ b/perl.c
@@ -1236,9 +1236,6 @@ perl_destruct(pTHXx)
 
     Safefree(PL_origfilename);
     PL_origfilename = NULL;
-    Safefree(PL_reg_start_tmp);
-    PL_reg_start_tmp = (char**)NULL;
-    PL_reg_start_tmpl = 0;
     Safefree(PL_reg_curpm);
     Safefree(PL_reg_poscache);
     free_tied_hv_pool();
index f2b82a8..d8dc9af 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
@@ -13901,8 +13901,6 @@ Perl_save_re_context(pTHX)
 
     Copy(&PL_reg_state, state, 1, struct re_save_state);
 
-    PL_reg_start_tmp = 0;
-    PL_reg_start_tmpl = 0;
     PL_reg_oldsaved = NULL;
     PL_reg_oldsavedlen = 0;
     PL_reg_maxiter = 0;
index 73c71f5..559e061 100644 (file)
--- a/regexec.c
+++ b/regexec.c
@@ -366,12 +366,12 @@ S_regcppush(pTHX_ const regexp *rex, I32 parenfloor)
 /* REGCP_PARENS_ELEMS are pushed per pairs of parentheses. */
        SSPUSHINT(rex->offs[p].end);
        SSPUSHINT(rex->offs[p].start);
-       SSPUSHPTR(PL_reg_start_tmp[p]);
+       SSPUSHPTR(rex->offs[p].start_tmp);
        SSPUSHINT(p);
        DEBUG_BUFFERS_r(PerlIO_printf(Perl_debug_log,
          "     saving \\%"UVuf" %"IVdf"(%"IVdf")..%"IVdf"\n",
                      (UV)p, (IV)rex->offs[p].start,
-                     (IV)(PL_reg_start_tmp[p] - PL_bostr),
+                     (IV)(rex->offs[p].start_tmp - PL_bostr),
                      (IV)rex->offs[p].end
        ));
     }
@@ -425,7 +425,7 @@ S_regcppop(pTHX_ regexp *rex)
     for ( ; i > 0; i -= REGCP_PAREN_ELEMS) {
        I32 tmps;
        U32 paren = (U32)SSPOPINT;
-       PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
+       rex->offs[paren].start_tmp = (char *) SSPOPPTR;
        rex->offs[paren].start = SSPOPINT;
        tmps = SSPOPINT;
        if (paren <= rex->lastparen)
@@ -434,7 +434,7 @@ S_regcppop(pTHX_ regexp *rex)
            PerlIO_printf(Perl_debug_log,
                          "     restoring \\%"UVuf" to %"IVdf"(%"IVdf")..%"IVdf"%s\n",
                          (UV)paren, (IV)rex->offs[paren].start,
-                         (IV)(PL_reg_start_tmp[paren] - PL_bostr),
+                         (IV)(rex->offs[paren].start_tmp - PL_bostr),
                          (IV)rex->offs[paren].end,
                          (paren > rex->lastparen ? "(no)" : ""));
        );
@@ -2645,13 +2645,6 @@ S_regtry(pTHX_ regmatch_info *reginfo, char **startpos)
     prog->lastparen = 0;
     prog->lastcloseparen = 0;
     PL_regsize = 0;
-    if (PL_reg_start_tmpl <= prog->nparens) {
-       PL_reg_start_tmpl = prog->nparens*3/2 + 3;
-        if(PL_reg_start_tmp)
-            Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
-        else
-            Newx(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
-    }
 
     /* XXXX What this code is doing here?!!!  There should be no need
        to do this again and again, prog->lastparen should take care of
@@ -4462,14 +4455,6 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
                );              
                startpoint = rei->program + 1;
                        ST.close_paren = 0; /* only used for GOSUB */
-                       /* borrowed from regtry */
-                if (PL_reg_start_tmpl <= re->nparens) {
-                    PL_reg_start_tmpl = re->nparens*3/2 + 3;
-                    if(PL_reg_start_tmp)
-                        Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
-                    else
-                        Newx(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
-                }                      
 
         eval_recurse_doit: /* Share code with GOSUB below this line */                         
                /* run the pattern returned from (??{...}) */
@@ -4552,14 +4537,14 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
 
        case OPEN:
            n = ARG(scan);  /* which paren pair */
-           PL_reg_start_tmp[n] = locinput;
+           rex->offs[n].start_tmp = locinput;
            if (n > PL_regsize)
                PL_regsize = n;
             lastopen = n;
            break;
        case CLOSE:
            n = ARG(scan);  /* which paren pair */
-           rex->offs[n].start = PL_reg_start_tmp[n] - PL_bostr;
+           rex->offs[n].start = rex->offs[n].start_tmp - PL_bostr;
            rex->offs[n].end = locinput - PL_bostr;
            /*if (n > PL_regsize)
                PL_regsize = n;*/
@@ -4581,7 +4566,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
                         n = ARG(cursor);
                         if ( n <= lastopen ) {
                             rex->offs[n].start
-                               = PL_reg_start_tmp[n] - PL_bostr;
+                                       = rex->offs[n].start_tmp - PL_bostr;
                             rex->offs[n].end = locinput - PL_bostr;
                             /*if (n > PL_regsize)
                             PL_regsize = n;*/
index ba3709e..4782ac6 100644 (file)
--- a/regexp.h
+++ b/regexp.h
@@ -50,9 +50,18 @@ struct reg_substr_data {
 #define SV_SAVED_COPY
 #endif
 
+/* offsets within a string of a particular /(.)/ capture */
+
 typedef struct regexp_paren_pair {
     I32 start;
     I32 end;
+    /* 'start_tmp' records a new opening position before the matching end
+     * has been found, so that the old start and end values are still
+     * valid, e.g.
+     *   "abc" =~ /(.(?{print "[$1]"}))+/
+     *outputs [][a][b]
+     * This field is not part of the API.  */
+    char *start_tmp;
 } regexp_paren_pair;
 
 #if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_UTF8_C)
@@ -738,8 +747,6 @@ typedef struct regmatch_slab {
 #define PL_bostr               PL_reg_state.re_state_bostr
 #define PL_reginput            PL_reg_state.re_state_reginput
 #define PL_regeol              PL_reg_state.re_state_regeol
-#define PL_reg_start_tmp       PL_reg_state.re_state_reg_start_tmp
-#define PL_reg_start_tmpl      PL_reg_state.re_state_reg_start_tmpl
 #define PL_reg_match_utf8      PL_reg_state.re_state_reg_match_utf8
 #define PL_reg_magic           PL_reg_state.re_state_reg_magic
 #define PL_reg_oldpos          PL_reg_state.re_state_reg_oldpos
@@ -757,14 +764,12 @@ typedef struct regmatch_slab {
 
 struct re_save_state {
     U32 re_state_reg_flags;            /* from regexec.c */
-    U32 re_state_reg_start_tmpl;       /* from regexec.c */
     bool re_state_eval_setup_done;     /* from regexec.c */
     bool re_state_reg_match_utf8;      /* from regexec.c */
     bool re_reparsing;                 /* runtime (?{}) fed back into parser */
     char *re_state_bostr;
     char *re_state_reginput;           /* String-input pointer. */
     char *re_state_regeol;             /* End of input, for $ check. */
-    char **re_state_reg_start_tmp;     /* from regexec.c */
     MAGIC *re_state_reg_magic;         /* from regexec.c */
     PMOP *re_state_reg_oldcurpm;       /* from regexec.c */
     PMOP *re_state_reg_curpm;          /* from regexec.c */
diff --git a/scope.c b/scope.c
index 5c34a49..2a9b3d5 100644 (file)
--- a/scope.c
+++ b/scope.c
@@ -1136,9 +1136,6 @@ Perl_leave_scope(pTHX_ I32 base)
                     - SAVESTACK_ALLOC_FOR_RE_SAVE_STATE);
                PL_savestack_ix -= SAVESTACK_ALLOC_FOR_RE_SAVE_STATE;
 
-               if (PL_reg_start_tmp != state->re_state_reg_start_tmp) {
-                   Safefree(PL_reg_start_tmp);
-               }
                if (PL_reg_poscache != state->re_state_reg_poscache) {
                    Safefree(PL_reg_poscache);
                }
diff --git a/sv.c b/sv.c
index 2f8c8a7..ec80a16 100644 (file)
--- a/sv.c
+++ b/sv.c
@@ -12708,19 +12708,6 @@ Perl_ss_dup(pTHX_ PerlInterpreter *proto_perl, CLONE_PARAMS* param)
                    = pv_dup(old_state->re_state_reginput);
                new_state->re_state_regeol
                    = pv_dup(old_state->re_state_regeol);
-               /* XXX This just has to be broken. The old save_re_context
-                  code did SAVEGENERICPV(PL_reg_start_tmp);
-                  PL_reg_start_tmp is char **.
-                  Look above to what the dup code does for
-                  SAVEt_GENERIC_PVREF
-                  It can never have worked.
-                  So this is merely a faithful copy of the exiting bug:  */
-               new_state->re_state_reg_start_tmp
-                   = (char **) pv_dup((char *)
-                                     old_state->re_state_reg_start_tmp);
-               /* I assume that it only ever "worked" because no-one called
-                  (pseudo)fork while the regexp engine had re-entered itself.
-               */
 #ifdef PERL_OLD_COPY_ON_WRITE
                new_state->re_state_nrs
                    = sv_dup(old_state->re_state_nrs, param);