This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
eliminate PL_reg_poscache, PL_reg_poscache_size
authorDavid Mitchell <davem@iabyn.com>
Fri, 31 May 2013 14:40:48 +0000 (15:40 +0100)
committerDavid Mitchell <davem@iabyn.com>
Sun, 2 Jun 2013 21:28:54 +0000 (22:28 +0100)
Eliminate these two global vars (well, fields in the global
PL_reg_state), that hold the regex super-liner cache.

PL_reg_poscache_size gets replaced with a field in the local regmatch_info
struct, while PL_reg_poscache (which needs freeing at end of pattern
execution or on croak()), goes in the regmatch_info_aux struct.

Note that this includes a slight change in behaviour.
Each regex execution now has its own private poscache pointer, initially
null.  If the super-linear behaviour is detected, the cache is malloced,
used for the duration of the pattern match, then freed.

The former behaviour allocated a global poscache on first use, which was
retained between regex executions. Since the poscache could between 0.25
and 2x the size of the string being matched, that could potentially be a
big buffer lying around unused. So we save memory at the expense of a new
malloc/free for every regex that triggers super-linear behaviour.

The old behaviour saved the old pointer on reentrancy, then restored the
old one (and possibly freed the new buffer) at exit. Except it didn't for
(?{}), so /(?{ m{something-that-triggers-super-linear-cache} })/ would
leak each time the inner regex was called. This is now fixed
automatically.

perl.c
regcomp.c
regexec.c
regexp.h
scope.c
sv.c

diff --git a/perl.c b/perl.c
index 06953c6..726f571 100644 (file)
--- a/perl.c
+++ b/perl.c
@@ -1219,7 +1219,6 @@ perl_destruct(pTHXx)
     Safefree(PL_origfilename);
     PL_origfilename = NULL;
     Safefree(PL_reg_curpm);
-    Safefree(PL_reg_poscache);
     free_tied_hv_pool();
     Safefree(PL_op_mask);
     Safefree(PL_psig_name);
@@ -3626,7 +3625,6 @@ S_init_interp(pTHX)
     /* As these are inside a structure, PERLVARI isn't capable of initialising
        them  */
     PL_reg_curpm = NULL;
-    PL_reg_poscache = PL_reg_starttry = NULL;
 }
 
 STATIC void
index 5712e9f..03d5d50 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
@@ -15469,9 +15469,6 @@ Perl_save_re_context(pTHX)
 
     Copy(&PL_reg_state, state, 1, struct re_save_state);
 
-    PL_reg_poscache = NULL;
-    PL_reg_poscache_size = 0;
-
     /* Save $1..$n (#18107: UTF-8 s/(\w+)/uc($1)/e); AMS 20021106. */
     if (PL_curpm) {
        const REGEXP * const rx = PM_GETRE(PL_curpm);
index dbd2dbf..e1580de 100644 (file)
--- a/regexec.c
+++ b/regexec.c
@@ -2181,6 +2181,7 @@ Perl_regexec_flags(pTHX_ REGEXP * const rx, char *stringarg, char *strend,
 
         reginfo->info_aux->old_regmatch_state = old_regmatch_state;
         reginfo->info_aux->old_regmatch_slab  = old_regmatch_slab;
+        reginfo->info_aux->poscache = NULL;
 
         SAVEDESTRUCTOR_X(S_cleanup_regmatch_info_aux, reginfo->info_aux);
 
@@ -2900,7 +2901,7 @@ S_regtry(pTHX_ regmatch_info *reginfo, char **startposp)
    "unreachable code" warnings, which are bogus, but distracting. */
 #define CACHEsayNO \
     if (ST.cache_mask) \
-       PL_reg_poscache[ST.cache_offset] |= ST.cache_mask; \
+       reginfo->info_aux->poscache[ST.cache_offset] |= ST.cache_mask; \
     sayNO
 
 /* this is used to determine how far from the left messages like
@@ -5444,16 +5445,17 @@ NULL
                if (reginfo->poscache_iter-- == 0) {
                    /* initialise cache */
                    const I32 size = (reginfo->poscache_maxiter + 7)/8;
-                   if (PL_reg_poscache) {
-                       if ((I32)PL_reg_poscache_size < size) {
-                           Renew(PL_reg_poscache, size, char);
-                           PL_reg_poscache_size = size;
+                    regmatch_info_aux *const aux = reginfo->info_aux;
+                   if (aux->poscache) {
+                       if ((I32)reginfo->poscache_size < size) {
+                           Renew(aux->poscache, size, char);
+                           reginfo->poscache_size = size;
                        }
-                       Zero(PL_reg_poscache, size, char);
+                       Zero(aux->poscache, size, char);
                    }
                    else {
-                       PL_reg_poscache_size = size;
-                       Newxz(PL_reg_poscache, size, char);
+                       reginfo->poscache_size = size;
+                       Newxz(aux->poscache, size, char);
                    }
                    DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
       "%swhilem: Detected a super-linear match, switching on caching%s...\n",
@@ -5469,7 +5471,7 @@ NULL
                                   * (scan->flags>>4);
                    mask    = 1 << (offset % 8);
                    offset /= 8;
-                   if (PL_reg_poscache[offset] & mask) {
+                   if (reginfo->info_aux->poscache[offset] & mask) {
                        DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
                            "%*s  whilem: (cache) already tried at this position...\n",
                            REPORT_CODE_OFF+depth*2, "")
@@ -7584,6 +7586,8 @@ S_cleanup_regmatch_info_aux(pTHX_ void *arg)
     regmatch_info_aux_eval *eval_state =  aux->info_aux_eval;
     regmatch_slab *s;
 
+    Safefree(aux->poscache);
+
     if (eval_state) {
 
         /* undo the effects of S_setup_eval_state() */
index 0b27fdf..01c8f36 100644 (file)
--- a/regexp.h
+++ b/regexp.h
@@ -606,6 +606,7 @@ typedef struct {
     regmatch_info_aux_eval *info_aux_eval;
     struct regmatch_state *old_regmatch_state; /* saved PL_regmatch_state */
     struct regmatch_slab  *old_regmatch_slab;  /* saved PL_regmatch_slab */
+    char *poscache;    /* S-L cache of fail positions of WHILEMs */
 } regmatch_info_aux;
 
 
@@ -628,6 +629,7 @@ typedef struct {
     regmatch_info_aux_eval *info_aux_eval; /* extra saved state for (?{}) */
     I32  poscache_maxiter; /* how many whilems todo before S-L cache kicks in */
     I32  poscache_iter;    /* current countdown from _maxiter to zero */
+    STRLEN poscache_size;  /* size of regmatch_info_aux.poscache */
     bool intuit;    /* re_intuit_start() is the top-level caller */
     bool is_utf8_pat;    /* regex is utf8 */
     bool is_utf8_target; /* string being matched is utf8 */
@@ -827,14 +829,10 @@ typedef struct regmatch_slab {
 } regmatch_slab;
 
 #define PL_reg_curpm           PL_reg_state.re_state_reg_curpm
-#define PL_reg_poscache                PL_reg_state.re_state_reg_poscache
-#define PL_reg_poscache_size   PL_reg_state.re_state_reg_poscache_size
 #define PL_reg_starttry                PL_reg_state.re_state_reg_starttry
 
 struct re_save_state {
     PMOP *re_state_reg_curpm;          /* from regexec.c */
-    STRLEN re_state_reg_poscache_size; /* size of pos cache of WHILEM */
-    char *re_state_reg_poscache;       /* cache of pos of WHILEM */
     char *re_state_reg_starttry;       /* from regexec.c */
 };
 
diff --git a/scope.c b/scope.c
index 969f6ff..2d3810f 100644 (file)
--- a/scope.c
+++ b/scope.c
@@ -1233,9 +1233,6 @@ Perl_leave_scope(pTHX_ I32 base)
                     - SAVESTACK_ALLOC_FOR_RE_SAVE_STATE);
                PL_savestack_ix -= SAVESTACK_ALLOC_FOR_RE_SAVE_STATE;
 
-               if (PL_reg_poscache != state->re_state_reg_poscache) {
-                   Safefree(PL_reg_poscache);
-               }
                Copy(state, &PL_reg_state, 1, struct re_save_state);
            }
            break;
diff --git a/sv.c b/sv.c
index 85110ac..66b58e0 100644 (file)
--- a/sv.c
+++ b/sv.c
@@ -12904,10 +12904,6 @@ Perl_ss_dup(pTHX_ PerlInterpreter *proto_perl, CLONE_PARAMS* param)
                new_state->re_state_reg_curpm
                    = (PMOP*)  any_dup(old_state->re_state_reg_curpm, 
                               proto_perl);
-               new_state->re_state_reg_poscache
-                   = pv_dup(old_state->re_state_reg_poscache);
-               new_state->re_state_reg_starttry
-                   = pv_dup(old_state->re_state_reg_starttry);
                break;
            }
        case SAVEt_COMPILE_WARNINGS: