This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
regcomp.c - cleanup the ahocorasick start class logic so it more self-documenting
authorYves Orton <demerphq@gmail.com>
Sat, 19 Apr 2014 12:42:47 +0000 (14:42 +0200)
committerYves Orton <demerphq@gmail.com>
Sun, 1 Jun 2014 22:42:07 +0000 (00:42 +0200)
The logic of setting up an AHO-CORASICK regex start class was not fully
encapsuated in the make_trie_failtable() function, which itself was
poorly named. Merged the code into make_trie_failtable() and renamed
it to construct_ahocorasick_from_trie().

embed.fnc
embed.h
proto.h
regcomp.c

index 85dbdf4..159f26c 100644 (file)
--- a/embed.fnc
+++ b/embed.fnc
@@ -2156,9 +2156,8 @@ Es        |I32    |make_trie      |NN RExC_state_t *pRExC_state \
                                |NN regnode *startbranch|NN regnode *first \
                                |NN regnode *last|NN regnode *tail \
                                |U32 word_count|U32 flags|U32 depth
-Es     |void   |make_trie_failtable    |NN RExC_state_t *pRExC_state \
-                                |NN regnode *source|NN regnode *stclass \
-                               |U32 depth
+Es     |regnode *|construct_ahocorasick_from_trie|NN RExC_state_t *pRExC_state \
+                                |NN regnode *source|U32 depth
 #  ifdef DEBUGGING
 Es        |void        |regdump_intflags|NULLOK const char *lead| const U32 flags
 Es     |void   |regdump_extflags|NULLOK const char *lead| const U32 flags
diff --git a/embed.h b/embed.h
index ca9983d..ca1b91b 100644 (file)
--- a/embed.h
+++ b/embed.h
 #define add_data               S_add_data
 #define alloc_maybe_populate_EXACT(a,b,c,d,e,f)        S_alloc_maybe_populate_EXACT(aTHX_ a,b,c,d,e,f)
 #define compute_EXACTish(a)    S_compute_EXACTish(aTHX_ a)
+#define construct_ahocorasick_from_trie(a,b,c) S_construct_ahocorasick_from_trie(aTHX_ a,b,c)
 #define could_it_be_a_POSIX_class(a)   S_could_it_be_a_POSIX_class(aTHX_ a)
 #define get_ANYOF_cp_list_for_ssc(a,b) S_get_ANYOF_cp_list_for_ssc(aTHX_ a,b)
 #define get_invlist_iter_addr(a)       S_get_invlist_iter_addr(aTHX_ a)
 #define invlist_trim(a)                S_invlist_trim(aTHX_ a)
 #define join_exact(a,b,c,d,e,f,g)      S_join_exact(aTHX_ a,b,c,d,e,f,g)
 #define make_trie(a,b,c,d,e,f,g,h)     S_make_trie(aTHX_ a,b,c,d,e,f,g,h)
-#define make_trie_failtable(a,b,c,d)   S_make_trie_failtable(aTHX_ a,b,c,d)
 #define nextchar(a)            S_nextchar(aTHX_ a)
 #define parse_lparen_question_flags(a) S_parse_lparen_question_flags(aTHX_ a)
 #define populate_ANYOF_from_invlist(a,b)       S_populate_ANYOF_from_invlist(aTHX_ a,b)
diff --git a/proto.h b/proto.h
index 70cf3de..eb60a4f 100644 (file)
--- a/proto.h
+++ b/proto.h
@@ -6701,6 +6701,12 @@ PERL_STATIC_INLINE U8    S_compute_EXACTish(pTHX_ RExC_state_t *pRExC_state)
 #define PERL_ARGS_ASSERT_COMPUTE_EXACTISH      \
        assert(pRExC_state)
 
+STATIC regnode *       S_construct_ahocorasick_from_trie(pTHX_ RExC_state_t *pRExC_state, regnode *source, U32 depth)
+                       __attribute__nonnull__(pTHX_1)
+                       __attribute__nonnull__(pTHX_2);
+#define PERL_ARGS_ASSERT_CONSTRUCT_AHOCORASICK_FROM_TRIE       \
+       assert(pRExC_state); assert(source)
+
 STATIC bool    S_could_it_be_a_POSIX_class(pTHX_ RExC_state_t *pRExC_state)
                        __attribute__nonnull__(pTHX_1);
 #define PERL_ARGS_ASSERT_COULD_IT_BE_A_POSIX_CLASS     \
@@ -6828,13 +6834,6 @@ STATIC I32       S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, re
 #define PERL_ARGS_ASSERT_MAKE_TRIE     \
        assert(pRExC_state); assert(startbranch); assert(first); assert(last); assert(tail)
 
-STATIC void    S_make_trie_failtable(pTHX_ RExC_state_t *pRExC_state, regnode *source, regnode *stclass, U32 depth)
-                       __attribute__nonnull__(pTHX_1)
-                       __attribute__nonnull__(pTHX_2)
-                       __attribute__nonnull__(pTHX_3);
-#define PERL_ARGS_ASSERT_MAKE_TRIE_FAILTABLE   \
-       assert(pRExC_state); assert(source); assert(stclass)
-
 STATIC char *  S_nextchar(pTHX_ RExC_state_t *pRExC_state)
                        __attribute__nonnull__(pTHX_1);
 #define PERL_ARGS_ASSERT_NEXTCHAR      \
index baeea53..a395e2b 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
@@ -2984,8 +2984,8 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch,
              : MADE_TRIE;
 }
 
-STATIC void
-S_make_trie_failtable(pTHX_ RExC_state_t *pRExC_state, regnode *source,  regnode *stclass, U32 depth)
+STATIC regnode *
+S_construct_ahocorasick_from_trie(pTHX_ RExC_state_t *pRExC_state, regnode *source, U32 depth)
 {
 /* The Trie is constructed and compressed now so we can build a fail array if
  * it's needed
@@ -3023,13 +3023,26 @@ S_make_trie_failtable(pTHX_ RExC_state_t *pRExC_state, regnode *source,  regnode
     U32 *fail;
     reg_ac_data *aho;
     const U32 data_slot = add_data( pRExC_state, STR_WITH_LEN("T"));
+    regnode *stclass;
     GET_RE_DEBUG_FLAGS_DECL;
 
-    PERL_ARGS_ASSERT_MAKE_TRIE_FAILTABLE;
+    PERL_ARGS_ASSERT_CONSTRUCT_AHOCORASICK_FROM_TRIE;
 #ifndef DEBUGGING
     PERL_UNUSED_ARG(depth);
 #endif
 
+    if ( OP(source) == TRIE ) {
+        struct regnode_1 *op = (struct regnode_1 *)
+            PerlMemShared_calloc(1, sizeof(struct regnode_1));
+        StructCopy(source,op,struct regnode_1);
+        stclass = (regnode *)op;
+    } else {
+        struct regnode_charclass *op = (struct regnode_charclass *)
+            PerlMemShared_calloc(1, sizeof(struct regnode_charclass));
+        StructCopy(source,op,struct regnode_charclass);
+        stclass = (regnode *)op;
+    }
+    OP(stclass)+=2; /* covert the TRIE type to its AHO-CORASICK equivalent */
 
     ARG_SET( stclass, data_slot );
     aho = (reg_ac_data *) PerlMemShared_calloc( 1, sizeof(reg_ac_data) );
@@ -3096,6 +3109,7 @@ S_make_trie_failtable(pTHX_ RExC_state_t *pRExC_state, regnode *source,  regnode
     });
     Safefree(q);
     /*RExC_seen |= REG_TRIEDFA_SEEN;*/
+    return stclass;
 }
 
 
@@ -6824,22 +6838,8 @@ reStudy:
        else if (PL_regkind[OP(first)] == TRIE &&
                ((reg_trie_data *)ri->data->data[ ARG(first) ])->minlen>0)
        {
-           regnode *trie_op;
-           /* this can happen only on restudy */
-           if ( OP(first) == TRIE ) {
-                struct regnode_1 *trieop = (struct regnode_1 *)
-                   PerlMemShared_calloc(1, sizeof(struct regnode_1));
-                StructCopy(first,trieop,struct regnode_1);
-                trie_op=(regnode *)trieop;
-            } else {
-                struct regnode_charclass *trieop = (struct regnode_charclass *)
-                   PerlMemShared_calloc(1, sizeof(struct regnode_charclass));
-                StructCopy(first,trieop,struct regnode_charclass);
-                trie_op=(regnode *)trieop;
-            }
-            OP(trie_op)+=2;
-            make_trie_failtable(pRExC_state, (regnode *)first, trie_op, 0);
-           ri->regstclass = trie_op;
+            /* this can happen only on restudy */
+            ri->regstclass = construct_ahocorasick_from_trie(pRExC_state, (regnode *)first, 0);
        }
 #endif
        else if (REGNODE_SIMPLE(OP(first)))
@@ -16192,7 +16192,16 @@ Perl_regfree_internal(pTHX_ REGEXP * const rx)
                         PerlMemShared_free(aho->fail);
                         /* do this last!!!! */
                         PerlMemShared_free(ri->data->data[n]);
-                        PerlMemShared_free(ri->regstclass);
+                        /* we should only ever get called once, so
+                         * assert as much, and also guard the free
+                         * which /might/ happen twice. At the least
+                         * it will make code anlyzers happy and it
+                         * doesn't cost much. - Yves */
+                        assert(ri->regstclass);
+                        if (ri->regstclass) {
+                            PerlMemShared_free(ri->regstclass);
+                            ri->regstclass = 0;
+                        }
                     }
                 }
                 break;