regexec.c: Avoid unnecessary folding
author Karl Williamson <khw@cpan.org>
Wed, 19 Dec 2018 19:19:27 +0000 (12:19 -0700)
committer Karl Williamson <khw@cpan.org>
Wed, 26 Dec 2018 19:50:37 +0000 (12:50 -0700)
Previous commits caused the pattern under /i to be folded as much as
possible.  This commit takes advantage of that by skipping the fold at
match time when we know the pattern has already been folded.

regcomp.c
regexec.c

index 83c08dd..a907786 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
@@ -14403,7 +14403,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                         goto loopdone;
                     }
 
-                    if (UTF) {  /* For UTF-8, we add the folded value */
+                    if (UTF) {  /* Use the folded value */
                         if (UVCHR_IS_INVARIANT(ender)) {
                             *(s)++ = (U8) toFOLD(ender);
                         }
index 43ce53d..ca570de 100644 (file)
--- a/regexec.c
+++ b/regexec.c
@@ -2303,7 +2303,6 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
     case EXACTF:   /* This node only generated for non-utf8 patterns */
         assert(! is_utf8_pat);
         if (utf8_target) {
-            utf8_fold_flags = 0;
             goto do_exactf_utf8;
         }
         fold_array = PL_fold;
@@ -2345,7 +2344,7 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
 
     case EXACTFU:
         if (is_utf8_pat || utf8_target) {
-            utf8_fold_flags = is_utf8_pat ? FOLDEQ_S2_ALREADY_FOLDED : 0;
+            utf8_fold_flags = FOLDEQ_S2_ALREADY_FOLDED;
             goto do_exactf_utf8;
         }
 
@@ -2353,7 +2352,7 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
          * so we don't have to worry here about this single special case
          * in the Latin1 range */
         fold_array = PL_fold_latin1;
-        folder = foldEQ_latin1;
+        folder = foldEQ_latin1_s2_folded;
 
         /* FALLTHROUGH */
 
@@ -6408,7 +6407,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
             }
             fold_utf8_flags =  FOLDEQ_LOCALE | FOLDEQ_S2_ALREADY_FOLDED
                                              | FOLDEQ_S2_FOLDS_SANE;
-           folder = foldEQ_latin1;
+           folder = foldEQ_latin1_s2_folded;
            fold_array = PL_fold_latin1;
            goto do_exactf;
 
@@ -6423,11 +6422,15 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
         case EXACTFUP:          /*  /foo/iu, and something is problematic in
                                     'foo' so can't take shortcuts. */
             assert(! is_utf8_pat);
-            /* FALLTHROUGH */
+            folder = foldEQ_latin1;
+           fold_array = PL_fold_latin1;
+           fold_utf8_flags = 0;
+           goto do_exactf;
+
        case EXACTFU:            /*  /abc/iu      */
-           folder = foldEQ_latin1;
+            folder = foldEQ_latin1_s2_folded;
            fold_array = PL_fold_latin1;
-           fold_utf8_flags = is_utf8_pat ? FOLDEQ_S2_ALREADY_FOLDED : 0;
+           fold_utf8_flags = FOLDEQ_S2_ALREADY_FOLDED;
            goto do_exactf;
 
         case EXACTFAA_NO_TRIE:   /* This node only generated for non-utf8
@@ -9358,9 +9361,7 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
         goto do_exactf;
 
     case EXACTFU:
-       if (reginfo->is_utf8_pat) {
-            utf8_flags = FOLDEQ_S2_ALREADY_FOLDED;
-        }
+        utf8_flags = FOLDEQ_S2_ALREADY_FOLDED;
         /* FALLTHROUGH */
 
     case EXACTFUP: