This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
regcomp.c: Don't restart parse for /d to /u if no need to
authorKarl Williamson <khw@cpan.org>
Wed, 7 Nov 2018 01:26:39 +0000 (18:26 -0700)
committerKarl Williamson <khw@cpan.org>
Fri, 16 Nov 2018 17:48:19 +0000 (10:48 -0700)
This commit keeps track of if there are any operations encountered which
differ under /d from /u.  If we switch to /u and haven't so far found
anything which differs, there's no need to reparse

regcomp.c

index 1c3664b..cc64902 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
@@ -219,6 +219,7 @@ struct RExC_state_t {
 #define RExC_mysv2     (pRExC_state->mysv2)
 
 #endif
+    bool        seen_d_op;
     bool        strict;
     bool        study_started;
     bool        in_script_run;
@@ -239,6 +240,8 @@ struct RExC_state_t {
 #define RExC_parse     (pRExC_state->parse)
 #define RExC_latest_warn_offset (pRExC_state->latest_warn_offset )
 #define RExC_whilem_seen       (pRExC_state->whilem_seen)
+#define RExC_seen_d_op (pRExC_state->seen_d_op) /* Seen something that differs
+                                                   under /d from /u ? */
 
 
 #ifdef RE_TRACK_PATTERN_OFFSETS
@@ -349,9 +352,11 @@ struct RExC_state_t {
             if (DEPENDS_SEMANTICS) {                                        \
                 set_regex_charset(&RExC_flags, REGEX_UNICODE_CHARSET);      \
                 RExC_uni_semantics = 1;                                     \
-                if (LIKELY(RExC_total_parens >= 0)) {                       \
-                    /* No need to restart the parse immediately if we're    \
-                     * going to reparse anyway to count parens */           \
+                if (RExC_seen_d_op && LIKELY(RExC_total_parens >= 0)) {     \
+                    /* No need to restart the parse if we haven't seen      \
+                     * anything that differs between /u and /d, and no need \
+                     * to restart immediately if we're going to reparse     \
+                     * anyway to count parens */                            \
                     *flagp |= RESTART_PARSE;                                \
                     return restart_retval;                                  \
                 }                                                           \
@@ -7287,6 +7292,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
     RExC_close_parens = NULL;
     RExC_paren_names = NULL;
     RExC_size = 0;
+    RExC_seen_d_op = FALSE;
 #ifdef DEBUGGING
     RExC_paren_name_list = NULL;
 #endif
@@ -13276,7 +13282,10 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
             RExC_seen |= REG_LOOKBEHIND_SEEN;
            op = BOUND + charset;
 
-            if (op == BOUNDL) {
+            if (op == BOUND) {
+                RExC_seen_d_op = TRUE;
+            }
+            else if (op == BOUNDL) {
                 RExC_contains_locale = 1;
             }
 
@@ -13425,6 +13434,9 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
             else if (op == POSIXL) {
                 RExC_contains_locale = 1;
             }
+            else if (op == POSIXD) {
+                RExC_seen_d_op = TRUE;
+            }
 
           join_posix_op_known:
 
@@ -13643,6 +13655,9 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                                        ? REFFL
                                        : REFF),
                                 num);
+                if (OP(REGNODE_p(ret)) == REFF) {
+                    RExC_seen_d_op = TRUE;
+                }
                 *flagp |= HASWIDTH;
 
                 /* override incorrect value set in reganode MJD */
@@ -14450,6 +14465,9 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                             OP(REGNODE_p(ret)) = EXACTFLU8;
                         }
                     }
+                    else if (node_type == EXACTF) {
+                        RExC_seen_d_op = TRUE;
+                    }
                 }
 
                 alloc_maybe_populate_EXACT(pRExC_state, ret, flagp, len, ender,
@@ -18076,10 +18094,10 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                  || (anyof_flags & ANYOF_SHARED_d_MATCHES_ALL_NON_UTF8_NON_ASCII_non_d_WARN_SUPER)))
     {
         use_anyofd = TRUE;
+        RExC_seen_d_op = TRUE;
         optimizable = FALSE;
     }
 
-
     /* Optimize inverted simple patterns (e.g. [^a-z]) when everything is known
      * at compile time.  Besides not inverting folded locale now, we can't
      * invert if there are things such as \w, which aren't known until runtime