This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
clear savestack on (?{...}) failure and backtrack
authorDavid Mitchell <davem@iabyn.com>
Tue, 14 Feb 2017 15:59:57 +0000 (15:59 +0000)
committerDavid Mitchell <davem@iabyn.com>
Tue, 14 Feb 2017 17:49:58 +0000 (17:49 +0000)
RT #126697

In a regex, after executing a (?{...}) code block, if we fail and
backtrack over the codeblock, we're supposed to unwind the savestack, so
that for any example any local()s within the code block are undone.

It turns out that a backtracking state isn't pushed for (?{...}), only
for postponed evals ( i.e.  (??{...})). This means that it relies on one
of the earlier backtracking states to clear the savestack on its behalf.
This can't always be relied upon, and the ticket above contains code where
this falls down; in particular:

    'ABC' =~ m{
        \A
        (?:
            (?: AB | A | BC )
            (?{
                local $count = $count + 1;
                print "! count=$count; ; pos=${\pos}\n";
            })
        )*
        \z
    }x

Here we end up relying on TRIE_next to do the cleaning up, but TRIE_next
doesn't, since there's nothing it would be responsible for that needs
cleaning up.

The solution to this is to push a backtrack state for every (?{...}) as
well as every (??{...}). The sole job of that state is to do a
LEAVE_SCOPE(ST.lastcp).

The existing backtrack state EVAL_AB has been renamed EVAL_postponed_AB
to make it clear it's only used on postponed /(??{A})B/ regexes, and a new
state has been added, EVAL_B, which is only called when backtracking after
failing something in the B in /(?{...})B/.

regcomp.sym
regexec.c
regnodes.h
t/re/pat_re_eval.t

index ac67955..999d965 100644 (file)
@@ -243,7 +243,7 @@ PSEUDO      PSEUDO,     off       ; Pseudo opcode for internal use.
 #
 #
 TRIE            next:FAIL
-EVAL            AB:FAIL
+EVAL            B,postponed_AB:FAIL
 CURLYX          end:FAIL
 WHILEM          A_pre,A_min,A_max,B_min,B_max:FAIL
 BRANCH          next:FAIL
index bf9809a..e383655 100644 (file)
--- a/regexec.c
+++ b/regexec.c
@@ -5391,7 +5391,6 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
     U8 gimme = G_SCALAR;
     CV *caller_cv = NULL;      /* who called us */
     CV *last_pushed_cv = NULL; /* most recently called (?{}) CV */
-    CHECKPOINT runops_cp;      /* savestack position before executing EVAL */
     U32 maxopenparen = 0;       /* max '(' index seen so far */
     int to_complement;  /* Invert the result? */
     _char_class_number classnum;
@@ -6787,7 +6786,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
             goto eval_recurse_doit;
             /* NOTREACHED */
 
-        case EVAL:  /*   /(?{A})B/   /(??{A})B/  and /(?(?{A})X|Y)B/   */        
+        case EVAL:  /*   /(?{...})B/   /(??{A})B/  and  /(?(?{...})X|Y)B/   */
             if (cur_eval && cur_eval->locinput==locinput) {
                if ( ++nochange_depth > max_nochange_depth )
                     Perl_croak(aTHX_ "EVAL without pos change exceeded limit in regex");
@@ -6806,7 +6805,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
 
                /* save *all* paren positions */
                 regcppush(rex, 0, maxopenparen);
-               REGCP_SET(runops_cp);
+                REGCP_SET(ST.lastcp);
 
                if (!caller_cv)
                    caller_cv = find_runcv(NULL);
@@ -7008,12 +7007,14 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
                 * in the regexp code uses the pad ! */
                PL_op = oop;
                PL_curcop = ocurcop;
-                regcp_restore(rex, runops_cp, &maxopenparen);
+                regcp_restore(rex, ST.lastcp, &maxopenparen);
                 PL_curpm_under = PL_curpm;
                 PL_curpm = PL_reg_curpm;
 
-               if (logical != 2)
-                   break;
+               if (logical != 2) {
+                    PUSH_STATE_GOTO(EVAL_B, next, locinput);
+                   /* NOTREACHED */
+                }
            }
 
                /* only /(??{})/  from now on */
@@ -7111,11 +7112,11 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
                ST.prev_eval = cur_eval;
                cur_eval = st;
                /* now continue from first node in postoned RE */
-               PUSH_YES_STATE_GOTO(EVAL_AB, startpoint, locinput);
+               PUSH_YES_STATE_GOTO(EVAL_postponed_AB, startpoint, locinput);
                NOT_REACHED; /* NOTREACHED */
        }
 
-       case EVAL_AB: /* cleanup after a successful (??{A})B */
+       case EVAL_postponed_AB: /* cleanup after a successful (??{A})B */
             /* note: this is called twice; first after popping B, then A */
             DEBUG_STACK_r({
                 Perl_re_exec_indentf( aTHX_  "EVAL_AB cur_eval=%p prev_eval=%p\n",
@@ -7161,7 +7162,11 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
            sayYES;
 
 
-       case EVAL_AB_fail: /* unsuccessfully ran A or B in (??{A})B */
+       case EVAL_B_fail: /* unsuccessful B in (?{...})B */
+           REGCP_UNWIND(ST.lastcp);
+            sayNO;
+
+       case EVAL_postponed_AB_fail: /* unsuccessfully ran A or B in (??{A})B */
            /* note: this is called twice; first after popping B, then A */
             DEBUG_STACK_r({
                 Perl_re_exec_indentf( aTHX_  "EVAL_AB_fail cur_eval=%p prev_eval=%p\n",
@@ -8263,7 +8268,7 @@ NULL
 
                 SET_RECURSE_LOCINPUT("FAKE-END[after]", cur_eval->locinput);
 
-                PUSH_YES_STATE_GOTO(EVAL_AB, st->u.eval.prev_eval->u.eval.B,
+                PUSH_YES_STATE_GOTO(EVAL_postponed_AB, st->u.eval.prev_eval->u.eval.B,
                                     locinput); /* match B */
            }
 
index f820c56..8fe0f41 100644 (file)
@@ -7,7 +7,7 @@
 /* Regops and State definitions */
 
 #define REGNODE_MAX            92
-#define REGMATCH_STATE_MAX     132
+#define REGMATCH_STATE_MAX     134
 
 #define        END                     0       /* 0000 End of program. */
 #define        SUCCEED                 1       /* 0x01 Return from a subroutine, basically. */
        /* ------------ States ------------- */
 #define        TRIE_next               (REGNODE_MAX + 1)       /* state for TRIE */
 #define        TRIE_next_fail          (REGNODE_MAX + 2)       /* state for TRIE */
-#define        EVAL_AB                 (REGNODE_MAX + 3)       /* state for EVAL */
-#define        EVAL_AB_fail            (REGNODE_MAX + 4)       /* state for EVAL */
-#define        CURLYX_end              (REGNODE_MAX + 5)       /* state for CURLYX */
-#define        CURLYX_end_fail         (REGNODE_MAX + 6)       /* state for CURLYX */
-#define        WHILEM_A_pre            (REGNODE_MAX + 7)       /* state for WHILEM */
-#define        WHILEM_A_pre_fail       (REGNODE_MAX + 8)       /* state for WHILEM */
-#define        WHILEM_A_min            (REGNODE_MAX + 9)       /* state for WHILEM */
-#define        WHILEM_A_min_fail       (REGNODE_MAX + 10)      /* state for WHILEM */
-#define        WHILEM_A_max            (REGNODE_MAX + 11)      /* state for WHILEM */
-#define        WHILEM_A_max_fail       (REGNODE_MAX + 12)      /* state for WHILEM */
-#define        WHILEM_B_min            (REGNODE_MAX + 13)      /* state for WHILEM */
-#define        WHILEM_B_min_fail       (REGNODE_MAX + 14)      /* state for WHILEM */
-#define        WHILEM_B_max            (REGNODE_MAX + 15)      /* state for WHILEM */
-#define        WHILEM_B_max_fail       (REGNODE_MAX + 16)      /* state for WHILEM */
-#define        BRANCH_next             (REGNODE_MAX + 17)      /* state for BRANCH */
-#define        BRANCH_next_fail        (REGNODE_MAX + 18)      /* state for BRANCH */
-#define        CURLYM_A                (REGNODE_MAX + 19)      /* state for CURLYM */
-#define        CURLYM_A_fail           (REGNODE_MAX + 20)      /* state for CURLYM */
-#define        CURLYM_B                (REGNODE_MAX + 21)      /* state for CURLYM */
-#define        CURLYM_B_fail           (REGNODE_MAX + 22)      /* state for CURLYM */
-#define        IFMATCH_A               (REGNODE_MAX + 23)      /* state for IFMATCH */
-#define        IFMATCH_A_fail          (REGNODE_MAX + 24)      /* state for IFMATCH */
-#define        CURLY_B_min_known       (REGNODE_MAX + 25)      /* state for CURLY */
-#define        CURLY_B_min_known_fail  (REGNODE_MAX + 26)      /* state for CURLY */
-#define        CURLY_B_min             (REGNODE_MAX + 27)      /* state for CURLY */
-#define        CURLY_B_min_fail        (REGNODE_MAX + 28)      /* state for CURLY */
-#define        CURLY_B_max             (REGNODE_MAX + 29)      /* state for CURLY */
-#define        CURLY_B_max_fail        (REGNODE_MAX + 30)      /* state for CURLY */
-#define        COMMIT_next             (REGNODE_MAX + 31)      /* state for COMMIT */
-#define        COMMIT_next_fail        (REGNODE_MAX + 32)      /* state for COMMIT */
-#define        MARKPOINT_next          (REGNODE_MAX + 33)      /* state for MARKPOINT */
-#define        MARKPOINT_next_fail     (REGNODE_MAX + 34)      /* state for MARKPOINT */
-#define        SKIP_next               (REGNODE_MAX + 35)      /* state for SKIP */
-#define        SKIP_next_fail          (REGNODE_MAX + 36)      /* state for SKIP */
-#define        CUTGROUP_next           (REGNODE_MAX + 37)      /* state for CUTGROUP */
-#define        CUTGROUP_next_fail      (REGNODE_MAX + 38)      /* state for CUTGROUP */
-#define        KEEPS_next              (REGNODE_MAX + 39)      /* state for KEEPS */
-#define        KEEPS_next_fail         (REGNODE_MAX + 40)      /* state for KEEPS */
+#define        EVAL_B                  (REGNODE_MAX + 3)       /* state for EVAL */
+#define        EVAL_B_fail             (REGNODE_MAX + 4)       /* state for EVAL */
+#define        EVAL_postponed_AB       (REGNODE_MAX + 5)       /* state for EVAL */
+#define        EVAL_postponed_AB_fail  (REGNODE_MAX + 6)       /* state for EVAL */
+#define        CURLYX_end              (REGNODE_MAX + 7)       /* state for CURLYX */
+#define        CURLYX_end_fail         (REGNODE_MAX + 8)       /* state for CURLYX */
+#define        WHILEM_A_pre            (REGNODE_MAX + 9)       /* state for WHILEM */
+#define        WHILEM_A_pre_fail       (REGNODE_MAX + 10)      /* state for WHILEM */
+#define        WHILEM_A_min            (REGNODE_MAX + 11)      /* state for WHILEM */
+#define        WHILEM_A_min_fail       (REGNODE_MAX + 12)      /* state for WHILEM */
+#define        WHILEM_A_max            (REGNODE_MAX + 13)      /* state for WHILEM */
+#define        WHILEM_A_max_fail       (REGNODE_MAX + 14)      /* state for WHILEM */
+#define        WHILEM_B_min            (REGNODE_MAX + 15)      /* state for WHILEM */
+#define        WHILEM_B_min_fail       (REGNODE_MAX + 16)      /* state for WHILEM */
+#define        WHILEM_B_max            (REGNODE_MAX + 17)      /* state for WHILEM */
+#define        WHILEM_B_max_fail       (REGNODE_MAX + 18)      /* state for WHILEM */
+#define        BRANCH_next             (REGNODE_MAX + 19)      /* state for BRANCH */
+#define        BRANCH_next_fail        (REGNODE_MAX + 20)      /* state for BRANCH */
+#define        CURLYM_A                (REGNODE_MAX + 21)      /* state for CURLYM */
+#define        CURLYM_A_fail           (REGNODE_MAX + 22)      /* state for CURLYM */
+#define        CURLYM_B                (REGNODE_MAX + 23)      /* state for CURLYM */
+#define        CURLYM_B_fail           (REGNODE_MAX + 24)      /* state for CURLYM */
+#define        IFMATCH_A               (REGNODE_MAX + 25)      /* state for IFMATCH */
+#define        IFMATCH_A_fail          (REGNODE_MAX + 26)      /* state for IFMATCH */
+#define        CURLY_B_min_known       (REGNODE_MAX + 27)      /* state for CURLY */
+#define        CURLY_B_min_known_fail  (REGNODE_MAX + 28)      /* state for CURLY */
+#define        CURLY_B_min             (REGNODE_MAX + 29)      /* state for CURLY */
+#define        CURLY_B_min_fail        (REGNODE_MAX + 30)      /* state for CURLY */
+#define        CURLY_B_max             (REGNODE_MAX + 31)      /* state for CURLY */
+#define        CURLY_B_max_fail        (REGNODE_MAX + 32)      /* state for CURLY */
+#define        COMMIT_next             (REGNODE_MAX + 33)      /* state for COMMIT */
+#define        COMMIT_next_fail        (REGNODE_MAX + 34)      /* state for COMMIT */
+#define        MARKPOINT_next          (REGNODE_MAX + 35)      /* state for MARKPOINT */
+#define        MARKPOINT_next_fail     (REGNODE_MAX + 36)      /* state for MARKPOINT */
+#define        SKIP_next               (REGNODE_MAX + 37)      /* state for SKIP */
+#define        SKIP_next_fail          (REGNODE_MAX + 38)      /* state for SKIP */
+#define        CUTGROUP_next           (REGNODE_MAX + 39)      /* state for CUTGROUP */
+#define        CUTGROUP_next_fail      (REGNODE_MAX + 40)      /* state for CUTGROUP */
+#define        KEEPS_next              (REGNODE_MAX + 41)      /* state for KEEPS */
+#define        KEEPS_next_fail         (REGNODE_MAX + 42)      /* state for KEEPS */
 
 /* PL_regkind[] What type of regop or state is this. */
 
@@ -248,8 +250,10 @@ EXTCONST U8 PL_regkind[] = {
        /* ------------ States ------------- */
        TRIE,           /* TRIE_next              */
        TRIE,           /* TRIE_next_fail         */
-       EVAL,           /* EVAL_AB                */
-       EVAL,           /* EVAL_AB_fail           */
+       EVAL,           /* EVAL_B                 */
+       EVAL,           /* EVAL_B_fail            */
+       EVAL,           /* EVAL_postponed_AB      */
+       EVAL,           /* EVAL_postponed_AB_fail */
        CURLYX,         /* CURLYX_end             */
        CURLYX,         /* CURLYX_end_fail        */
        WHILEM,         /* WHILEM_A_pre           */
@@ -592,44 +596,46 @@ EXTCONST char * const PL_reg_name[] = {
        /* ------------ States ------------- */
        "TRIE_next",                    /* REGNODE_MAX +0x01 */
        "TRIE_next_fail",               /* REGNODE_MAX +0x02 */
-       "EVAL_AB",                      /* REGNODE_MAX +0x03 */
-       "EVAL_AB_fail",                 /* REGNODE_MAX +0x04 */
-       "CURLYX_end",                   /* REGNODE_MAX +0x05 */
-       "CURLYX_end_fail",              /* REGNODE_MAX +0x06 */
-       "WHILEM_A_pre",                 /* REGNODE_MAX +0x07 */
-       "WHILEM_A_pre_fail",            /* REGNODE_MAX +0x08 */
-       "WHILEM_A_min",                 /* REGNODE_MAX +0x09 */
-       "WHILEM_A_min_fail",            /* REGNODE_MAX +0x0a */
-       "WHILEM_A_max",                 /* REGNODE_MAX +0x0b */
-       "WHILEM_A_max_fail",            /* REGNODE_MAX +0x0c */
-       "WHILEM_B_min",                 /* REGNODE_MAX +0x0d */
-       "WHILEM_B_min_fail",            /* REGNODE_MAX +0x0e */
-       "WHILEM_B_max",                 /* REGNODE_MAX +0x0f */
-       "WHILEM_B_max_fail",            /* REGNODE_MAX +0x10 */
-       "BRANCH_next",                  /* REGNODE_MAX +0x11 */
-       "BRANCH_next_fail",             /* REGNODE_MAX +0x12 */
-       "CURLYM_A",                     /* REGNODE_MAX +0x13 */
-       "CURLYM_A_fail",                /* REGNODE_MAX +0x14 */
-       "CURLYM_B",                     /* REGNODE_MAX +0x15 */
-       "CURLYM_B_fail",                /* REGNODE_MAX +0x16 */
-       "IFMATCH_A",                    /* REGNODE_MAX +0x17 */
-       "IFMATCH_A_fail",               /* REGNODE_MAX +0x18 */
-       "CURLY_B_min_known",            /* REGNODE_MAX +0x19 */
-       "CURLY_B_min_known_fail",       /* REGNODE_MAX +0x1a */
-       "CURLY_B_min",                  /* REGNODE_MAX +0x1b */
-       "CURLY_B_min_fail",             /* REGNODE_MAX +0x1c */
-       "CURLY_B_max",                  /* REGNODE_MAX +0x1d */
-       "CURLY_B_max_fail",             /* REGNODE_MAX +0x1e */
-       "COMMIT_next",                  /* REGNODE_MAX +0x1f */
-       "COMMIT_next_fail",             /* REGNODE_MAX +0x20 */
-       "MARKPOINT_next",               /* REGNODE_MAX +0x21 */
-       "MARKPOINT_next_fail",          /* REGNODE_MAX +0x22 */
-       "SKIP_next",                    /* REGNODE_MAX +0x23 */
-       "SKIP_next_fail",               /* REGNODE_MAX +0x24 */
-       "CUTGROUP_next",                /* REGNODE_MAX +0x25 */
-       "CUTGROUP_next_fail",           /* REGNODE_MAX +0x26 */
-       "KEEPS_next",                   /* REGNODE_MAX +0x27 */
-       "KEEPS_next_fail",              /* REGNODE_MAX +0x28 */
+       "EVAL_B",                       /* REGNODE_MAX +0x03 */
+       "EVAL_B_fail",                  /* REGNODE_MAX +0x04 */
+       "EVAL_postponed_AB",            /* REGNODE_MAX +0x05 */
+       "EVAL_postponed_AB_fail",       /* REGNODE_MAX +0x06 */
+       "CURLYX_end",                   /* REGNODE_MAX +0x07 */
+       "CURLYX_end_fail",              /* REGNODE_MAX +0x08 */
+       "WHILEM_A_pre",                 /* REGNODE_MAX +0x09 */
+       "WHILEM_A_pre_fail",            /* REGNODE_MAX +0x0a */
+       "WHILEM_A_min",                 /* REGNODE_MAX +0x0b */
+       "WHILEM_A_min_fail",            /* REGNODE_MAX +0x0c */
+       "WHILEM_A_max",                 /* REGNODE_MAX +0x0d */
+       "WHILEM_A_max_fail",            /* REGNODE_MAX +0x0e */
+       "WHILEM_B_min",                 /* REGNODE_MAX +0x0f */
+       "WHILEM_B_min_fail",            /* REGNODE_MAX +0x10 */
+       "WHILEM_B_max",                 /* REGNODE_MAX +0x11 */
+       "WHILEM_B_max_fail",            /* REGNODE_MAX +0x12 */
+       "BRANCH_next",                  /* REGNODE_MAX +0x13 */
+       "BRANCH_next_fail",             /* REGNODE_MAX +0x14 */
+       "CURLYM_A",                     /* REGNODE_MAX +0x15 */
+       "CURLYM_A_fail",                /* REGNODE_MAX +0x16 */
+       "CURLYM_B",                     /* REGNODE_MAX +0x17 */
+       "CURLYM_B_fail",                /* REGNODE_MAX +0x18 */
+       "IFMATCH_A",                    /* REGNODE_MAX +0x19 */
+       "IFMATCH_A_fail",               /* REGNODE_MAX +0x1a */
+       "CURLY_B_min_known",            /* REGNODE_MAX +0x1b */
+       "CURLY_B_min_known_fail",       /* REGNODE_MAX +0x1c */
+       "CURLY_B_min",                  /* REGNODE_MAX +0x1d */
+       "CURLY_B_min_fail",             /* REGNODE_MAX +0x1e */
+       "CURLY_B_max",                  /* REGNODE_MAX +0x1f */
+       "CURLY_B_max_fail",             /* REGNODE_MAX +0x20 */
+       "COMMIT_next",                  /* REGNODE_MAX +0x21 */
+       "COMMIT_next_fail",             /* REGNODE_MAX +0x22 */
+       "MARKPOINT_next",               /* REGNODE_MAX +0x23 */
+       "MARKPOINT_next_fail",          /* REGNODE_MAX +0x24 */
+       "SKIP_next",                    /* REGNODE_MAX +0x25 */
+       "SKIP_next_fail",               /* REGNODE_MAX +0x26 */
+       "CUTGROUP_next",                /* REGNODE_MAX +0x27 */
+       "CUTGROUP_next_fail",           /* REGNODE_MAX +0x28 */
+       "KEEPS_next",                   /* REGNODE_MAX +0x29 */
+       "KEEPS_next_fail",              /* REGNODE_MAX +0x2a */
 };
 #endif /* DOINIT */
 
index f23a79c..6921d38 100644 (file)
@@ -22,7 +22,7 @@ BEGIN {
 }
 
 
-plan tests => 532;  # Update this when adding/deleting tests.
+plan tests => 533;  # Update this when adding/deleting tests.
 
 run_tests() unless caller;
 
@@ -1271,6 +1271,26 @@ sub run_tests {
     }->();
 
 
+    # RT #126697
+    # savestack wasn't always being unwound on EVAL failure
+    {
+        local our $i = 0;
+        my $max = 0;
+
+        'ABC' =~ m{
+            \A
+            (?:
+                (?: AB | A | BC )
+                (?{
+                    local $i = $i + 1;
+                    $max = $i if $max < $i;
+                })
+            )*
+            \z
+        }x;
+        is $max, 2, "RT #126697";
+    }
+
 
 } # End of sub run_tests