fix "bad match" issue reported in perl #127705
authorYves Orton <demerphq@gmail.com>
Mon, 14 Mar 2016 22:30:02 +0000 (23:30 +0100)
committerYves Orton <demerphq@gmail.com>
Mon, 14 Mar 2016 23:31:59 +0000 (00:31 +0100)
In 24be310237a0f8f19cfdb71de1b068b4ce9572a0 I reworked how
we stored the close_paren info in the regexp match state
structure. Unfortunately I missed a subtle aspect of the
logic which meant that in certain cases we were relying
on close_paren being true to avoid comparing it against
a false ARG value for things like CURLYX, which meant that
sometimes we would exit an stack frame prematurely. This
patch fixes that logic and makes it more clear (via macros)
what is going on.

regexec.c
regexp.h
t/re/pat.t

index c1674e2..f2e0164 100644 (file)
--- a/regexec.c
+++ b/regexec.c
@@ -5135,6 +5135,28 @@ S_backup_one_WB(pTHX_ WB_enum * previous, const U8 * const strbeg, U8 ** curpos,
     return wb;
 }
 
+#define EVAL_CLOSE_PAREN_IS(st,expr)                        \
+(                                                           \
+    (   ( st )                                         ) && \
+    (   ( st )->u.eval.close_paren                     ) && \
+    ( ( ( st )->u.eval.close_paren ) == ( (expr) + 1 ) )    \
+)
+
+#define EVAL_CLOSE_PAREN_IS_TRUE(st,expr)                   \
+(                                                           \
+    (   ( st )                                         ) && \
+    (   ( st )->u.eval.close_paren                     ) && \
+    (   ( expr )                                       ) && \
+    ( ( ( st )->u.eval.close_paren ) == ( (expr) + 1 ) )    \
+)
+
+
+#define EVAL_CLOSE_PAREN_SET(st,expr) \
+    (st)->u.eval.close_paren = ( (expr) + 1 )
+
+#define EVAL_CLOSE_PAREN_CLEAR(st) \
+    (st)->u.eval.close_paren = 0
+
 /* returns -1 on failure, $+[0] on success */
 STATIC SSize_t
 S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
@@ -7011,6 +7033,8 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
 
         case INSUBP:   /*  (?(R))  */
             n = ARG(scan);
+            /* this does not need to use EVAL_CLOSE_PAREN macros, as the arg
+             * of SCAN is already set up as matches a eval.close_paren */
             sw = cur_eval && (n == 0 || CUR_EVAL.close_paren == n);
             break;
 
@@ -7535,7 +7559,7 @@ NULL
                           depth, (IV) ST.count, (IV)ST.alen)
            );
 
-            if (EVAL_CLOSE_PAREN_IS(cur_eval,(U32)ST.me->flags))
+            if (EVAL_CLOSE_PAREN_IS_TRUE(cur_eval,(U32)ST.me->flags))
                goto fake_end;
                
            {
@@ -7550,7 +7574,7 @@ NULL
 
 
            if (ST.minmod || ST.count < ARG1(ST.me) /* min*/ 
-                || EVAL_CLOSE_PAREN_IS(cur_eval,(U32)ST.me->flags))
+                || EVAL_CLOSE_PAREN_IS_TRUE(cur_eval,(U32)ST.me->flags))
                sayNO;
 
          curlym_do_B: /* execute the B in /A{m,n}B/  */
@@ -7630,7 +7654,7 @@ NULL
                else
                    rex->offs[paren].end = -1;
 
-                if (EVAL_CLOSE_PAREN_IS(cur_eval,(U32)ST.me->flags))
+                if (EVAL_CLOSE_PAREN_IS_TRUE(cur_eval,(U32)ST.me->flags))
                {
                    if (ST.count) 
                        goto fake_end;
@@ -7699,7 +7723,7 @@ NULL
                maxopenparen = ST.paren;
            ST.min = ARG1(scan);  /* min to match */
            ST.max = ARG2(scan);  /* max to match */
-            if (EVAL_CLOSE_PAREN_IS(cur_eval,(U32)ST.paren))
+            if (EVAL_CLOSE_PAREN_IS_TRUE(cur_eval,(U32)ST.paren))
             {
                ST.min=1;
                ST.max=1;
@@ -7887,7 +7911,7 @@ NULL
                     assert(n == REG_INFTY || locinput == li);
                }
                CURLY_SETPAREN(ST.paren, ST.count);
-                if (EVAL_CLOSE_PAREN_IS(cur_eval,(U32)ST.paren))
+                if (EVAL_CLOSE_PAREN_IS_TRUE(cur_eval,(U32)ST.paren))
                    goto fake_end;
                PUSH_STATE_GOTO(CURLY_B_min_known, ST.B, locinput);
            }
@@ -7915,7 +7939,7 @@ NULL
                {
                  curly_try_B_min:
                    CURLY_SETPAREN(ST.paren, ST.count);
-                    if (EVAL_CLOSE_PAREN_IS(cur_eval,(U32)ST.paren))
+                    if (EVAL_CLOSE_PAREN_IS_TRUE(cur_eval,(U32)ST.paren))
                         goto fake_end;
                    PUSH_STATE_GOTO(CURLY_B_min, ST.B, locinput);
                }
@@ -7925,7 +7949,7 @@ NULL
 
           curly_try_B_max:
            /* a successful greedy match: now try to match B */
-            if (EVAL_CLOSE_PAREN_IS(cur_eval,(U32)ST.paren))
+            if (EVAL_CLOSE_PAREN_IS_TRUE(cur_eval,(U32)ST.paren))
                 goto fake_end;
            {
                bool could_match = locinput < reginfo->strend;
index 65e0426..78aa899 100644 (file)
--- a/regexp.h
+++ b/regexp.h
@@ -840,18 +840,7 @@ typedef struct regmatch_state {
     } u;
 } regmatch_state;
 
-#define EVAL_CLOSE_PAREN_IS(st,expr) \
-(\
-    ( ( st )                                         ) && \
-    ( ( st )->u.eval.close_paren                     ) && \
-    ( ( ( st )->u.eval.close_paren - 1 ) == ( expr ) ) \
-)
-
-#define EVAL_CLOSE_PAREN_SET(st,expr) \
-    (st)->u.eval.close_paren = (expr) + 1
-
-#define EVAL_CLOSE_PAREN_CLEAR(st) \
-    (st)->u.eval.close_paren = 0
+
 
 /* how many regmatch_state structs to allocate as a single slab.
  * We do it in 4K blocks for efficiency. The "3" is 2 for the next/prev
index 2a356ef..295a9f7 100644 (file)
@@ -23,7 +23,7 @@ BEGIN {
     skip_all_without_unicode_tables();
 }
 
-plan tests => 785;  # Update this when adding/deleting tests.
+plan tests => 789;  # Update this when adding/deleting tests.
 
 run_tests() unless caller;
 
@@ -1724,7 +1724,6 @@ EOP
         {
             my $bug="[perl #126182]"; # test for infinite pattern recursion
             for my $tuple (
-
                     [ 'q(a)=~/(.(?2))((?<=(?=(?1)).))/', "died", "look ahead left recursion fails fast" ],
                     [ 'q(aa)=~/(?R)a/', "died", "left-recursion fails fast", ],
                     [ 'q(bbaa)=~/(?&x)(?(DEFINE)(?<x>(?&y)*a)(?<y>(?&x)*b))/',
@@ -1736,14 +1735,24 @@ EOP
                     [ 'q(abc) =~ /a((?1){0,3})c/', "died", "{0,3} left recursion fails fast" ],
 
                     [ 'q(aaabbb)=~/a(?R)?b/', "matched", "optional self recursion works" ],
+                    [ '"((5maa-maa)(maa-3maa))" =~ /(\\\\((?:[^()]++|(?0))*+\\\\))/', "matched",
+                        "recursion and possessive captures", "((5maa-maa)(maa-3maa))"],
+                    [ '"((5maa-maa)(maa-3maa))" =~ /(\\\\((?:[^()]++|(?1))*+\\\\))/', "matched",
+                        "recursion and possessive captures", "((5maa-maa)(maa-3maa))"],
+                    [ '"((5maa-maa)(maa-3maa))" =~ /(\\\\((?:[^()]+|(?0))*\\\\))/', "matched",
+                        "recursion and possessive captures", "((5maa-maa)(maa-3maa))"],
+                    [ '"((5maa-maa)(maa-3maa))" =~ /(\\\\((?:[^()]+|(?1))*\\\\))/', "matched",
+                        "recursion and possessive captures", "((5maa-maa)(maa-3maa))"],
             ) {
-                my ($expr, $expect, $test_name)= @$tuple;
+                my ($expr, $expect, $test_name, $cap1)= @$tuple;
                 # avoid quotes in this code!
                 my $code='
                     BEGIN{require q(test.pl);}
                     watchdog(3);
-                    my $status= eval(qq{ (' . $expr . ') ? q(matched) : q(failed) })
-                                || ( ( $@ =~ /Infinite recursion/ ) ? q(died) : q(strange-death) );
+                    my $status= eval(q{ !(' . $expr . ') ? q(failed) : ' .
+                        ($cap1 ? '($1 ne q['.$cap1.']) ? qq(badmatch:$1) : ' : '') .
+                        ' q(matched) })
+                                || ( ( $@ =~ /Infinite recursion/ ) ? qq(died) : q(strange-death) );
                     print $status;
                 ';
                 fresh_perl_is($code, $expect, {}, "$bug - $test_name" );