This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
study_chunk: honour mutate_ok over recursion
authorHugo van der Sanden <hv@crypt.org>
Tue, 28 Apr 2020 17:52:44 +0000 (18:52 +0100)
committerSteve Hay <steve.m.hay@googlemail.com>
Mon, 1 Jun 2020 19:35:51 +0000 (20:35 +0100)
As described in #17743, study_chunk can re-enter itself either by
simple recursion or by enframing. 089ad25d3f used the new mutate_ok
variable to track whether we were within the framing scope of GOSUB,
and to disallow mutating changes to ops if so.

This commit extends that logic to reentry by recursion, passing in
the current state as was_mutate_ok.

(CVE-2020-12723)

(cherry picked from commit 3445383845ed220eaa12cd406db2067eb7b8a741)

embed.fnc
embed.h
proto.h
regcomp.c
t/re/pat.t

index 4fb75ab..589ab1a 100644 (file)
--- a/embed.fnc
+++ b/embed.fnc
@@ -2045,7 +2045,7 @@ ES        |SSize_t|study_chunk    |NN RExC_state_t *pRExC_state \
                                |NULLOK struct scan_data_t *data \
                                 |I32 stopparen|U32 recursed_depth \
                                |NULLOK regnode_ssc *and_withp \
                                |NULLOK struct scan_data_t *data \
                                 |I32 stopparen|U32 recursed_depth \
                                |NULLOK regnode_ssc *and_withp \
-                               |U32 flags|U32 depth
+                               |U32 flags|U32 depth|bool was_mutate_ok
 ES     |void   |rck_elide_nothing|NN regnode *node
 ESR    |SV *   |get_ANYOFM_contents|NN const regnode * n
 ESRT   |U32    |add_data       |NN RExC_state_t* const pRExC_state \
 ES     |void   |rck_elide_nothing|NN regnode *node
 ESR    |SV *   |get_ANYOFM_contents|NN const regnode * n
 ESRT   |U32    |add_data       |NN RExC_state_t* const pRExC_state \
diff --git a/embed.h b/embed.h
index 255bf59..182b12a 100644 (file)
--- a/embed.h
+++ b/embed.h
 #define ssc_is_cp_posixl_init  S_ssc_is_cp_posixl_init
 #define ssc_or(a,b,c)          S_ssc_or(aTHX_ a,b,c)
 #define ssc_union(a,b,c)       S_ssc_union(aTHX_ a,b,c)
 #define ssc_is_cp_posixl_init  S_ssc_is_cp_posixl_init
 #define ssc_or(a,b,c)          S_ssc_or(aTHX_ a,b,c)
 #define ssc_union(a,b,c)       S_ssc_union(aTHX_ a,b,c)
-#define study_chunk(a,b,c,d,e,f,g,h,i,j,k)     S_study_chunk(aTHX_ a,b,c,d,e,f,g,h,i,j,k)
+#define study_chunk(a,b,c,d,e,f,g,h,i,j,k,l)   S_study_chunk(aTHX_ a,b,c,d,e,f,g,h,i,j,k,l)
 #  endif
 #  if defined(PERL_IN_REGCOMP_C) || defined (PERL_IN_DUMP_C) || defined(PERL_IN_OP_C)
 #define _invlist_dump(a,b,c,d) Perl__invlist_dump(aTHX_ a,b,c,d)
 #  endif
 #  if defined(PERL_IN_REGCOMP_C) || defined (PERL_IN_DUMP_C) || defined(PERL_IN_OP_C)
 #define _invlist_dump(a,b,c,d) Perl__invlist_dump(aTHX_ a,b,c,d)
diff --git a/proto.h b/proto.h
index 88ec328..02ef4ed 100644 (file)
--- a/proto.h
+++ b/proto.h
@@ -5934,7 +5934,7 @@ STATIC void       S_ssc_or(pTHX_ const RExC_state_t *pRExC_state, regnode_ssc *ssc, co
 STATIC void    S_ssc_union(pTHX_ regnode_ssc *ssc, SV* const invlist, const bool invert_2nd);
 #define PERL_ARGS_ASSERT_SSC_UNION     \
        assert(ssc); assert(invlist)
 STATIC void    S_ssc_union(pTHX_ regnode_ssc *ssc, SV* const invlist, const bool invert_2nd);
 #define PERL_ARGS_ASSERT_SSC_UNION     \
        assert(ssc); assert(invlist)
-STATIC SSize_t S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, SSize_t *minlenp, SSize_t *deltap, regnode *last, struct scan_data_t *data, I32 stopparen, U32 recursed_depth, regnode_ssc *and_withp, U32 flags, U32 depth);
+STATIC SSize_t S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, SSize_t *minlenp, SSize_t *deltap, regnode *last, struct scan_data_t *data, I32 stopparen, U32 recursed_depth, regnode_ssc *and_withp, U32 flags, U32 depth, bool was_mutate_ok);
 #define PERL_ARGS_ASSERT_STUDY_CHUNK   \
        assert(pRExC_state); assert(scanp); assert(minlenp); assert(deltap); assert(last)
 #endif
 #define PERL_ARGS_ASSERT_STUDY_CHUNK   \
        assert(pRExC_state); assert(scanp); assert(minlenp); assert(deltap); assert(last)
 #endif
index 79c909d..640c99d 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
@@ -4575,7 +4575,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                        I32 stopparen,
                         U32 recursed_depth,
                        regnode_ssc *and_withp,
                        I32 stopparen,
                         U32 recursed_depth,
                        regnode_ssc *and_withp,
-                       U32 flags, U32 depth)
+                       U32 flags, U32 depth, bool was_mutate_ok)
                        /* scanp: Start here (read-write). */
                        /* deltap: Write maxlen-minlen here. */
                        /* last: Stop before this one. */
                        /* scanp: Start here (read-write). */
                        /* deltap: Write maxlen-minlen here. */
                        /* last: Stop before this one. */
@@ -4647,7 +4647,10 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                                    node length to get a real minimum (because
                                    the folded version may be shorter) */
        bool unfolded_multi_char = FALSE;
                                    node length to get a real minimum (because
                                    the folded version may be shorter) */
        bool unfolded_multi_char = FALSE;
-        bool mutate_ok = (frame && frame->in_gosub) ? 0 : 1;
+        /* avoid mutating ops if we are anywhere within the recursed or
+         * enframed handling for a GOSUB: the outermost level will handle it.
+         */
+        bool mutate_ok = was_mutate_ok && !(frame && frame->in_gosub);
        /* Peephole optimizer: */
         DEBUG_STUDYDATA("Peep", data, depth, is_inf);
         DEBUG_PEEP("Peep", scan, depth, flags);
        /* Peephole optimizer: */
         DEBUG_STUDYDATA("Peep", data, depth, is_inf);
         DEBUG_PEEP("Peep", scan, depth, flags);
@@ -4697,7 +4700,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
             /* DEFINEP study_chunk() recursion */
             (void)study_chunk(pRExC_state, &scan, &minlen,
                               &deltanext, next, &data_fake, stopparen,
             /* DEFINEP study_chunk() recursion */
             (void)study_chunk(pRExC_state, &scan, &minlen,
                               &deltanext, next, &data_fake, stopparen,
-                              recursed_depth, NULL, f, depth+1);
+                              recursed_depth, NULL, f, depth+1, mutate_ok);
 
             scan = next;
         } else
 
             scan = next;
         } else
@@ -4765,7 +4768,8 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                     /* recurse study_chunk() for each BRANCH in an alternation */
                    minnext = study_chunk(pRExC_state, &scan, minlenp,
                                       &deltanext, next, &data_fake, stopparen,
                     /* recurse study_chunk() for each BRANCH in an alternation */
                    minnext = study_chunk(pRExC_state, &scan, minlenp,
                                       &deltanext, next, &data_fake, stopparen,
-                                      recursed_depth, NULL, f, depth+1);
+                                      recursed_depth, NULL, f, depth+1,
+                                      mutate_ok);
 
                    if (min1 > minnext)
                        min1 = minnext;
 
                    if (min1 > minnext)
                        min1 = minnext;
@@ -5569,7 +5573,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                                   (mincount == 0
                                    ? (f & ~SCF_DO_SUBSTR)
                                    : f)
                                   (mincount == 0
                                    ? (f & ~SCF_DO_SUBSTR)
                                    : f)
-                                  ,depth+1);
+                                  , depth+1, mutate_ok);
 
                if (flags & SCF_DO_STCLASS)
                    data->start_class = oclass;
 
                if (flags & SCF_DO_STCLASS)
                    data->start_class = oclass;
@@ -5743,7 +5747,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                         /* recurse study_chunk() on optimised CURLYX => CURLYM */
                        study_chunk(pRExC_state, &nxt1, minlenp, &deltanext, nxt,
                                     NULL, stopparen, recursed_depth, NULL, 0,
                         /* recurse study_chunk() on optimised CURLYX => CURLYM */
                        study_chunk(pRExC_state, &nxt1, minlenp, &deltanext, nxt,
                                     NULL, stopparen, recursed_depth, NULL, 0,
-                                    depth+1);
+                                    depth+1, mutate_ok);
                    }
                    else
                        oscan->flags = 0;
                    }
                    else
                        oscan->flags = 0;
@@ -6172,7 +6176,8 @@ Perl_re_printf( aTHX_  "LHS=%" UVuf " RHS=%" UVuf "\n",
                 /* recurse study_chunk() for lookahead body */
                 minnext = study_chunk(pRExC_state, &nscan, minlenp, &deltanext,
                                       last, &data_fake, stopparen,
                 /* recurse study_chunk() for lookahead body */
                 minnext = study_chunk(pRExC_state, &nscan, minlenp, &deltanext,
                                       last, &data_fake, stopparen,
-                                      recursed_depth, NULL, f, depth+1);
+                                      recursed_depth, NULL, f, depth+1,
+                                      mutate_ok);
                 if (scan->flags) {
                     if (   deltanext < 0
                         || deltanext > (I32) U8_MAX
                 if (scan->flags) {
                     if (   deltanext < 0
                         || deltanext > (I32) U8_MAX
@@ -6277,7 +6282,7 @@ Perl_re_printf( aTHX_  "LHS=%" UVuf " RHS=%" UVuf "\n",
                 *minnextp = study_chunk(pRExC_state, &nscan, minnextp,
                                         &deltanext, last, &data_fake,
                                         stopparen, recursed_depth, NULL,
                 *minnextp = study_chunk(pRExC_state, &nscan, minnextp,
                                         &deltanext, last, &data_fake,
                                         stopparen, recursed_depth, NULL,
-                                        f, depth+1);
+                                        f, depth+1, mutate_ok);
                 if (scan->flags) {
                     assert(0);  /* This code has never been tested since this
                                    is normally not compiled */
                 if (scan->flags) {
                     assert(0);  /* This code has never been tested since this
                                    is normally not compiled */
@@ -6444,7 +6449,8 @@ Perl_re_printf( aTHX_  "LHS=%" UVuf " RHS=%" UVuf "\n",
                         /* optimise study_chunk() for TRIE */
                         minnext = study_chunk(pRExC_state, &scan, minlenp,
                             &deltanext, (regnode *)nextbranch, &data_fake,
                         /* optimise study_chunk() for TRIE */
                         minnext = study_chunk(pRExC_state, &scan, minlenp,
                             &deltanext, (regnode *)nextbranch, &data_fake,
-                            stopparen, recursed_depth, NULL, f, depth+1);
+                            stopparen, recursed_depth, NULL, f, depth+1,
+                            mutate_ok);
                     }
                     if (nextbranch && PL_regkind[OP(nextbranch)]==BRANCH)
                         nextbranch= regnext((regnode*)nextbranch);
                     }
                     if (nextbranch && PL_regkind[OP(nextbranch)]==BRANCH)
                         nextbranch= regnext((regnode*)nextbranch);
@@ -8239,7 +8245,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
             &data, -1, 0, NULL,
             SCF_DO_SUBSTR | SCF_WHILEM_VISITED_POS | stclass_flag
                           | (restudied ? SCF_TRIE_DOING_RESTUDY : 0),
             &data, -1, 0, NULL,
             SCF_DO_SUBSTR | SCF_WHILEM_VISITED_POS | stclass_flag
                           | (restudied ? SCF_TRIE_DOING_RESTUDY : 0),
-            0);
+            0, TRUE);
 
 
         CHECK_RESTUDY_GOTO_butfirst(LEAVE_with_name("study_chunk"));
 
 
         CHECK_RESTUDY_GOTO_butfirst(LEAVE_with_name("study_chunk"));
@@ -8368,7 +8374,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
             SCF_DO_STCLASS_AND|SCF_WHILEM_VISITED_POS|(restudied
                                                       ? SCF_TRIE_DOING_RESTUDY
                                                       : 0),
             SCF_DO_STCLASS_AND|SCF_WHILEM_VISITED_POS|(restudied
                                                       ? SCF_TRIE_DOING_RESTUDY
                                                       : 0),
-            0);
+            0, TRUE);
 
         CHECK_RESTUDY_GOTO_butfirst(NOOP);
 
 
         CHECK_RESTUDY_GOTO_butfirst(NOOP);
 
index 6ece306..c608df5 100644 (file)
@@ -24,7 +24,7 @@ BEGIN {
 
 skip_all_without_unicode_tables();
 
 
 skip_all_without_unicode_tables();
 
-plan tests => 1020;  # Update this when adding/deleting tests.
+plan tests => 1022;  # Update this when adding/deleting tests.
 
 run_tests() unless caller;
 
 
 run_tests() unless caller;
 
@@ -2271,6 +2271,18 @@ SKIP:
         }, 'ok', {}, 'gh17730: should not crash');
     }
 
         }, 'ok', {}, 'gh17730: should not crash');
     }
 
+    # gh17743: more regexp corruption via GOSUB
+    {
+        fresh_perl_is(q{
+            "0" =~ /((0(?0)|000(?|0000|0000)(?0))|)/; print "ok"
+        }, 'ok', {}, 'gh17743: test regexp corruption (1)');
+
+        fresh_perl_is(q{
+            "000000000000" =~ /(0(())(0((?0)())|000(?|\x{ef}\x{bf}\x{bd}|\x{ef}\x{bf}\x{bd}))|)/;
+            print "ok"
+        }, 'ok', {}, 'gh17743: test regexp corruption (2)');
+    }
+
 } # End of sub run_tests
 
 1;
 } # End of sub run_tests
 
 1;