From 3a1df45e827a79d14694d18dd0141c09a0abfe5c Mon Sep 17 00:00:00 2001 From: Hugo van der Sanden Date: Tue, 28 Apr 2020 18:52:44 +0100 Subject: [PATCH] study_chunk: honour mutate_ok over recursion As described in #17743, study_chunk can re-enter itself either by simple recursion or by enframing. 089ad25d3f used the new mutate_ok variable to track whether we were within the framing scope of GOSUB, and to disallow mutating changes to ops if so. This commit extends that logic to reentry by recursion, passing in the current state as was_mutate_ok. (CVE-2020-12723) (cherry picked from commit 3445383845ed220eaa12cd406db2067eb7b8a741) --- embed.fnc | 2 +- embed.h | 2 +- proto.h | 2 +- regcomp.c | 28 +++++++++++++++++----------- t/re/pat.t | 14 +++++++++++++- 5 files changed, 33 insertions(+), 15 deletions(-) diff --git a/embed.fnc b/embed.fnc index 4fb75ab..589ab1a 100644 --- a/embed.fnc +++ b/embed.fnc @@ -2045,7 +2045,7 @@ ES |SSize_t|study_chunk |NN RExC_state_t *pRExC_state \ |NULLOK struct scan_data_t *data \ |I32 stopparen|U32 recursed_depth \ |NULLOK regnode_ssc *and_withp \ - |U32 flags|U32 depth + |U32 flags|U32 depth|bool was_mutate_ok ES |void |rck_elide_nothing|NN regnode *node ESR |SV * |get_ANYOFM_contents|NN const regnode * n ESRT |U32 |add_data |NN RExC_state_t* const pRExC_state \ diff --git a/embed.h b/embed.h index 255bf59..182b12a 100644 --- a/embed.h +++ b/embed.h @@ -1090,7 +1090,7 @@ #define ssc_is_cp_posixl_init S_ssc_is_cp_posixl_init #define ssc_or(a,b,c) S_ssc_or(aTHX_ a,b,c) #define ssc_union(a,b,c) S_ssc_union(aTHX_ a,b,c) -#define study_chunk(a,b,c,d,e,f,g,h,i,j,k) S_study_chunk(aTHX_ a,b,c,d,e,f,g,h,i,j,k) +#define study_chunk(a,b,c,d,e,f,g,h,i,j,k,l) S_study_chunk(aTHX_ a,b,c,d,e,f,g,h,i,j,k,l) # endif # if defined(PERL_IN_REGCOMP_C) || defined (PERL_IN_DUMP_C) || defined(PERL_IN_OP_C) #define _invlist_dump(a,b,c,d) Perl__invlist_dump(aTHX_ a,b,c,d) diff --git a/proto.h b/proto.h index 88ec328..02ef4ed 100644 --- a/proto.h +++ b/proto.h @@ -5934,7 +5934,7 @@ STATIC void S_ssc_or(pTHX_ const RExC_state_t *pRExC_state, regnode_ssc *ssc, co STATIC void S_ssc_union(pTHX_ regnode_ssc *ssc, SV* const invlist, const bool invert_2nd); #define PERL_ARGS_ASSERT_SSC_UNION \ assert(ssc); assert(invlist) -STATIC SSize_t S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, SSize_t *minlenp, SSize_t *deltap, regnode *last, struct scan_data_t *data, I32 stopparen, U32 recursed_depth, regnode_ssc *and_withp, U32 flags, U32 depth); +STATIC SSize_t S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, SSize_t *minlenp, SSize_t *deltap, regnode *last, struct scan_data_t *data, I32 stopparen, U32 recursed_depth, regnode_ssc *and_withp, U32 flags, U32 depth, bool was_mutate_ok); #define PERL_ARGS_ASSERT_STUDY_CHUNK \ assert(pRExC_state); assert(scanp); assert(minlenp); assert(deltap); assert(last) #endif diff --git a/regcomp.c b/regcomp.c index 79c909d..640c99d 100644 --- a/regcomp.c +++ b/regcomp.c @@ -4575,7 +4575,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, I32 stopparen, U32 recursed_depth, regnode_ssc *and_withp, - U32 flags, U32 depth) + U32 flags, U32 depth, bool was_mutate_ok) /* scanp: Start here (read-write). */ /* deltap: Write maxlen-minlen here. */ /* last: Stop before this one. */ @@ -4647,7 +4647,10 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, node length to get a real minimum (because the folded version may be shorter) */ bool unfolded_multi_char = FALSE; - bool mutate_ok = (frame && frame->in_gosub) ? 0 : 1; + /* avoid mutating ops if we are anywhere within the recursed or + * enframed handling for a GOSUB: the outermost level will handle it. + */ + bool mutate_ok = was_mutate_ok && !(frame && frame->in_gosub); /* Peephole optimizer: */ DEBUG_STUDYDATA("Peep", data, depth, is_inf); DEBUG_PEEP("Peep", scan, depth, flags); @@ -4697,7 +4700,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, /* DEFINEP study_chunk() recursion */ (void)study_chunk(pRExC_state, &scan, &minlen, &deltanext, next, &data_fake, stopparen, - recursed_depth, NULL, f, depth+1); + recursed_depth, NULL, f, depth+1, mutate_ok); scan = next; } else @@ -4765,7 +4768,8 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, /* recurse study_chunk() for each BRANCH in an alternation */ minnext = study_chunk(pRExC_state, &scan, minlenp, &deltanext, next, &data_fake, stopparen, - recursed_depth, NULL, f, depth+1); + recursed_depth, NULL, f, depth+1, + mutate_ok); if (min1 > minnext) min1 = minnext; @@ -5569,7 +5573,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, (mincount == 0 ? (f & ~SCF_DO_SUBSTR) : f) - ,depth+1); + , depth+1, mutate_ok); if (flags & SCF_DO_STCLASS) data->start_class = oclass; @@ -5743,7 +5747,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, /* recurse study_chunk() on optimised CURLYX => CURLYM */ study_chunk(pRExC_state, &nxt1, minlenp, &deltanext, nxt, NULL, stopparen, recursed_depth, NULL, 0, - depth+1); + depth+1, mutate_ok); } else oscan->flags = 0; @@ -6172,7 +6176,8 @@ Perl_re_printf( aTHX_ "LHS=%" UVuf " RHS=%" UVuf "\n", /* recurse study_chunk() for lookahead body */ minnext = study_chunk(pRExC_state, &nscan, minlenp, &deltanext, last, &data_fake, stopparen, - recursed_depth, NULL, f, depth+1); + recursed_depth, NULL, f, depth+1, + mutate_ok); if (scan->flags) { if ( deltanext < 0 || deltanext > (I32) U8_MAX @@ -6277,7 +6282,7 @@ Perl_re_printf( aTHX_ "LHS=%" UVuf " RHS=%" UVuf "\n", *minnextp = study_chunk(pRExC_state, &nscan, minnextp, &deltanext, last, &data_fake, stopparen, recursed_depth, NULL, - f, depth+1); + f, depth+1, mutate_ok); if (scan->flags) { assert(0); /* This code has never been tested since this is normally not compiled */ @@ -6444,7 +6449,8 @@ Perl_re_printf( aTHX_ "LHS=%" UVuf " RHS=%" UVuf "\n", /* optimise study_chunk() for TRIE */ minnext = study_chunk(pRExC_state, &scan, minlenp, &deltanext, (regnode *)nextbranch, &data_fake, - stopparen, recursed_depth, NULL, f, depth+1); + stopparen, recursed_depth, NULL, f, depth+1, + mutate_ok); } if (nextbranch && PL_regkind[OP(nextbranch)]==BRANCH) nextbranch= regnext((regnode*)nextbranch); @@ -8239,7 +8245,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count, &data, -1, 0, NULL, SCF_DO_SUBSTR | SCF_WHILEM_VISITED_POS | stclass_flag | (restudied ? SCF_TRIE_DOING_RESTUDY : 0), - 0); + 0, TRUE); CHECK_RESTUDY_GOTO_butfirst(LEAVE_with_name("study_chunk")); @@ -8368,7 +8374,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count, SCF_DO_STCLASS_AND|SCF_WHILEM_VISITED_POS|(restudied ? SCF_TRIE_DOING_RESTUDY : 0), - 0); + 0, TRUE); CHECK_RESTUDY_GOTO_butfirst(NOOP); diff --git a/t/re/pat.t b/t/re/pat.t index 6ece306..c608df5 100644 --- a/t/re/pat.t +++ b/t/re/pat.t @@ -24,7 +24,7 @@ BEGIN { skip_all_without_unicode_tables(); -plan tests => 1020; # Update this when adding/deleting tests. +plan tests => 1022; # Update this when adding/deleting tests. run_tests() unless caller; @@ -2271,6 +2271,18 @@ SKIP: }, 'ok', {}, 'gh17730: should not crash'); } + # gh17743: more regexp corruption via GOSUB + { + fresh_perl_is(q{ + "0" =~ /((0(?0)|000(?|0000|0000)(?0))|)/; print "ok" + }, 'ok', {}, 'gh17743: test regexp corruption (1)'); + + fresh_perl_is(q{ + "000000000000" =~ /(0(())(0((?0)())|000(?|\x{ef}\x{bf}\x{bd}|\x{ef}\x{bf}\x{bd}))|)/; + print "ok" + }, 'ok', {}, 'gh17743: test regexp corruption (2)'); + } + } # End of sub run_tests 1; -- 1.8.3.1