This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
study_chunk: extract rck_elide_nothing
authorHugo van der Sanden <hv@crypt.org>
Tue, 18 Feb 2020 13:51:16 +0000 (13:51 +0000)
committerSteve Hay <steve.m.hay@googlemail.com>
Mon, 1 Jun 2020 19:35:50 +0000 (20:35 +0100)
(CVE-2020-10878)

(cherry picked from commit 4fccd2d99bdeb28c2937c3220ea5334999564aa8)

embed.fnc
embed.h
proto.h
regcomp.c

index 7db75f5..4fb75ab 100644 (file)
--- a/embed.fnc
+++ b/embed.fnc
@@ -2046,6 +2046,7 @@ ES        |SSize_t|study_chunk    |NN RExC_state_t *pRExC_state \
                                 |I32 stopparen|U32 recursed_depth \
                                |NULLOK regnode_ssc *and_withp \
                                |U32 flags|U32 depth
+ES     |void   |rck_elide_nothing|NN regnode *node
 ESR    |SV *   |get_ANYOFM_contents|NN const regnode * n
 ESRT   |U32    |add_data       |NN RExC_state_t* const pRExC_state \
                                |NN const char* const s|const U32 n
diff --git a/embed.h b/embed.h
index 8418e8c..255bf59 100644 (file)
--- a/embed.h
+++ b/embed.h
 #define parse_lparen_question_flags(a) S_parse_lparen_question_flags(aTHX_ a)
 #define parse_uniprop_string(a,b,c,d,e,f,g,h,i,j)      S_parse_uniprop_string(aTHX_ a,b,c,d,e,f,g,h,i,j)
 #define populate_ANYOF_from_invlist(a,b)       S_populate_ANYOF_from_invlist(aTHX_ a,b)
+#define rck_elide_nothing(a)   S_rck_elide_nothing(aTHX_ a)
 #define reg(a,b,c,d)           S_reg(aTHX_ a,b,c,d)
 #define reg2Lanode(a,b,c,d)    S_reg2Lanode(aTHX_ a,b,c,d)
 #define reg_node(a,b)          S_reg_node(aTHX_ a,b)
diff --git a/proto.h b/proto.h
index 6d6cccf..88ec328 100644 (file)
--- a/proto.h
+++ b/proto.h
@@ -5823,6 +5823,9 @@ STATIC SV *       S_parse_uniprop_string(pTHX_ const char * const name, Size_t name_le
 STATIC void    S_populate_ANYOF_from_invlist(pTHX_ regnode *node, SV** invlist_ptr);
 #define PERL_ARGS_ASSERT_POPULATE_ANYOF_FROM_INVLIST   \
        assert(node); assert(invlist_ptr)
+STATIC void    S_rck_elide_nothing(pTHX_ regnode *node);
+#define PERL_ARGS_ASSERT_RCK_ELIDE_NOTHING     \
+       assert(node)
 PERL_STATIC_NO_RET void        S_re_croak(pTHX_ bool utf8, const char* pat, ...)
                        __attribute__noreturn__
                        __attribute__format__(__printf__,pTHX_2,pTHX_3);
index 203dbdc..b02ae9b 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
@@ -4528,6 +4528,44 @@ S_unwind_scan_frames(pTHX_ const void *p)
     } while (f);
 }
 
+/* Follow the next-chain of the current node and optimize away
+   all the NOTHINGs from it.
+ */
+STATIC void
+S_rck_elide_nothing(pTHX_ regnode *node)
+{
+    dVAR;
+
+    PERL_ARGS_ASSERT_RCK_ELIDE_NOTHING;
+
+    if (OP(node) != CURLYX) {
+        const int max = (reg_off_by_arg[OP(node)]
+                        ? I32_MAX
+                          /* I32 may be smaller than U16 on CRAYs! */
+                        : (I32_MAX < U16_MAX ? I32_MAX : U16_MAX));
+        int off = (reg_off_by_arg[OP(node)] ? ARG(node) : NEXT_OFF(node));
+        int noff;
+        regnode *n = node;
+
+        /* Skip NOTHING and LONGJMP. */
+        while (
+            (n = regnext(n))
+            && (
+                (PL_regkind[OP(n)] == NOTHING && (noff = NEXT_OFF(n)))
+                || ((OP(n) == LONGJMP) && (noff = ARG(n)))
+            )
+            && off + noff < max
+        ) {
+            off += noff;
+        }
+        if (reg_off_by_arg[OP(node)])
+            ARG(node) = off;
+        else
+            NEXT_OFF(node) = off;
+    }
+    return;
+}
+
 /* the return from this sub is the minimum length that could possibly match */
 STATIC SSize_t
 S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
@@ -4630,27 +4668,9 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
         }
 
         /* Follow the next-chain of the current node and optimize
-           away all the NOTHINGs from it.  */
-        if (OP(scan) != CURLYX) {
-            const int max = (reg_off_by_arg[OP(scan)]
-                            ? I32_MAX
-                              /* I32 may be smaller than U16 on CRAYs! */
-                            : (I32_MAX < U16_MAX ? I32_MAX : U16_MAX));
-            int off = (reg_off_by_arg[OP(scan)] ? ARG(scan) : NEXT_OFF(scan));
-            int noff;
-            regnode *n = scan;
-
-            /* Skip NOTHING and LONGJMP. */
-            while (   (n = regnext(n))
-                   && (   (PL_regkind[OP(n)] == NOTHING && (noff = NEXT_OFF(n)))
-                       || ((OP(n) == LONGJMP) && (noff = ARG(n))))
-                   && off + noff < max)
-                off += noff;
-            if (reg_off_by_arg[OP(scan)])
-                ARG(scan) = off;
-            else
-                NEXT_OFF(scan) = off;
-        }
+           away all the NOTHINGs from it.
+         */
+        rck_elide_nothing(scan);
 
         /* The principal pseudo-switch.  Cannot be a switch, since we look into
          * several different things.  */
@@ -5847,11 +5867,7 @@ Perl_re_printf( aTHX_  "LHS=%" UVuf " RHS=%" UVuf "\n",
                if (data && (fl & SF_HAS_EVAL))
                    data->flags |= SF_HAS_EVAL;
              optimize_curly_tail:
-               if (OP(oscan) != CURLYX) {
-                   while (PL_regkind[OP(next = regnext(oscan))] == NOTHING
-                          && NEXT_OFF(next))
-                       NEXT_OFF(oscan) += NEXT_OFF(next);
-               }
+               rck_elide_nothing(oscan);
                continue;
 
            default: