This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Subject: PATCH: [perl #132063]: Heap buffer overflow
authorKarl Williamson <khw@cpan.org>
Tue, 6 Feb 2018 21:50:48 +0000 (14:50 -0700)
committerKarl Williamson <khw@cpan.org>
Tue, 17 Apr 2018 02:50:02 +0000 (20:50 -0600)
There were three things that were fixed as a result of this ticket, any
one of which would have avoided the issue.

Commit 421da25c4318861925129cd1b17263289db3443c already has fixed
one of those.  The issue was reading beyond the end of a buffer, and
that commit keeps from reading beyond a NUL, which normally should be
present, marking the end of the buffer.

This commit fixes the issue where the code was told that reading that
many bytes was ok to do.  This is several instances in regexec.c of the
code assuming that the input was valid UTF-8, whereas the input was too
short for what the start byte claimed it would be.

I grepped through the core for any other similar uses, and did not find
any.

The next commit will fix the third thing.

regexec.c
t/lib/warnings/regexec

index 4b537f6..649d3f9 100644 (file)
--- a/regexec.c
+++ b/regexec.c
@@ -1796,7 +1796,9 @@ Perl_re_intuit_start(pTHX_
                                            ? trie_utf8_fold                         \
                                            :   trie_latin_utf8_fold)))
 
-#define REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc, uscan, len, uvc, charid, foldlen, foldbuf, uniflags) \
+/* 'uscan' is set to foldbuf, and incremented, so below the end of uscan is
+ * 'foldbuf+sizeof(foldbuf)' */
+#define REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc, uc_end, uscan, len, uvc, charid, foldlen, foldbuf, uniflags) \
 STMT_START {                                                                        \
     STRLEN skiplen;                                                                 \
     U8 flags = FOLD_FLAGS_FULL;                                                     \
@@ -1804,7 +1806,7 @@ STMT_START {
     case trie_flu8:                                                                 \
         _CHECK_AND_WARN_PROBLEMATIC_LOCALE;                                         \
         if (utf8_target && UTF8_IS_ABOVE_LATIN1(*uc)) {                             \
-            _CHECK_AND_OUTPUT_WIDE_LOCALE_UTF8_MSG(uc, uc + UTF8SKIP(uc));          \
+            _CHECK_AND_OUTPUT_WIDE_LOCALE_UTF8_MSG(uc, uc_end - uc);                \
         }                                                                           \
         goto do_trie_utf8_fold;                                                     \
     case trie_utf8_exactfa_fold:                                                    \
@@ -1813,14 +1815,14 @@ STMT_START {
     case trie_utf8_fold:                                                            \
       do_trie_utf8_fold:                                                            \
         if ( foldlen>0 ) {                                                          \
-            uvc = utf8n_to_uvchr( (const U8*) uscan, UTF8_MAXLEN, &len, uniflags ); \
+            uvc = utf8n_to_uvchr( (const U8*) uscan, foldlen, &len, uniflags );     \
             foldlen -= len;                                                         \
             uscan += len;                                                           \
             len=0;                                                                  \
         } else {                                                                    \
-            len = UTF8SKIP(uc);                                                     \
-            uvc = _toFOLD_utf8_flags( (const U8*) uc, uc + len, foldbuf, &foldlen,  \
+            uvc = _toFOLD_utf8_flags( (const U8*) uc, uc_end, foldbuf, &foldlen,    \
                                                                             flags); \
+            len = UTF8SKIP(uc);                                                     \
             skiplen = UVCHR_SKIP( uvc );                                            \
             foldlen -= skiplen;                                                     \
             uscan = foldbuf + skiplen;                                              \
@@ -1831,7 +1833,7 @@ STMT_START {
         /* FALLTHROUGH */                                                           \
     case trie_latin_utf8_fold:                                                      \
         if ( foldlen>0 ) {                                                          \
-            uvc = utf8n_to_uvchr( (const U8*) uscan, UTF8_MAXLEN, &len, uniflags ); \
+            uvc = utf8n_to_uvchr( (const U8*) uscan, foldlen, &len, uniflags );     \
             foldlen -= len;                                                         \
             uscan += len;                                                           \
             len=0;                                                                  \
@@ -1850,7 +1852,7 @@ STMT_START {
         }                                                                           \
         /* FALLTHROUGH */                                                           \
     case trie_utf8:                                                                 \
-        uvc = utf8n_to_uvchr( (const U8*) uc, UTF8_MAXLEN, &len, uniflags );        \
+        uvc = utf8n_to_uvchr( (const U8*) uc, uc_end - uc, &len, uniflags );        \
         break;                                                                      \
     case trie_plain:                                                                \
         uvc = (UV)*uc;                                                              \
@@ -2971,10 +2973,10 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
                     }
                     points[pointpos++ % maxlen]= uc;
                     if (foldlen || uc < (U8*)strend) {
-                        REXEC_TRIE_READ_CHAR(trie_type, trie,
-                                         widecharmap, uc,
-                                         uscan, len, uvc, charid, foldlen,
-                                         foldbuf, uniflags);
+                        REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc,
+                                             (U8 *) strend, uscan, len, uvc,
+                                             charid, foldlen, foldbuf,
+                                             uniflags);
                         DEBUG_TRIE_EXECUTE_r({
                             dump_exec_pos( (char *)uc, c, strend,
                                         real_start, s, utf8_target, 0);
@@ -6003,8 +6005,9 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
                    if ( base && (foldlen || uc < (U8*)(reginfo->strend))) {
                        I32 offset;
                        REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc,
-                                            uscan, len, uvc, charid, foldlen,
-                                            foldbuf, uniflags);
+                                             (U8 *) reginfo->strend, uscan,
+                                             len, uvc, charid, foldlen,
+                                             foldbuf, uniflags);
                        charcount++;
                        if (foldlen>0)
                            ST.longfold = TRUE;
@@ -6139,8 +6142,8 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
                        while (foldlen) {
                            if (!--chars)
                                break;
-                           uvc = utf8n_to_uvchr(uscan, UTF8_MAXLEN, &len,
-                                           uniflags);
+                           uvc = utf8n_to_uvchr(uscan, foldlen, &len,
+                                                 uniflags);
                            uscan += len;
                            foldlen -= len;
                        }
index 900dd6e..5ce9c3a 100644 (file)
@@ -260,3 +260,10 @@ setlocale(&POSIX::LC_CTYPE, $utf8_locale);
 "k" =~ /(?[ \N{KELVIN SIGN} ])/i;
 ":" =~ /(?[ \: ])/;
 EXPECT
+########
+# NAME perl #132063, read beyond buffer end
+"\xfe" =~ /(?il)\x{100}|\x{100}/;
+EXPECT
+OPTIONS fatal regex
+Malformed UTF-8 character: \\xfe \(too short; 1 byte available, need \d+\) in pattern match \(m//\) at - line 1.
+Malformed UTF-8 character \(fatal\) at - line 1.