This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
regexec.c: Fix assertion failure GH #18451
author Karl Williamson <khw@cpan.org>
Sat, 2 Jan 2021 22:42:25 +0000 (15:42 -0700)
committer Karl Williamson <khw@cpan.org>
Sun, 3 Jan 2021 12:56:55 +0000 (05:56 -0700)
This was caused by copying too many characters for the size of the
buffer.  Only one character is needed.

regexec.c
t/re/re_tests

index 3ca2288..b46693e 100644 (file)
--- a/regexec.c
+++ b/regexec.c
@@ -4694,24 +4694,37 @@ S_setup_EXACTISH_ST(pTHX_ const regnode * const text_node,
      *
      * Everything generally matches at least itself.  But if there is a
      * UTF8ness mismatch, we have to convert to that of the target string. */
-    if (utf8_pat == utf8_target || UTF8_IS_INVARIANT(*pat)) {
-        lengths[0] = MIN(pat_len, C_ARRAY_LENGTH(matches[0]));
-        Copy(pat, matches[0], lengths[0], U8);
+    if (UTF8_IS_INVARIANT(*pat)) {  /* Immaterial if either is in UTF-8 */
+        matches[0][0] = pat[0];
+        lengths[0] = 1;
         m->count++;
     }
-    else if (utf8_target) { /* target is UTF-8; pattern isn't */
-        matches[0][0] = UTF8_EIGHT_BIT_HI(pat[0]);
-        matches[0][1] = UTF8_EIGHT_BIT_LO(pat[0]);
-        lengths[0] = 2;
-        m->count++;
-    }
-    else { /* pattern is UTF-8, target isn't */
-        if (UTF8_IS_DOWNGRADEABLE_START(*pat)) {
-            matches[0][0] = EIGHT_BIT_UTF8_TO_NATIVE(pat[0], pat[1]);
-            lengths[0] = 1;
+    else if (utf8_target) {
+        if (utf8_pat) {
+            lengths[0] = UTF8SKIP(pat);
+            Copy(pat, matches[0], lengths[0], U8);
+            m->count++;
+        }
+        else {  /* target is UTF-8, pattern isn't */
+            matches[0][0] = UTF8_EIGHT_BIT_HI(pat[0]);
+            matches[0][1] = UTF8_EIGHT_BIT_LO(pat[0]);
+            lengths[0] = 2;
             m->count++;
         }
     }
+    else if (! utf8_pat) {  /* Neither is UTF-8 */
+        matches[0][0] = pat[0];
+        lengths[0] = 1;
+        m->count++;
+    }
+    else     /* target isn't UTF-8; pattern is.  No match possible unless the
+                pattern's first character can fit in a byte */
+         if (UTF8_IS_DOWNGRADEABLE_START(*pat))
+    {
+        matches[0][0] = EIGHT_BIT_UTF8_TO_NATIVE(pat[0], pat[1]);
+        lengths[0] = 1;
+        m->count++;
+    }
 
     /* Here we have taken care of any necessary node-type changes */
 
index ab5a0d8..ff8bd7b 100644 (file)
@@ -2013,7 +2013,7 @@ AB\s+\x{100}      AB \x{100}X     y       -       -
 (?:(?^:(?{1}))[^0-9])  :       y       $&      :       # [perl #133348]
 /[\xdf-/i      -       ca      -       Invalid [] range        # [perl #133620] likely only fails under valgrind
 /[\x59-/i      -       ce      -       Unmatched [     # [perl #133620] likely only fails under valgrind
-/\1a(b)/       bab     n       -       -               # This compiles but fails to match as \1 is not set when parsed.
+/\1a(b)/       bab     n       -       -               # This compiles but fails to match as \1 is not set when parsed
 /(?iu)(?<=\xdf)hbase/  sshbase y       $&      hbase
 /\x{30c3}?[\x{30a2}\x{30a4}\x{30a6}\x{30a8}\x{30aa}-\x{30e2}\x{30e4}\x{30e6}\x{30e8}-\x{30f4}](?:[\x{30e3}\x{30e5}\x{30e7}\x{30a1}\x{30a3}\x{30a5}\x{30a7}\x{30a9}])?\x{30fc}?\x{30f3}?/       \x{30de}\x{30fc}\x{30af}\x{30b5}\x{30fc}\x{30d3}\x{30b9}        y       $&      \x{30de}\x{30fc}        # part of [perl #133942
 /[\x{3041}-\x{3093}]+/ \x{6f22}\x{5b57}\x{3001}\x{30ab}\x{30bf}\x{30ab}\x{30ca}\x{3001}\x{3072}\x{3089}\x{304c}\x{306a}\x{306e}\x{5165}\x{3063}\x{305f}String  y       $&      \x{3072}\x{3089}\x{304c}\x{306a}\x{306e}        # [perl #133978]
@@ -2025,6 +2025,7 @@ AB\s+\x{100}      AB \x{100}X     y       -       -
 /(?iaa:A?\K*)/ African_Feh     c       -       \\K* is forbidden - matches null string many times in regex
 ^((\w|<(\s)*(?1)(?3)*>)(?:(?3)*\+(?3)*(?2))*)(?3)*\+   a + b + <c + d> y       $1      a + b           # [GH #18096]
 ^((\w|<(\s)*(?1)(?3)*>)(?:(?3)*\+(?3)*(?2))*)(?3)*\+   a + <b> + c     y       $1      a + <b>         # [GH #18096]
+/0?\xdf\xdf\xdf\xdfs\o{500}|/i \o{600} y       $&                              # [GH #18451]
 # Keep these lines at the end of the file
 # pat  string  y/n/etc expr    expected-expr   skip-reason     comment
 # vim: softtabstop=0 noexpandtab