This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
[perl #123755] including unknown char in error requires care
authorHugo van der Sanden <hv@crypt.org>
Sat, 7 Feb 2015 16:31:04 +0000 (16:31 +0000)
committerHugo van der Sanden <hv@crypt.org>
Mon, 9 Feb 2015 18:49:56 +0000 (18:49 +0000)
AFL (<http://lcamtuf.coredump.cx/afl>) found that when producing the
error message for /(??/ we hit an assert because we've stepped past
the end of the pattern string. Code inspection found that we also do
that in other branches, and we also need to check UTF more carefully.

regcomp.c
t/re/pat.t
t/re/re_tests

index 0d6d344..e069a15 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
@@ -521,6 +521,10 @@ static const scan_data_t zero_scan_data =
                 UTF8fARG(UTF, offset, RExC_precomp), \
                 UTF8fARG(UTF, RExC_end - RExC_precomp - offset, RExC_precomp + offset)
 
+/* Used to point after bad bytes for an error message, but avoid skipping
+ * past a nul byte. */
+#define SKIP_IF_CHAR(s) (!*(s) ? 0 : UTF ? UTF8SKIP(s) : 1)
+
 /*
  * Calls SAVEDESTRUCTOR_X if needed, then calls Perl_croak with the given
  * arg. Show regex, up to a maximum length. If it's too long, chop and add
@@ -9705,7 +9709,7 @@ S_parse_lparen_question_flags(pTHX_ RExC_state_t *pRExC_state)
                 /*NOTREACHED*/
             default:
               fail_modifiers:
-                RExC_parse += UTF ? UTF8SKIP(RExC_parse) : 1;
+                RExC_parse += SKIP_IF_CHAR(RExC_parse);
                /* diag_listed_as: Sequence (?%s...) not recognized in regex; marked by <-- HERE in m/%s/ */
                 vFAIL2utf8f("Sequence (%"UTF8f"...) not recognized",
                       UTF8fARG(UTF, RExC_parse-seqstart, seqstart));
@@ -9953,7 +9957,8 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                     nextchar(pRExC_state);
                     return ret;
                 }
-                RExC_parse++;
+                --RExC_parse;
+                RExC_parse += SKIP_IF_CHAR(RExC_parse);
                 /* diag_listed_as: Sequence (?%s...) not recognized in regex; marked by <-- HERE in m/%s/ */
                vFAIL3("Sequence (%.*s...) not recognized",
                                 RExC_parse-seqstart, seqstart);
@@ -10176,7 +10181,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
            case '?':           /* (??...) */
                is_logical = 1;
                if (*RExC_parse != '{') {
-                   RExC_parse++;
+                    RExC_parse += SKIP_IF_CHAR(RExC_parse);
                     /* diag_listed_as: Sequence (?%s...) not recognized in regex; marked by <-- HERE in m/%s/ */
                     vFAIL2utf8f(
                         "Sequence (%"UTF8f"...) not recognized",
index 3d52554..53972fe 100644 (file)
@@ -22,7 +22,7 @@ BEGIN {
     skip_all_without_unicode_tables();
 }
 
-plan tests => 759;  # Update this when adding/deleting tests.
+plan tests => 765;  # Update this when adding/deleting tests.
 
 run_tests() unless caller;
 
@@ -1635,6 +1635,17 @@ EOP
                ok(1, "did not crash");
                ok($match, "[bbb...] resolved as character class, not subscript");
        }
+
+       {       # [perl #123755]
+               for my $pat ('(??', '(?P', '(?i-') {
+                       eval qq{ qr/$pat/ };
+                       ok(1, "qr/$pat/ did not crash");
+                       eval qq{ qr/${pat}\x{123}/ };
+                       my $e = $@;
+                       like($e, qr{\x{123}},
+                               "qr/${pat}x/ shows x in error even if it's a wide character");
+               }
+       }
 } # End of sub run_tests
 
 1;
index ce8d0cf..41cda56 100644 (file)
@@ -1102,7 +1102,7 @@ X(?<=foo.)[YZ]    ..XfooXY..      y       pos     8
 (?P<n>foo)(??{ $+{n} })        snofooefoofoowaa        yM      $+{n}   foo     miniperl cannot load Tie::Hash::NamedCapture
 (?P<=n>foo|bar|baz)    -       c       -       Group name must start with a non-digit word character
 (?P<!n>foo|bar|baz)    -       c       -       Group name must start with a non-digit word character
-(?PX<n>foo|bar|baz)    -       c       -       Sequence (?PX<...) not recognized
+(?PX<n>foo|bar|baz)    -       c       -       Sequence (?PX...) not recognized
 /(?'n'foo|bar|baz)/    snofooewa       y       $1      foo
 /(?'n'foo|bar|baz)/    snofooewa       yM      $+{n}   foo     miniperl cannot load Tie::Hash::NamedCapture
 /(?'n'foo|bar|baz)(?'m'[ew]+)/ snofooewa       yM      $+{n}   foo     miniperl cannot load Tie::Hash::NamedCapture