This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
toke.c: Rmv no longer necessary UTF-8 checks
author: Karl Williamson <khw@cpan.org>
Mon, 13 Feb 2017 20:03:32 +0000 (13:03 -0700)
committer: Karl Williamson <khw@cpan.org>
Tue, 14 Feb 2017 04:24:09 +0000 (21:24 -0700)
The previous commit tightened up the checking for well-formed UTF8ness,
so the checks removed here had become redundant.

The test during a string eval may also no longer be necessary, but since
there are many ways to create that string, I'm not confident enough to
remove it.

pod/perldiag.pod
toke.c

index c2408f0..b6de3c7 100644 (file)
@@ -3414,14 +3414,6 @@ message.
 
 See also L<Encode/"Handling Malformed Data">.
 
-=item Malformed UTF-8 character immediately after '%s'
-
-(F) You said C<use utf8>, but the program file doesn't comply with UTF-8
-encoding rules.  The message prints out the properly encoded characters
-just before the first bad one.  If C<utf8> warnings are enabled, a
-warning is generated that gives more details about the type of
-malformation.
-
 =item Malformed UTF-8 returned by \N{%s} immediately after '%s'
 
 (F) The charnames handler returned malformed UTF-8.
diff --git a/toke.c b/toke.c
index 3fe5bda..a252c75 100644 (file)
--- a/toke.c
+++ b/toke.c
@@ -1057,12 +1057,7 @@ Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
                } else if (UTF8_IS_NEXT_CHAR_DOWNGRADEABLE(p, e)) {
                    p++;
                    highhalf++;
-                } else if (! UTF8_IS_INVARIANT(c)) {
-                    _force_out_malformed_utf8_message((U8 *) p, (U8 *) e,
-                                                      0,
-                                                      1 /* 1 means die */ );
-                    NOT_REACHED; /* NOTREACHED */
-               }
+                } else assert(UTF8_IS_INVARIANT(c));
            }
            if (!highhalf)
                goto plain_copy;
@@ -2595,7 +2590,6 @@ S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e)
     SV *cv;
     SV *rv;
     HV *stash;
-    const U8* first_bad_char_loc;
     const char* backslash_ptr = s - 3; /* Points to the <\> of \N{... */
 
     PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME;
@@ -2605,21 +2599,6 @@ S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e)
         return res;
     }
 
-    if (UTF && ! is_utf8_string_loc((U8 *) backslash_ptr,
-                                     e - backslash_ptr,
-                                     &first_bad_char_loc))
-    {
-        _force_out_malformed_utf8_message(first_bad_char_loc,
-                                          (U8 *) PL_parser->bufend,
-                                          0,
-                                          0 /* 0 means don't die */ );
-        yyerror_pv(Perl_form(aTHX_
-            "Malformed UTF-8 character immediately after '%.*s'",
-            (int) (first_bad_char_loc - (U8 *) backslash_ptr), backslash_ptr),
-                   SVf_UTF8);
-       return NULL;
-    }
-
     res = new_constant( NULL, 0, "charnames", res, NULL, backslash_ptr,
                         /* include the <}> */
                         e - backslash_ptr + 1);
@@ -5151,12 +5130,6 @@ Perl_yylex(pTHX)
     switch (*s) {
     default:
        if (UTF) {
-            if (! isUTF8_CHAR((U8 *) s, (U8 *) PL_bufend)) {
-                _force_out_malformed_utf8_message((U8 *) s, (U8 *) PL_bufend,
-                                                  0,
-                                                  1 /* 1 means die */ );
-                NOT_REACHED; /* NOTREACHED */
-            }
             if (isIDFIRST_utf8_safe(s, PL_bufend)) {
                 goto keylookup;
             }