See also L<Encode/"Handling Malformed Data">.
+=item Malformed UTF-8 character immediately after '%s'
+
+(F) You said C<use utf8>, but the program file doesn't comply with UTF-8
+encoding rules. The message prints out the properly encoded characters
+just before the first bad one. If C<utf8> warnings are enabled, a
+warning is generated that gives more details about the type of
+malformation.
+
=item Malformed UTF-8 returned by \N
(F) The charnames handler returned malformed UTF-8.
require './test.pl';
}
-plan (tests => 47);
+plan (tests => 48);
use utf8;
use open qw( :utf8 :std );
like( $@, qr/Bad name after Foo::/, 'Bad name after Foo::' );
eval q{ Foo''bar };
like( $@, qr/Bad name after Foo'/, 'Bad name after Foo\'' );
+
+{
+ no warnings 'utf8';
+ my $malformed_to_be = "\x{c0}\x{a0}"; # Overlong sequence
+ CORE::evalbytes "use charnames ':full'; use utf8; my \$x = \"\\N{abc$malformed_to_be}\"";
+ like( $@, qr/Malformed UTF-8 character immediately after '\\N{abc' at .* within string/, 'Malformed UTF-8 input to \N{}');
+}
PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME;
- res = new_constant( NULL, 0, "charnames",
- /* includes all of: \N{...} */
- res, NULL, s - 3, e - s + 4 );
+ if (UTF && ! is_utf8_string_loc((U8 *) backslash_ptr,
+ e - backslash_ptr,
+ &first_bad_char_loc))
+ {
+ /* If warnings are on, this will print a more detailed analysis of what
+ * is wrong than the error message below */
+ utf8n_to_uvuni(first_bad_char_loc,
+ e - ((char *) first_bad_char_loc),
+ NULL, 0);
+
+ /* We deliberately don't try to print the malformed character, which
+ * might not print very well; it also may be just the first of many
+ * malformations, so don't print what comes after it */
+ yyerror(Perl_form(aTHX_
+ "Malformed UTF-8 character immediately after '%.*s'",
+ (int) (first_bad_char_loc - (U8 *) backslash_ptr), backslash_ptr));
+ return NULL;
+ }
+
+ res = new_constant( NULL, 0, "charnames", res, NULL, backslash_ptr,
+ /* include the <}> */
+ e - backslash_ptr + 1);
if (! SvPOK(res)) {
return NULL;
}
/* Either returns sv, or mortalizes sv and returns a new SV*.
Best used as sv=new_constant(..., sv, ...).
If s, pv are NULL, calls subroutine with one argument,
- and type is used with error messages only. */
+ and <type> is used with error messages only.
+ <type> is assumed to be well formed UTF-8 */
STATIC SV *
S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,