* UTF8_IS_INVARIANT() works even if not in UTF-8 */
if (! UTF8_IS_INVARIANT(c) && utf8_target) {
STRLEN c_len = 0;
- c = utf8n_to_uvchr(p, p_end - p, &c_len, ( UTF8_ALLOW_DEFAULT
- | UTF8_CHECK_ONLY));
- if (c_len == (STRLEN)-1)
- Perl_croak(aTHX_ "Malformed UTF-8 character (fatal)");
+ const U32 utf8n_flags = UTF8_ALLOW_DEFAULT;
+ c = utf8n_to_uvchr(p, p_end - p, &c_len, utf8n_flags | UTF8_CHECK_ONLY);
+ if (c_len == (STRLEN)-1) {
+ _force_out_malformed_utf8_message(p, p_end,
+ utf8n_flags,
+ 1 /* 1 means die */ );
+ NOT_REACHED; /* NOTREACHED */
+ }
if (c > 255 && OP(n) == ANYOFL && ! ANYOFL_UTF8_LOCALE_REQD(flags)) {
_CHECK_AND_OUTPUT_WIDE_LOCALE_CP_MSG(c);
}
or skip "These tests won't work on EBCIDIC", 3;
fresh_perl_is(
"BEGIN{\$^H=hex ~0}\xF3",
- "Integer overflow in hexadecimal number at - line 1.\n" .
- "Malformed UTF-8 character: \\xf3 (too short; 1 byte available, need 4) at - line 1.",
+ "Integer overflow in hexadecimal number at - line 1.\n"
+ . "Malformed UTF-8 character: \\xf3 (too short; 1 byte available, need 4) at - line 1.\n"
+ . "Malformed UTF-8 character (fatal) at - line 1.",
{},
'[perl #128996] - use of PL_op after op is freed'
);
);
fresh_perl_like(
qq(BEGIN{\$^H=0x800000}\n 0m 0\xB5\xB500\xB5\0),
- qr/Unrecognized character \\x\{0\}; marked by <-- HERE after 0m.*<-- HERE near column 12 at - line 2./,
+ qr/Malformed UTF-8 character: \\xb5 \(unexpected continuation byte 0xb5, with no preceding start byte\)/,
{},
'[perl #129000] read before buffer'
);
} else if (UTF8_IS_NEXT_CHAR_DOWNGRADEABLE(p, e)) {
p++;
highhalf++;
- } else if (! UTF8_IS_INVARIANT(c)) {
- /* malformed UTF-8 */
- ENTER;
- SAVESPTR(PL_warnhook);
- PL_warnhook = PERL_WARNHOOK_FATAL;
- utf8n_to_uvchr((U8*)p, e-p, NULL, 0);
- LEAVE;
+ } else if (! UTF8_IS_INVARIANT(c)) {
+ _force_out_malformed_utf8_message((U8 *) p, (U8 *) e,
+ 0,
+ 1 /* 1 means die */ );
+ NOT_REACHED; /* NOTREACHED */
}
}
if (!highhalf)
}
unichar = utf8n_to_uvchr((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
if (retlen == (STRLEN)-1) {
- /* malformed UTF-8 */
- ENTER;
- SAVESPTR(PL_warnhook);
- PL_warnhook = PERL_WARNHOOK_FATAL;
- utf8n_to_uvchr((U8*)s, bufend-s, NULL, 0);
- LEAVE;
+ _force_out_malformed_utf8_message((U8 *) s,
+ (U8 *) bufend,
+ 0,
+ 1 /* 1 means die */ );
+ NOT_REACHED; /* NOTREACHED */
}
return unichar;
} else {
e - backslash_ptr,
&first_bad_char_loc))
{
- /* If warnings are on, this will print a more detailed analysis of what
- * is wrong than the error message below */
- utf8n_to_uvchr(first_bad_char_loc,
- e - ((char *) first_bad_char_loc),
- NULL, 0);
-
- /* We deliberately don't try to print the malformed character, which
- * might not print very well; it also may be just the first of many
- * malformations, so don't print what comes after it */
+ _force_out_malformed_utf8_message(first_bad_char_loc,
+ (U8 *) PL_parser->bufend,
+ 0,
+ 0 /* 0 means don't die */ );
yyerror_pv(Perl_form(aTHX_
"Malformed UTF-8 character immediately after '%.*s'",
(int) (first_bad_char_loc - (U8 *) backslash_ptr), backslash_ptr),
STRLEN len;
const char* const str = SvPV_const(res, len);
if (! is_utf8_string_loc((U8 *) str, len, &first_bad_char_loc)) {
- /* If warnings are on, this will print a more detailed analysis of
- * what is wrong than the error message below */
- utf8n_to_uvchr(first_bad_char_loc,
- (char *) first_bad_char_loc - str,
- NULL, 0);
-
- /* We deliberately don't try to print the malformed character,
- * which might not print very well; it also may be just the first
- * of many malformations, so don't print what comes after it */
+ _force_out_malformed_utf8_message(first_bad_char_loc,
+ (U8 *) PL_parser->bufend,
+ 0,
+ 0 /* 0 means don't die */ );
yyerror_pv(
Perl_form(aTHX_
"Malformed UTF-8 returned by %.*s immediately after '%.*s'",
default:
if (UTF) {
if (! isUTF8_CHAR((U8 *) s, (U8 *) PL_bufend)) {
- ENTER;
- SAVESPTR(PL_warnhook);
- PL_warnhook = PERL_WARNHOOK_FATAL;
- utf8n_to_uvchr((U8*)s, PL_bufend-s, NULL, 0);
- LEAVE;
+ _force_out_malformed_utf8_message((U8 *) s, (U8 *) PL_bufend,
+ 0,
+ 1 /* 1 means die */ );
+ NOT_REACHED; /* NOTREACHED */
}
if (isIDFIRST_utf8((U8*)s)) {
goto keylookup;