X-Git-Url: https://perl5.git.perl.org/perl5.git/blobdiff_plain/1a419e6b1ae911c99a8ca065071290a8ba070856..43275f00a97a14a80f9493c38895a5c77f0fc88a:/utf8.c diff --git a/utf8.c b/utf8.c index a7baed4..cd38768 100644 --- a/utf8.c +++ b/utf8.c @@ -107,6 +107,11 @@ Perl_uvoffuni_to_utf8_flags(pTHX_ U8 *d, UV uv, UV flags) return d; } +#ifdef EBCDIC + /* Not representable in UTF-EBCDIC */ + flags |= UNICODE_DISALLOW_FE_FF; +#endif + /* The first problematic code point is the first surrogate */ if (uv >= UNICODE_SURROGATE_FIRST && ckWARN3_d(WARN_SURROGATE, WARN_NON_UNICODE, WARN_NONCHAR)) @@ -130,6 +135,10 @@ Perl_uvoffuni_to_utf8_flags(pTHX_ U8 *d, UV uv, UV flags) if (flags & UNICODE_DISALLOW_SUPER || (UNICODE_IS_FE_FF(uv) && (flags & UNICODE_DISALLOW_FE_FF))) { +#ifdef EBCDIC + Perl_die(aTHX_ "Can't represent character for Ox%"UVXf" on this platform", uv); + assert(0); +#endif return NULL; } } @@ -1688,7 +1697,7 @@ Perl__is_utf8_perl_idstart(pTHX_ const U8 *p) if (! PL_utf8_perl_idstart) { invlist = _new_invlist_C_array(_Perl_IDStart_invlist); } - return is_utf8_common(p, &PL_utf8_perl_idstart, "", invlist); + return is_utf8_common(p, &PL_utf8_perl_idstart, "_Perl_IDStart", invlist); } bool @@ -1711,7 +1720,7 @@ Perl__is_utf8_perl_idcont(pTHX_ const U8 *p) if (! PL_utf8_perl_idcont) { invlist = _new_invlist_C_array(_Perl_IDCont_invlist); } - return is_utf8_common(p, &PL_utf8_perl_idcont, "", invlist); + return is_utf8_common(p, &PL_utf8_perl_idcont, "_Perl_IDCont", invlist); } bool @@ -1864,7 +1873,7 @@ Perl_to_utf8_case(pTHX_ const U8 *p, U8* ustrp, STRLEN *lenp, } STATIC UV -S_check_locale_boundary_crossing(pTHX_ const U8* const p, const UV result, U8* const ustrp, STRLEN *lenp) +S_check_locale_boundary_crossing(pTHX_ const char * const func_name, const U8* const p, const UV result, U8* const ustrp, STRLEN *lenp) { /* This is called when changing the case of a utf8-encoded character above * the Latin1 range, and the operation is in a non-UTF-8 locale. If the @@ -1907,6 +1916,14 @@ bad_crossing: /* Failed, have to return the original */ original = valid_utf8_to_uvchr(p, lenp); + + /* diag_listed_as: Can't do %s("%s") on non-UTF-8 locale; resolved to "%s". */ + Perl_ck_warner(aTHX_ packWARN(WARN_LOCALE), + "Can't do %s(\"\\x{%"UVXf"}\") on non-UTF-8 locale; " + "resolved to \"\\x{%"UVXf"}\".", + func_name, + original, + original); Copy(p, ustrp, *lenp, char); return original; } @@ -1955,7 +1972,7 @@ Perl__to_utf8_upper_flags(pTHX_ const U8 *p, U8* ustrp, STRLEN *lenp, bool flags result = CALL_UPPER_CASE(p, ustrp, lenp); if (flags) { - result = check_locale_boundary_crossing(p, result, ustrp, lenp); + result = check_locale_boundary_crossing("uc", p, result, ustrp, lenp); } return result; } @@ -2020,7 +2037,7 @@ Perl__to_utf8_title_flags(pTHX_ const U8 *p, U8* ustrp, STRLEN *lenp, bool flags result = CALL_TITLE_CASE(p, ustrp, lenp); if (flags) { - result = check_locale_boundary_crossing(p, result, ustrp, lenp); + result = check_locale_boundary_crossing("ucfirst", p, result, ustrp, lenp); } return result; } @@ -2084,7 +2101,7 @@ Perl__to_utf8_lower_flags(pTHX_ const U8 *p, U8* ustrp, STRLEN *lenp, bool flags result = CALL_LOWER_CASE(p, ustrp, lenp); if (flags) { - result = check_locale_boundary_crossing(p, result, ustrp, lenp); + result = check_locale_boundary_crossing("lc", p, result, ustrp, lenp); } return result; @@ -2168,15 +2185,23 @@ Perl__to_utf8_fold_flags(pTHX_ const U8 *p, U8* ustrp, STRLEN *lenp, U8 flags) && memEQ((char *) p, LATIN_CAPITAL_LETTER_SHARP_S_UTF8, sizeof(LATIN_CAPITAL_LETTER_SHARP_S_UTF8) - 1)) { + /* diag_listed_as: Can't do %s("%s") on non-UTF-8 locale; resolved to "%s". */ + Perl_ck_warner(aTHX_ packWARN(WARN_LOCALE), + "Can't do fc(\"\\x{1E9E}\") on non-UTF-8 locale; " + "resolved to \"\\x{17F}\\x{17F}\"."); goto return_long_s; } else if (UTF8SKIP(p) == sizeof(LATIN_SMALL_LIGATURE_LONG_S_T) - 1 && memEQ((char *) p, LATIN_SMALL_LIGATURE_LONG_S_T_UTF8, sizeof(LATIN_SMALL_LIGATURE_LONG_S_T_UTF8) - 1)) { + /* diag_listed_as: Can't do %s("%s") on non-UTF-8 locale; resolved to "%s". */ + Perl_ck_warner(aTHX_ packWARN(WARN_LOCALE), + "Can't do fc(\"\\x{FB05}\") on non-UTF-8 locale; " + "resolved to \"\\x{FB06}\"."); goto return_ligature_st; } - return check_locale_boundary_crossing(p, result, ustrp, lenp); + return check_locale_boundary_crossing("fc", p, result, ustrp, lenp); } else if (! (flags & FOLD_FLAGS_NOMIX_ASCII)) { return result;