This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
svleak.t: Add test for #123198
[perl5.git] / utf8.c
diff --git a/utf8.c b/utf8.c
index a7baed4..cd38768 100644 (file)
--- a/utf8.c
+++ b/utf8.c
@@ -107,6 +107,11 @@ Perl_uvoffuni_to_utf8_flags(pTHX_ U8 *d, UV uv, UV flags)
        return d;
     }
 
+#ifdef EBCDIC
+    /* Not representable in UTF-EBCDIC */
+    flags |= UNICODE_DISALLOW_FE_FF;
+#endif
+
     /* The first problematic code point is the first surrogate */
     if (uv >= UNICODE_SURROGATE_FIRST
         && ckWARN3_d(WARN_SURROGATE, WARN_NON_UNICODE, WARN_NONCHAR))
@@ -130,6 +135,10 @@ Perl_uvoffuni_to_utf8_flags(pTHX_ U8 *d, UV uv, UV flags)
            if (flags & UNICODE_DISALLOW_SUPER
                || (UNICODE_IS_FE_FF(uv) && (flags & UNICODE_DISALLOW_FE_FF)))
            {
+#ifdef EBCDIC
+                Perl_die(aTHX_ "Can't represent character for Ox%"UVXf" on this platform", uv);
+                assert(0);
+#endif
                return NULL;
            }
        }
@@ -1688,7 +1697,7 @@ Perl__is_utf8_perl_idstart(pTHX_ const U8 *p)
     if (! PL_utf8_perl_idstart) {
         invlist = _new_invlist_C_array(_Perl_IDStart_invlist);
     }
-    return is_utf8_common(p, &PL_utf8_perl_idstart, "", invlist);
+    return is_utf8_common(p, &PL_utf8_perl_idstart, "_Perl_IDStart", invlist);
 }
 
 bool
@@ -1711,7 +1720,7 @@ Perl__is_utf8_perl_idcont(pTHX_ const U8 *p)
     if (! PL_utf8_perl_idcont) {
         invlist = _new_invlist_C_array(_Perl_IDCont_invlist);
     }
-    return is_utf8_common(p, &PL_utf8_perl_idcont, "", invlist);
+    return is_utf8_common(p, &PL_utf8_perl_idcont, "_Perl_IDCont", invlist);
 }
 
 bool
@@ -1864,7 +1873,7 @@ Perl_to_utf8_case(pTHX_ const U8 *p, U8* ustrp, STRLEN *lenp,
 }
 
 STATIC UV
-S_check_locale_boundary_crossing(pTHX_ const U8* const p, const UV result, U8* const ustrp, STRLEN *lenp)
+S_check_locale_boundary_crossing(pTHX_ const char * const func_name, const U8* const p, const UV result, U8* const ustrp, STRLEN *lenp)
 {
     /* This is called when changing the case of a utf8-encoded character above
      * the Latin1 range, and the operation is in a non-UTF-8 locale.  If the
@@ -1907,6 +1916,14 @@ bad_crossing:
 
     /* Failed, have to return the original */
     original = valid_utf8_to_uvchr(p, lenp);
+
+    /* diag_listed_as: Can't do %s("%s") on non-UTF-8 locale; resolved to "%s". */
+    Perl_ck_warner(aTHX_ packWARN(WARN_LOCALE),
+                           "Can't do %s(\"\\x{%"UVXf"}\") on non-UTF-8 locale; "
+                           "resolved to \"\\x{%"UVXf"}\".",
+                           func_name,
+                           original,
+                           original);
     Copy(p, ustrp, *lenp, char);
     return original;
 }
@@ -1955,7 +1972,7 @@ Perl__to_utf8_upper_flags(pTHX_ const U8 *p, U8* ustrp, STRLEN *lenp, bool flags
        result = CALL_UPPER_CASE(p, ustrp, lenp);
 
        if (flags) {
-           result = check_locale_boundary_crossing(p, result, ustrp, lenp);
+           result = check_locale_boundary_crossing("uc", p, result, ustrp, lenp);
        }
        return result;
     }
@@ -2020,7 +2037,7 @@ Perl__to_utf8_title_flags(pTHX_ const U8 *p, U8* ustrp, STRLEN *lenp, bool flags
        result = CALL_TITLE_CASE(p, ustrp, lenp);
 
        if (flags) {
-           result = check_locale_boundary_crossing(p, result, ustrp, lenp);
+           result = check_locale_boundary_crossing("ucfirst", p, result, ustrp, lenp);
        }
        return result;
     }
@@ -2084,7 +2101,7 @@ Perl__to_utf8_lower_flags(pTHX_ const U8 *p, U8* ustrp, STRLEN *lenp, bool flags
        result = CALL_LOWER_CASE(p, ustrp, lenp);
 
        if (flags) {
-           result = check_locale_boundary_crossing(p, result, ustrp, lenp);
+           result = check_locale_boundary_crossing("lc", p, result, ustrp, lenp);
        }
 
        return result;
@@ -2168,15 +2185,23 @@ Perl__to_utf8_fold_flags(pTHX_ const U8 *p, U8* ustrp, STRLEN *lenp, U8 flags)
                 && memEQ((char *) p, LATIN_CAPITAL_LETTER_SHARP_S_UTF8,
                           sizeof(LATIN_CAPITAL_LETTER_SHARP_S_UTF8) - 1))
             {
+                /* diag_listed_as: Can't do %s("%s") on non-UTF-8 locale; resolved to "%s". */
+                Perl_ck_warner(aTHX_ packWARN(WARN_LOCALE),
+                              "Can't do fc(\"\\x{1E9E}\") on non-UTF-8 locale; "
+                              "resolved to \"\\x{17F}\\x{17F}\".");
                 goto return_long_s;
             }
             else if (UTF8SKIP(p) == sizeof(LATIN_SMALL_LIGATURE_LONG_S_T) - 1
                 && memEQ((char *) p, LATIN_SMALL_LIGATURE_LONG_S_T_UTF8,
                           sizeof(LATIN_SMALL_LIGATURE_LONG_S_T_UTF8) - 1))
             {
+                /* diag_listed_as: Can't do %s("%s") on non-UTF-8 locale; resolved to "%s". */
+                Perl_ck_warner(aTHX_ packWARN(WARN_LOCALE),
+                              "Can't do fc(\"\\x{FB05}\") on non-UTF-8 locale; "
+                              "resolved to \"\\x{FB06}\".");
                 goto return_ligature_st;
             }
-           return check_locale_boundary_crossing(p, result, ustrp, lenp);
+           return check_locale_boundary_crossing("fc", p, result, ustrp, lenp);
        }
        else if (! (flags & FOLD_FLAGS_NOMIX_ASCII)) {
            return result;