+STATIC U32
+S_check_and_deprecate(pTHX_ const U8 *p,
+ const U8 **e,
+ const unsigned int type, /* See below */
+ const bool use_locale, /* Is this a 'LC_'
+ macro call? */
+ const char * const file,
+ const unsigned line)
+{
+ /* This is a temporary function to deprecate the unsafe calls to the case
+ * changing macros and functions. It keeps all the special stuff in just
+ * one place.
+ *
+ * It updates *e with the pointer to the end of the input string. If using
+ * the old-style macros, *e is NULL on input, and so this function assumes
+ * the input string is long enough to hold the entire UTF-8 sequence, and
+ * sets *e accordingly, but it then returns a flag to pass the
+ * utf8n_to_uvchr(), to tell it that this size is a guess, and to avoid
+ * using the full length if possible.
+ *
+ * It also does the assert that *e > p when *e is not NULL. This should be
+ * migrated to the callers when this function gets deleted.
+ *
+ * The 'type' parameter is used for the caller to specify which case
+ * changing function this is called from: */
+
+# define DEPRECATE_TO_UPPER 0
+# define DEPRECATE_TO_TITLE 1
+# define DEPRECATE_TO_LOWER 2
+# define DEPRECATE_TO_FOLD 3
+
+ U32 utf8n_flags = 0;
+ const char * name;
+ const char * alternative;
+
+ PERL_ARGS_ASSERT_CHECK_AND_DEPRECATE;
+
+ if (*e == NULL) {
+ utf8n_flags = _UTF8_NO_CONFIDENCE_IN_CURLEN;
+ *e = p + UTF8SKIP(p);
+
+ /* For mathoms.c calls, we use the function name we know is stored
+ * there. It could be part of a larger path */
+ if (type == DEPRECATE_TO_UPPER) {
+ name = instr(file, "mathoms.c")
+ ? "to_utf8_upper"
+ : "toUPPER_utf8";
+ alternative = "toUPPER_utf8_safe";
+ }
+ else if (type == DEPRECATE_TO_TITLE) {
+ name = instr(file, "mathoms.c")
+ ? "to_utf8_title"
+ : "toTITLE_utf8";
+ alternative = "toTITLE_utf8_safe";
+ }
+ else if (type == DEPRECATE_TO_LOWER) {
+ name = instr(file, "mathoms.c")
+ ? "to_utf8_lower"
+ : "toLOWER_utf8";
+ alternative = "toLOWER_utf8_safe";
+ }
+ else if (type == DEPRECATE_TO_FOLD) {
+ name = instr(file, "mathoms.c")
+ ? "to_utf8_fold"
+ : "toFOLD_utf8";
+ alternative = "toFOLD_utf8_safe";
+ }
+ else Perl_croak(aTHX_ "panic: Unexpected case change type");
+
+ warn_on_first_deprecated_use(name, alternative, use_locale, file, line);
+ }
+ else {
+ assert (p < *e);
+ }
+
+ return utf8n_flags;
+}
+
+/* The process for changing the case is essentially the same for the four case
+ * change types, except there are complications for folding. Otherwise the
+ * difference is only which case to change to. To make sure that they all do
+ * the same thing, the bodies of the functions are extracted out into the
+ * following two macros. The functions are written with the same variable
+ * names, and these are known and used inside these macros. It would be
+ * better, of course, to have inline functions to do it, but since different
+ * macros are called, depending on which case is being changed to, this is not
+ * feasible in C (to khw's knowledge). Two macros are created so that the fold
+ * function can start with the common start macro, then finish with its special
+ * handling; while the other three cases can just use the common end macro.
+ *
+ * The algorithm is to use the proper (passed in) macro or function to change
+ * the case for code points that are below 256. The macro is used if using
+ * locale rules for the case change; the function if not. If the code point is
+ * above 255, it is computed from the input UTF-8, and another macro is called
+ * to do the conversion. If necessary, the output is converted to UTF-8. If
+ * using a locale, we have to check that the change did not cross the 255/256
+ * boundary, see check_locale_boundary_crossing() for further details.
+ *
+ * The macros are split with the correct case change for the below-256 case
+ * stored into 'result', and in the middle of an else clause for the above-255
+ * case. At that point in the 'else', 'result' is not the final result, but is
+ * the input code point calculated from the UTF-8. The fold code needs to
+ * realize all this and take it from there.
+ *
+ * If you read the two macros as sequential, it's easier to understand what's
+ * going on. */
+#define CASE_CHANGE_BODY_START(locale_flags, LC_L1_change_macro, L1_func, \
+ L1_func_extra_param) \
+ \
+ if (flags & (locale_flags)) { \
+ /* Treat a UTF-8 locale as not being in locale at all */ \
+ if (IN_UTF8_CTYPE_LOCALE) { \
+ flags &= ~(locale_flags); \
+ } \
+ else { \
+ _CHECK_AND_WARN_PROBLEMATIC_LOCALE; \
+ } \
+ } \
+ \
+ if (UTF8_IS_INVARIANT(*p)) { \
+ if (flags & (locale_flags)) { \
+ result = LC_L1_change_macro(*p); \
+ } \
+ else { \
+ return L1_func(*p, ustrp, lenp, L1_func_extra_param); \
+ } \
+ } \
+ else if UTF8_IS_NEXT_CHAR_DOWNGRADEABLE(p, e) { \
+ if (flags & (locale_flags)) { \
+ result = LC_L1_change_macro(EIGHT_BIT_UTF8_TO_NATIVE(*p, \
+ *(p+1))); \
+ } \
+ else { \
+ return L1_func(EIGHT_BIT_UTF8_TO_NATIVE(*p, *(p+1)), \
+ ustrp, lenp, L1_func_extra_param); \
+ } \
+ } \
+ else { /* malformed UTF-8 or ord above 255 */ \
+ STRLEN len_result; \
+ result = utf8n_to_uvchr(p, e - p, &len_result, UTF8_CHECK_ONLY); \
+ if (len_result == (STRLEN) -1) { \
+ _force_out_malformed_utf8_message(p, e, utf8n_flags, \
+ 1 /* Die */ ); \
+ }
+
+#define CASE_CHANGE_BODY_END(locale_flags, change_macro) \
+ result = change_macro(result, p, ustrp, lenp); \
+ \
+ if (flags & (locale_flags)) { \
+ result = check_locale_boundary_crossing(p, result, ustrp, lenp); \
+ } \
+ return result; \
+ } \
+ \
+ /* Here, used locale rules. Convert back to UTF-8 */ \
+ if (UTF8_IS_INVARIANT(result)) { \
+ *ustrp = (U8) result; \
+ *lenp = 1; \
+ } \
+ else { \
+ *ustrp = UTF8_EIGHT_BIT_HI((U8) result); \
+ *(ustrp + 1) = UTF8_EIGHT_BIT_LO((U8) result); \
+ *lenp = 2; \
+ } \
+ \
+ return result;
+