#define MASK UTF_CONTINUATION_MASK
U8 *
-Perl_uvoffuni_to_utf8_flags(pTHX_ U8 *d, UV uv, UV flags)
+Perl_uvoffuni_to_utf8_flags(pTHX_ U8 *d, UV uv, const UV flags)
{
PERL_ARGS_ASSERT_UVOFFUNI_TO_UTF8_FLAGS;
uvchr_to_utf8(p, c);
return CALL_FOLD_CASE(c, p, p, lenp, flags & FOLD_FLAGS_FULL);
}
- else { /* Otherwise, _to_utf8_fold_flags has the intelligence to deal with
+ else { /* Otherwise, _toFOLD_utf8_flags has the intelligence to deal with
the special flags. */
U8 utf8_c[UTF8_MAXBYTES + 1];
needs_full_generality:
uvchr_to_utf8(utf8_c, c);
- return _to_utf8_fold_flags(utf8_c, p, lenp, flags);
+ return _toFOLD_utf8_flags(utf8_c, utf8_c + sizeof(utf8_c), p, lenp, flags);
}
}
Perl_croak(aTHX_ "panic: hv_store() unexpectedly failed");
}
+ if (instr(file, "mathoms.c")) {
+ Perl_warner(aTHX_ WARN_DEPRECATED,
+ "In %s, line %d, starting in Perl v5.30, %s()"
+ " will be removed. Avoid this message by"
+ " converting to use %s().\n",
+ file, line, name, alternative);
+ }
+ else {
Perl_warner(aTHX_ WARN_DEPRECATED,
"In %s, line %d, starting in Perl v5.30, %s() will"
" require an additional parameter. Avoid this"
" message by converting to use %s().\n",
file, line, name, alternative);
+ }
}
}
}
warn_on_first_deprecated_use(name, alternative, use_locale, file, line);
if (use_utf8 && UTF8_IS_ABOVE_LATIN1(*p)) {
- SV * invlist;
switch (classnum) {
case _CC_WORDCHAR:
return is_VERTWS_high(p);
case _CC_IDFIRST:
if (! PL_utf8_perl_idstart) {
- invlist = _new_invlist_C_array(_Perl_IDStart_invlist);
+ PL_utf8_perl_idstart
+ = _new_invlist_C_array(_Perl_IDStart_invlist);
}
- return is_utf8_common(p, &PL_utf8_perl_idstart, "_Perl_IDStart", invlist);
+ return is_utf8_common(p, &PL_utf8_perl_idstart,
+ "_Perl_IDStart", NULL);
case _CC_IDCONT:
if (! PL_utf8_perl_idcont) {
- invlist = _new_invlist_C_array(_Perl_IDCont_invlist);
+ PL_utf8_perl_idcont
+ = _new_invlist_C_array(_Perl_IDCont_invlist);
}
- return is_utf8_common(p, &PL_utf8_perl_idcont, "_Perl_IDCont", invlist);
+ return is_utf8_common(p, &PL_utf8_perl_idcont,
+ "_Perl_IDCont", NULL);
}
}
/*
=for apidoc to_utf8_case
-Instead use the appropriate one of L</toUPPER_utf8>,
-L</toTITLE_utf8>,
-L</toLOWER_utf8>,
-or L</toFOLD_utf8>.
+Instead use the appropriate one of L</toUPPER_utf8_safe>,
+L</toTITLE_utf8_safe>,
+L</toLOWER_utf8_safe>,
+or L</toFOLD_utf8_safe>.
+
+This function will be removed in Perl v5.28.
C<p> contains the pointer to the UTF-8 string encoding
the character that is being converted. This routine assumes that the character
Perl_to_utf8_case(pTHX_ const U8 *p, U8* ustrp, STRLEN *lenp,
SV **swashp, const char *normal, const char *special)
{
+ STRLEN len_cp;
+ UV cp;
+ const U8 * e = p + UTF8SKIP(p);
+
PERL_ARGS_ASSERT_TO_UTF8_CASE;
- return _to_utf8_case(valid_utf8_to_uvchr(p, NULL), p, ustrp, lenp, swashp, normal, special);
+ cp = utf8n_to_uvchr(p, e - p, &len_cp, UTF8_CHECK_ONLY);
+ if (len_cp == (STRLEN) -1) {
+ _force_out_malformed_utf8_message(p, e,
+ _UTF8_NO_CONFIDENCE_IN_CURLEN, 1 /* Die */ );
+ }
+
+ return _to_utf8_case(cp, p, ustrp, lenp, swashp, normal, special);
}
/* change namve uv1 to 'from' */
return original;
}
+STATIC U32
+S_check_and_deprecate(pTHX_ const U8 *p,
+ const U8 **e,
+ const unsigned int type, /* See below */
+ const bool use_locale, /* Is this a 'LC_'
+ macro call? */
+ const char * const file,
+ const unsigned line)
+{
+ /* This is a temporary function to deprecate the unsafe calls to the case
+ * changing macros and functions. It keeps all the special stuff in just
+ * one place.
+ *
+ * It updates *e with the pointer to the end of the input string. If using
+ * the old-style macros, *e is NULL on input, and so this function assumes
+ * the input string is long enough to hold the entire UTF-8 sequence, and
+ * sets *e accordingly, but it then returns a flag to pass the
+ * utf8n_to_uvchr(), to tell it that this size is a guess, and to avoid
+ * using the full length if possible.
+ *
+ * It also does the assert that *e > p when *e is not NULL. This should be
+ * migrated to the callers when this function gets deleted.
+ *
+ * The 'type' parameter is used for the caller to specify which case
+ * changing function this is called from: */
+
+# define DEPRECATE_TO_UPPER 0
+# define DEPRECATE_TO_TITLE 1
+# define DEPRECATE_TO_LOWER 2
+# define DEPRECATE_TO_FOLD 3
+
+ U32 utf8n_flags = 0;
+ const char * name;
+ const char * alternative;
+
+ PERL_ARGS_ASSERT_CHECK_AND_DEPRECATE;
+
+ if (*e == NULL) {
+ utf8n_flags = _UTF8_NO_CONFIDENCE_IN_CURLEN;
+ *e = p + UTF8SKIP(p);
+
+ /* For mathoms.c calls, we use the function name we know is stored
+ * there. It could be part of a larger path */
+ if (type == DEPRECATE_TO_UPPER) {
+ name = instr(file, "mathoms.c")
+ ? "to_utf8_upper"
+ : "toUPPER_utf8";
+ alternative = "toUPPER_utf8_safe";
+ }
+ else if (type == DEPRECATE_TO_TITLE) {
+ name = instr(file, "mathoms.c")
+ ? "to_utf8_title"
+ : "toTITLE_utf8";
+ alternative = "toTITLE_utf8_safe";
+ }
+ else if (type == DEPRECATE_TO_LOWER) {
+ name = instr(file, "mathoms.c")
+ ? "to_utf8_lower"
+ : "toLOWER_utf8";
+ alternative = "toLOWER_utf8_safe";
+ }
+ else if (type == DEPRECATE_TO_FOLD) {
+ name = instr(file, "mathoms.c")
+ ? "to_utf8_fold"
+ : "toFOLD_utf8";
+ alternative = "toFOLD_utf8_safe";
+ }
+ else Perl_croak(aTHX_ "panic: Unexpected case change type");
+
+ warn_on_first_deprecated_use(name, alternative, use_locale, file, line);
+ }
+ else {
+ assert (p < *e);
+ }
+
+ return utf8n_flags;
+}
+
/* The process for changing the case is essentially the same for the four case
* change types, except there are complications for folding. Otherwise the
* difference is only which case to change to. To make sure that they all do
* going on. */
#define CASE_CHANGE_BODY_START(locale_flags, LC_L1_change_macro, L1_func, \
L1_func_extra_param) \
+ \
if (flags & (locale_flags)) { \
/* Treat a UTF-8 locale as not being in locale at all */ \
if (IN_UTF8_CTYPE_LOCALE) { \
return L1_func(*p, ustrp, lenp, L1_func_extra_param); \
} \
} \
- else if UTF8_IS_DOWNGRADEABLE_START(*p) { \
+ else if UTF8_IS_NEXT_CHAR_DOWNGRADEABLE(p, e) { \
if (flags & (locale_flags)) { \
result = LC_L1_change_macro(EIGHT_BIT_UTF8_TO_NATIVE(*p, \
*(p+1))); \
ustrp, lenp, L1_func_extra_param); \
} \
} \
- else { /* malformed UTF-8 */ \
- result = valid_utf8_to_uvchr(p, NULL); \
+ else { /* malformed UTF-8 or ord above 255 */ \
+ STRLEN len_result; \
+ result = utf8n_to_uvchr(p, e - p, &len_result, UTF8_CHECK_ONLY); \
+ if (len_result == (STRLEN) -1) { \
+ _force_out_malformed_utf8_message(p, e, utf8n_flags, \
+ 1 /* Die */ ); \
+ }
#define CASE_CHANGE_BODY_END(locale_flags, change_macro) \
result = change_macro(result, p, ustrp, lenp); \
/*
=for apidoc to_utf8_upper
-Instead use L</toUPPER_utf8>.
+Instead use L</toUPPER_utf8_safe>.
=cut */
* be used. */
UV
-Perl__to_utf8_upper_flags(pTHX_ const U8 *p, U8* ustrp, STRLEN *lenp, bool flags)
+Perl__to_utf8_upper_flags(pTHX_ const U8 *p,
+ const U8 *e,
+ U8* ustrp,
+ STRLEN *lenp,
+ bool flags,
+ const char * const file,
+ const int line)
{
UV result;
+ const U32 utf8n_flags = check_and_deprecate(p, &e, DEPRECATE_TO_UPPER,
+ cBOOL(flags), file, line);
PERL_ARGS_ASSERT__TO_UTF8_UPPER_FLAGS;
/*
=for apidoc to_utf8_title
-Instead use L</toTITLE_utf8>.
+Instead use L</toTITLE_utf8_safe>.
=cut */
*/
UV
-Perl__to_utf8_title_flags(pTHX_ const U8 *p, U8* ustrp, STRLEN *lenp, bool flags)
+Perl__to_utf8_title_flags(pTHX_ const U8 *p,
+ const U8 *e,
+ U8* ustrp,
+ STRLEN *lenp,
+ bool flags,
+ const char * const file,
+ const int line)
{
UV result;
+ const U32 utf8n_flags = check_and_deprecate(p, &e, DEPRECATE_TO_TITLE,
+ cBOOL(flags), file, line);
PERL_ARGS_ASSERT__TO_UTF8_TITLE_FLAGS;
/*
=for apidoc to_utf8_lower
-Instead use L</toLOWER_utf8>.
+Instead use L</toLOWER_utf8_safe>.
=cut */
*/
UV
-Perl__to_utf8_lower_flags(pTHX_ const U8 *p, U8* ustrp, STRLEN *lenp, bool flags)
+Perl__to_utf8_lower_flags(pTHX_ const U8 *p,
+ const U8 *e,
+ U8* ustrp,
+ STRLEN *lenp,
+ bool flags,
+ const char * const file,
+ const int line)
{
UV result;
+ const U32 utf8n_flags = check_and_deprecate(p, &e, DEPRECATE_TO_LOWER,
+ cBOOL(flags), file, line);
PERL_ARGS_ASSERT__TO_UTF8_LOWER_FLAGS;
/*
=for apidoc to_utf8_fold
-Instead use L</toFOLD_utf8>.
+Instead use L</toFOLD_utf8_safe>.
=cut */
*/
UV
-Perl__to_utf8_fold_flags(pTHX_ const U8 *p, U8* ustrp, STRLEN *lenp, U8 flags)
+Perl__to_utf8_fold_flags(pTHX_ const U8 *p,
+ const U8 *e,
+ U8* ustrp,
+ STRLEN *lenp,
+ U8 flags,
+ const char * const file,
+ const int line)
{
UV result;
+ const U32 utf8n_flags = check_and_deprecate(p, &e, DEPRECATE_TO_FOLD,
+ cBOOL(flags), file, line);
PERL_ARGS_ASSERT__TO_UTF8_FOLD_FLAGS;
invlist = _new_invlist(0);
}
else {
- while (isSPACE(*l)) l++;
l = (U8 *) after_atou;
/* Get the 0th element, which is needed to setup the inversion list */
*foldbuf1 = toFOLD(*p1);
}
else if (u1) {
- _to_utf8_fold_flags(p1, foldbuf1, &n1, flags_for_folder);
+ _toFOLD_utf8_flags(p1, e1, foldbuf1, &n1, flags_for_folder);
}
else { /* Not UTF-8, get UTF-8 fold */
_to_uni_fold_flags(*p1, foldbuf1, &n1, flags_for_folder);
*foldbuf2 = toFOLD(*p2);
}
else if (u2) {
- _to_utf8_fold_flags(p2, foldbuf2, &n2, flags_for_folder);
+ _toFOLD_utf8_flags(p2, e2, foldbuf2, &n2, flags_for_folder);
}
else {
_to_uni_fold_flags(*p2, foldbuf2, &n2, flags_for_folder);