}
/*
-=for apidoc is_utf8_char_buf
-
-This is identical to the macro L</isUTF8_CHAR>.
-
-=cut */
-
-STRLEN
-Perl_is_utf8_char_buf(const U8 *buf, const U8* buf_end)
-{
-
- PERL_ARGS_ASSERT_IS_UTF8_CHAR_BUF;
-
- return isUTF8_CHAR(buf, buf_end);
-}
-
-/*
=for apidoc is_utf8_string
Returns true if the first C<len> bytes of string C<s> form a valid
* is the label <malformed>.
*/
-malformed:
+ malformed:
if (sv && ckWARN_d(WARN_UTF8)) {
pack_warn = packWARN(WARN_UTF8);
}
-disallowed:
+ disallowed:
if (flags & UTF8_CHECK_ONLY) {
if (retlen)
return 0;
}
-do_warn:
+ do_warn:
if (pack_warn) { /* <pack_warn> was initialized to 0, and changed only
if warnings are to be raised. */
s += UTF8SKIP(s);
}
- /* Here, no characters crossed, result is ok as-is */
+ /* Here, no characters crossed, result is ok as-is, but we warn. */
+ _CHECK_AND_OUTPUT_WIDE_LOCALE_UTF8_MSG(p, p + UTF8SKIP(p));
return result;
}
-bad_crossing:
+ bad_crossing:
/* Failed, have to return the original */
original = valid_utf8_to_uvchr(p, lenp);
* routine. This allows that step to be skipped.
* Currently, this requires s1 to be encoded as UTF-8
* (u1 must be true), which is asserted for.
+ * FOLDEQ_S1_FOLDS_SANE With either NOMIX_ASCII or LOCALE, no folds may
+ * cross certain boundaries. Hence, the caller should
+ * let this function do the folding instead of
+ * pre-folding. This code contains an assertion to
+ * that effect. However, if the caller knows what
+ * it's doing, it can pass this flag to indicate that,
+ * and the assertion is skipped.
* FOLDEQ_S2_ALREADY_FOLDED Similarly.
+ * FOLDEQ_S2_FOLDS_SANE
*/
I32
Perl_foldEQ_utf8_flags(pTHX_ const char *s1, char **pe1, UV l1, bool u1, const char *s2, char **pe2, UV l2, bool u2, U32 flags)
PERL_ARGS_ASSERT_FOLDEQ_UTF8_FLAGS;
assert( ! ((flags & (FOLDEQ_UTF8_NOMIX_ASCII | FOLDEQ_LOCALE))
- && (flags & (FOLDEQ_S1_ALREADY_FOLDED | FOLDEQ_S2_ALREADY_FOLDED))));
+ && (((flags & FOLDEQ_S1_ALREADY_FOLDED)
+ && !(flags & FOLDEQ_S1_FOLDS_SANE))
+ || ((flags & FOLDEQ_S2_ALREADY_FOLDED)
+ && !(flags & FOLDEQ_S2_FOLDS_SANE)))));
/* The algorithm is to trial the folds without regard to the flags on
* the first line of the above assert(), and then see if the result
* violates them. This means that the inputs can't be pre-folded to a