{
PERL_ARGS_ASSERT_UTF8_TO_UVCHR;
- return utf8_to_uvchr_buf(s, s + UTF8_MAXBYTES, retlen);
+ /* This function is unsafe if malformed UTF-8 input is given it, which is
+ * why the function is deprecated. If the first byte of the input
+ * indicates that there are more bytes remaining in the sequence that forms
+ * the character than there are in the input buffer, it can read past the
+ * end. But we can make it safe if the input string happens to be
+ * NUL-terminated, as many strings in Perl are, by refusing to read past a
+ * NUL. A NUL indicates the start of the next character anyway. If the
+ * input isn't NUL-terminated, the function remains unsafe, as it always
+ * has been.
+ *
+ * An initial NUL has to be handled separately, but all ASCIIs can be
+ * handled the same way, speeding up this common case */
+
+ if (UTF8_IS_INVARIANT(*s)) { /* Assumes 's' contains at least 1 byte */
+ return (UV) *s;
+ }
+
+ return utf8_to_uvchr_buf(s,
+ s + strnlen((char *) s, UTF8_MAXBYTES),
+ retlen);
}
/*