+ return utf8_to_uvchr_buf(s, s + UTF8_MAXBYTES, retlen);
+}
+
+/*
+=for apidoc utf8_to_uvuni_buf
+
+Returns the Unicode code point of the first character in the string C<s> which
+is assumed to be in UTF-8 encoding; C<send> points to 1 beyond the end of C<s>.
+C<retlen> will be set to the length, in bytes, of that character.
+
+This function should only be used when the returned UV is considered
+an index into the Unicode semantic tables (e.g. swashes).
+
+If C<s> does not point to a well-formed UTF-8 character and UTF8 warnings are
+enabled, zero is returned and C<*retlen> is set (if C<retlen> isn't
+NULL) to -1. If those warnings are off, the computed value if well-defined (or
+the Unicode REPLACEMENT CHARACTER, if not) is silently returned, and C<*retlen>
+is set (if C<retlen> isn't NULL) so that (S<C<s> + C<*retlen>>) is the
+next possible position in C<s> that could begin a non-malformed character.
+See L</utf8n_to_uvuni> for details on when the REPLACEMENT CHARACTER is returned.
+
+=cut
+*/
+
+UV
+Perl_utf8_to_uvuni_buf(pTHX_ const U8 *s, const U8 *send, STRLEN *retlen)
+{
+ PERL_ARGS_ASSERT_UTF8_TO_UVUNI_BUF;
+
+ assert(send > s);
+
+ /* Call the low level routine asking for checks */
+ return Perl_utf8n_to_uvuni(aTHX_ s, send -s, retlen,
+ ckWARN_d(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
+}
+
+/* Like L</utf8_to_uvuni_buf>(), but should only be called when it is known that
+ * there are no malformations in the input UTF-8 string C<s>. Surrogates,
+ * non-character code points, and non-Unicode code points are allowed */
+
+UV
+Perl_valid_utf8_to_uvuni(pTHX_ const U8 *s, STRLEN *retlen)
+{
+ UV expectlen = UTF8SKIP(s);
+ const U8* send = s + expectlen;
+ UV uv = NATIVE_TO_UTF(*s);
+
+ PERL_ARGS_ASSERT_VALID_UTF8_TO_UVUNI;
+
+ if (retlen) {
+ *retlen = expectlen;
+ }
+
+ /* An invariant is trivially returned */
+ if (expectlen == 1) {
+ return uv;
+ }
+
+ /* Remove the leading bits that indicate the number of bytes, leaving just
+ * the bits that are part of the value */
+ uv &= UTF_START_MASK(expectlen);
+
+ /* Now, loop through the remaining bytes, accumulating each into the
+ * working total as we go. (I khw tried unrolling the loop for up to 4
+ * bytes, but there was no performance improvement) */
+ for (++s; s < send; s++) {
+ uv = UTF8_ACCUMULATE(uv, *s);
+ }
+
+ return uv;