+ uv = UTF8_ACCUMULATE(uv, *s);
+ }
+ else {
+ /* Here, found a non-continuation before processing all expected
+ * bytes. This byte begins a new character, so quit, even if
+ * allowing this malformation. */
+ unexpected_non_continuation = TRUE;
+ break;
+ }
+ } /* End of loop through the character's bytes */
+
+ /* Save how many bytes were actually in the character */
+ curlen = s - s0;
+
+ /* The loop above finds two types of malformations: non-continuation and/or
+ * overflow. The non-continuation malformation is really a too-short
+ * malformation, as it means that the current character ended before it was
+ * expected to (being terminated prematurely by the beginning of the next
+ * character), whereas in the too-short malformation there just are too few
+ * bytes available to hold the character. In both cases, the check below
+ * that we have found the expected number of bytes would fail if executed.
+ * Thus the non-continuation malformation is really unnecessary, being a
+ * subset of the too-short malformation. But there may be existing
+ * applications that are expecting the non-continuation type, so we retain
+ * it, and return it in preference to the too-short malformation. (If this
+ * code were being written from scratch, the two types might be collapsed
+ * into one.) I, khw, am also giving priority to returning the
+ * non-continuation and too-short malformations over overflow when multiple
+ * ones are present. I don't know of any real reason to prefer one over
+ * the other, except that it seems to me that multiple-byte errors trump
+ * errors from a single byte. */
+ if (UNLIKELY(unexpected_non_continuation)) {
+ if (!(flags & UTF8_ALLOW_NON_CONTINUATION)) {
+ if (! (flags & UTF8_CHECK_ONLY)) {
+ if (curlen == 1) {
+ sv = sv_2mortal(Perl_newSVpvf(aTHX_ "%s (unexpected non-continuation byte 0x%02x, immediately after start byte 0x%02x)", malformed_text, *s, *s0));
+ }
+ else {
+ sv = sv_2mortal(Perl_newSVpvf(aTHX_ "%s (unexpected non-continuation byte 0x%02x, %d bytes after start byte 0x%02x, expected %d bytes)", malformed_text, *s, (int) curlen, *s0, (int)expectlen));
+ }
+ }
+ goto malformed;
+ }
+ uv = UNICODE_REPLACEMENT;