* that could result in a non-overflowing code point */
PERL_ARGS_ASSERT_DOES_UTF8_OVERFLOW;
- assert(s + UTF8SKIP(s) >= e);
+ assert(s <= e && s + UTF8SKIP(s) >= e);
+
+#if ! defined(UV_IS_QUAD) && ! defined(EBCDIC)
+
+ /* On 32 bit ASCII machines, many overlongs that start with FF don't
+ * overflow */
+
+ if (isFF_OVERLONG(s, e - s)) {
+ const U8 max_32_bit_overlong[] = "\xFF\x80\x80\x80\x80\x80\x80\x84";
+ return memGE(s, max_32_bit_overlong,
+ MIN(e - s, sizeof(max_32_bit_overlong) - 1));
+ }
+
+#endif
for (x = s; x < e; x++, y++) {
return TRUE;
}
-# if defined(UV_IS_QUAD) || defined(EBCDIC)
+ /* Check for the FF overlong */
+ return isFF_OVERLONG(s, len);
+}
+
+PERL_STATIC_INLINE bool
+S_isFF_OVERLONG(const U8 * const s, const STRLEN len)
+{
+ PERL_ARGS_ASSERT_ISFF_OVERLONG;
/* Check for the FF overlong. This happens only if all these bytes match;
* what comes after them doesn't matter. See tables in utf8.h,
- * utfebcdic.h. (Can't happen on ASCII 32-bit platforms, as overflows
- * instead.) */
-
- if ( len >= sizeof(FF_OVERLONG_PREFIX) - 1
- && UNLIKELY(memEQ(s, FF_OVERLONG_PREFIX,
- sizeof(FF_OVERLONG_PREFIX) - 1)))
- {
- return TRUE;
- }
-
-#endif
+ * utfebcdic.h. */
- return FALSE;
+ return len >= sizeof(FF_OVERLONG_PREFIX) - 1
+ && UNLIKELY(memEQ(s, FF_OVERLONG_PREFIX,
+ sizeof(FF_OVERLONG_PREFIX) - 1));
}
#undef F0_ABOVE_OVERLONG
S_unexpected_non_continuation_text(pTHX_ const U8 * const s,
/* How many bytes to print */
- const STRLEN print_len,
+ STRLEN print_len,
/* Which one is the non-continuation */
const STRLEN non_cont_byte_pos,
? "immediately"
: Perl_form(aTHX_ "%d bytes",
(int) non_cont_byte_pos);
+ unsigned int i;
PERL_ARGS_ASSERT_UNEXPECTED_NON_CONTINUATION_TEXT;
* calculated, it's likely faster to pass it; verify under DEBUGGING */
assert(expect_len == UTF8SKIP(s));
+ /* It is possible that utf8n_to_uvchr() was called incorrectly, with a
+ * length that is larger than is actually available in the buffer. If we
+ * print all the bytes based on that length, we will read past the buffer
+ * end. Often, the strings are NUL terminated, so to lower the chances of
+ * this happening, print the malformed bytes only up through any NUL. */
+ for (i = 1; i < print_len; i++) {
+ if (*(s + i) == '\0') {
+ print_len = i + 1; /* +1 gets the NUL printed */
+ break;
+ }
+ }
+
return Perl_form(aTHX_ "%s: %s (unexpected non-continuation byte 0x%02x,"
" %s after start byte 0x%02x; need %d bytes, got %d)",
malformed_text,
/* Add the passed-in inversion list, which invalidates the one
* already stored in the swash */
invlist_in_swash_is_valid = FALSE;
+ SvREADONLY_off(swash_invlist); /* Turned on again below */
_invlist_union(invlist, swash_invlist, &swash_invlist);
}
else {
else SvREFCNT_inc_simple_void_NN(swash_invlist);
}
+ /* The result is immutable. Forbid attempts to change it. */
SvREADONLY_on(swash_invlist);
/* Use the inversion list stand-alone if small enough */