# error Unknown character set
#endif
-#if { VERSION < 5.31.4 }
- /* Versions prior to this accepted things that are now considered
- * malformations, and didn't return -1 on error with warnings enabled
- * */
+#if { VERSION < 5.35.10 }
+ /* Versions prior to 5.31.4 accepted things that are now considered
+ * malformations, and didn't return -1 on error with warnings enabled.
+ * Versions before 5.35.10 dereferenced empty input without checking */
# undef utf8_to_uvchr_buf
#endif
UV
utf8_to_uvchr_buf(pTHX_ const U8 *s, const U8 *send, STRLEN *retlen)
{
+# if { VERSION >= 5.31.4 } /* But from above, must be < 5.35.10 */
+# if { VERSION != 5.35.9 }
+
+ /* Versions less than 5.35.9 could dereference s on zero length, so
+ * pass it something where no harm comes from that. */
+ if (send <= s) s = send = (U8 *) "?";
+ return Perl_utf8_to_uvchr_buf_helper(aTHX_ s, send, retlen);
+
+# else /* Below is 5.35.9, which also works on non-empty input, but
+ for empty input, can wrongly dereference, and additionally is
+ also just plain broken */
+ if (send > s) return Perl_utf8_to_uvchr_buf_helper(aTHX_ s, send, retlen);
+ if (! ckWARN_d(WARN_UTF8)) {
+ if (retlen) *retlen = 0;
+ return UNICODE_REPLACEMENT;
+ }
+ else {
+ s = send = (U8 *) "?";
+
+ /* Call just for its warning */
+ (void) Perl__utf8n_to_uvchr_msgs_helper(s, 0, NULL, 0, NULL, NULL);
+ if (retlen) *retlen = (STRLEN) -1;
+ return 0;
+ }
+
+# endif
+# else
+
UV ret;
STRLEN curlen;
bool overflows = 0;
}
}
-# if { VERSION < 5.26.0 } && ! defined(EBCDIC)
+# if { VERSION < 5.26.0 } && ! defined(EBCDIC)
/* Perl did not properly detect overflow for much of its history on
* non-EBCDIC platforms, often returning an overlong value which may or may
}
else
-# endif /* < 5.26 */
+# endif /* < 5.26 */
/* Here, we are either in a release that properly detects overflow, or
* we have checked for overflow and the next statement is executing as
s, curlen, retlen, (UTF8_ALLOW_ANYUV
& ~(UTF8_ALLOW_LONG|UTF8_ALLOW_EMPTY)));
-# if { VERSION >= 5.26.0 } && { VERSION < 5.28.0 }
+# if { VERSION >= 5.26.0 } && { VERSION < 5.28.0 }
/* But actually, more modern versions restrict the UV to being no more than
* what an IV can hold, so it could still have gotten it wrong about
overflows = 1;
}
-# endif
+# endif
if (UNLIKELY(overflows)) {
if (! do_warnings) {
* modern version of this function returns */
ret = UNICODE_REPLACEMENT;
-# if { VERSION < 5.16.0 }
+# if { VERSION < 5.16.0 }
/* Versions earlier than this don't necessarily return the proper
* length. It should not extend past the end of string, nor past
*retlen = D_PPP_MIN(*retlen, curlen);
*retlen = D_PPP_MIN(*retlen, UTF8SKIP(s));
do {
-# ifdef UTF8_IS_CONTINUATION
+# ifdef UTF8_IS_CONTINUATION
if (! UTF8_IS_CONTINUATION(s[i]))
-# else /* Versions without the above don't support EBCDIC anyway */
+# else /* Versions without the above don't support EBCDIC anyway */
if (s[i] < 0x80 || s[i] > 0xBF)
-# endif
+# endif
{
*retlen = i;
break;
} while (++i < *retlen);
}
-# endif
+# endif /* end of < 5.16.0 */
}
}
return ret;
+
+# endif /* end of < 5.31.4 */
+
}
# endif