#define uvchr_to_utf8_flags_msgs(d,uv,flags,msgs) \
uvoffuni_to_utf8_flags_msgs(d,NATIVE_TO_UNI(uv),flags, msgs)
+/* Casts 's' and 'e' to 'const U8 *' and forwards them, together with 'lenp',
+ * to utf8_to_uvchr_buf_helper().  Both pointer arguments are parenthesized
+ * before the cast so that the cast applies to the whole argument expression
+ * rather than just to its first operand. */
#define utf8_to_uvchr_buf(s, e, lenp) \
- _utf8_to_uvchr_buf((const U8 *) (s), (const U8 *) e, lenp)
+ utf8_to_uvchr_buf_helper((const U8 *) (s), (const U8 *) (e), lenp)
#define utf8n_to_uvchr(s, len, lenp, flags) \
utf8n_to_uvchr_error(s, len, lenp, flags, 0)
#define utf8n_to_uvchr_error(s, len, lenp, flags, errors) \
#else /* ! EBCDIC */
START_EXTERN_C
-/* How wide can a single UTF-8 encoded character become in bytes. */
-/* NOTE: Strictly speaking Perl's UTF-8 should not be called UTF-8 since UTF-8
- * is an encoding of Unicode, and Unicode's upper limit, 0x10FFFF, can be
- * expressed with 4 bytes. However, Perl thinks of UTF-8 as a way to encode
- * non-negative integers in a binary format, even those above Unicode */
+/*
+
+=for apidoc AmnU|STRLEN|UTF8_MAXBYTES
+
+The maximum width of a single UTF-8 encoded character, in bytes.
+
+NOTE: Strictly speaking Perl's UTF-8 should not be called UTF-8 since UTF-8
+is an encoding of Unicode, and Unicode's upper limit, 0x10FFFF, can be
+expressed with 4 bytes. However, Perl thinks of UTF-8 as a way to encode
+non-negative integers in a binary format, even those above Unicode's maximum.
+
+=cut
+ */
#define UTF8_MAXBYTES 13
#ifdef DOINIT
the paradigm that the number of leading bits gives how many total bytes there
are in the character.
+=cut
*/
/* Is the representation of the Unicode code point 'cp' the same regardless of
&& ( NATIVE_UTF8_TO_I8(*(s)) > 0xF9 \
|| (NATIVE_UTF8_TO_I8(*((s) + 1)) >= 0xA2)) \
&& LIKELY((s) + UTF8SKIP(s) <= (e))) \
- ? _is_utf8_char_helper(s, s + UTF8SKIP(s), 0) : 0)
+ ? is_utf8_char_helper(s, s + UTF8SKIP(s), 0) : 0)
#else
+/* Non-EBCDIC form.  Evaluates to 0 unless all of the following hold: more
+ * than 3 bytes are available before 'e'; the first byte is at least 0xF4,
+ * and, when it is exactly 0xF4, the second byte is at least 0x90 (the lowest
+ * UTF-8 lead-byte pair that encodes a value above 0x10FFFF); and the full
+ * UTF8SKIP(s)-byte sequence ends at or before 'e'.  When they do hold, the
+ * result is whatever is_utf8_char_helper() returns for that sequence.  Every
+ * use of 's' is parenthesized so that an argument containing low-precedence
+ * operators is not re-associated by the '+'. */
# define UTF8_IS_SUPER(s, e) \
(( LIKELY((e) > (s) + 3) \
&& (*(U8*) (s)) >= 0xF4 \
&& ((*(U8*) (s)) > 0xF4 || (*((U8*) (s) + 1) >= 0x90))\
&& LIKELY((s) + UTF8SKIP(s) <= (e))) \
- ? _is_utf8_char_helper(s, s + UTF8SKIP(s), 0) : 0)
+ ? is_utf8_char_helper((s), (s) + UTF8SKIP(s), 0) : 0)
#endif
/* These are now machine generated, and the 'given' clause is no longer
non-zero, the value gives how many bytes starting at C<s> comprise the code
point's representation.
+=for apidoc AmnU|UV|UNICODE_REPLACEMENT
+
+Evaluates to 0xFFFD, the code point of the Unicode REPLACEMENT CHARACTER.
+
=cut
*/
#define UTF8_IS_NONCHAR(s, e) \
? 1 \
: UNLIKELY(((e) - (s)) < UTF8SKIP(s)) \
? 0 \
- : _is_utf8_char_helper(s, e, flags))
+ : is_utf8_char_helper(s, e, flags))
/* Do not use; should be deprecated. Use isUTF8_CHAR() instead; this is
* retained solely for backwards compatibility */