/* Option bits sharing the _CORE_SWASH_INIT_ prefix.  0x2 and 0x4 are distinct
 * single bits, so the two can be combined with bitwise OR in one flags word.
 * NOTE(review): presumably consumed by the core swash-initialisation routine;
 * the consumer is not visible here -- confirm against the caller. */
#define _CORE_SWASH_INIT_RETURN_IF_UNDEF 0x2
#define _CORE_SWASH_INIT_ACCEPT_INVLIST 0x4
+/*
+=head1 Unicode Support
+
+=for apidoc is_ascii_string
+
+This is a misleadingly-named synonym for L</is_invariant_string>.
+On ASCII-ish platforms, the name isn't misleading: the ASCII-range characters
+are exactly the UTF-8 invariants. But EBCDIC machines have more invariants
+than just the ASCII characters, so C<is_invariant_string> is preferred.
+
+=cut
+*/
+#define is_ascii_string(s, len) is_invariant_string(s, len) /* pure alias: both arguments are forwarded unchanged */
+
/* uvchr_to_utf8(a,b) is uvchr_to_utf8_flags() with the flags argument fixed
 * at 0.  uvchr_to_utf8_flags() in turn passes uv through NATIVE_TO_UNI()
 * and hands the result, together with d and flags unchanged, to
 * uvoffuni_to_utf8_flags(). */
#define uvchr_to_utf8(a,b) uvchr_to_utf8_flags(a,b,0)
#define uvchr_to_utf8_flags(d,uv,flags) \
uvoffuni_to_utf8_flags(d,NATIVE_TO_UNI(uv),flags)
/* FOLDEQ_* flag bits.  Each value is a distinct single bit (bits 1 through 5
 * in this excerpt), so any combination can be OR-ed into one flags word.
 * Bit 0 is not defined in the lines visible here.
 * NOTE(review): the S1/S2 suffixes presumably refer to the first and second
 * string of a fold-equality comparison -- confirm against the consumer. */
#define FOLDEQ_LOCALE (1 << 1)
#define FOLDEQ_S1_ALREADY_FOLDED (1 << 2)
#define FOLDEQ_S2_ALREADY_FOLDED (1 << 3)
+#define FOLDEQ_S1_FOLDS_SANE (1 << 4)
+#define FOLDEQ_S2_FOLDS_SANE (1 << 5)
/* ibcmp_utf8() is the logical negation of foldEQ_utf8() called with the same
 * eight arguments in the same order; the negated result is wrapped in
 * cBOOL(). */
#define ibcmp_utf8(s1, pe1, l1, u1, s2, pe2, l2, u2) \
cBOOL(! foldEQ_utf8(s1, pe1, l1, u1, s2, pe2, l2, u2))
* code point whose UTF-8 is known to occupy 2 bytes; they are less efficient
* than the EIGHT_BIT versions on EBCDIC platforms. We use the bitwise '~'
* operator instead of "<=" to avoid getting compiler warnings.
- * MAX_PORTABLE_UTF8_TWO_BYTE should be exactly all one bits in the lower few
+ * MAX_UTF8_TWO_BYTE should be exactly all one bits in the lower few
* places, so the ~ works */
/* UTF8_TWO_BYTE_HI(c): first evaluates an __ASSERT_() that passes when c is
 * one byte wide (sizeof(c) == 1) or when c, widened to WIDEST_UTYPE, has no
 * bits set outside the two-byte maximum mask; then yields
 * __BASE_TWO_BYTE_HI(c, NATIVE_TO_UNI) cast to U8.  Note that c appears more
 * than once in the expansion. */
#define UTF8_TWO_BYTE_HI(c) \
(__ASSERT_((sizeof(c) == 1) \
- || !(((WIDEST_UTYPE)(c)) & ~MAX_PORTABLE_UTF8_TWO_BYTE)) \
+ || !(((WIDEST_UTYPE)(c)) & ~MAX_UTF8_TWO_BYTE)) \
((U8) __BASE_TWO_BYTE_HI(c, NATIVE_TO_UNI)))
/* UTF8_TWO_BYTE_LO(c): first evaluates an __ASSERT_() that passes when c is
 * one byte wide (sizeof(c) == 1) or when c, widened to WIDEST_UTYPE, has no
 * bits set outside the two-byte maximum mask; then yields
 * __BASE_TWO_BYTE_LO(c, NATIVE_TO_UNI) cast to U8.  Note that c appears more
 * than once in the expansion. */
#define UTF8_TWO_BYTE_LO(c) \
(__ASSERT_((sizeof(c) == 1) \
- || !(((WIDEST_UTYPE)(c)) & ~MAX_PORTABLE_UTF8_TWO_BYTE)) \
+ || !(((WIDEST_UTYPE)(c)) & ~MAX_UTF8_TWO_BYTE)) \
((U8) __BASE_TWO_BYTE_LO(c, NATIVE_TO_UNI)))
/* This is illegal in any well-formed UTF-8 in both EBCDIC and ASCII
#endif
/*
-=head1 Unicode Support
=for apidoc Am|STRLEN|isUTF8_CHAR|const U8 *s|const U8 *e