/* Two distinct single-bit flag values (bits 2 and 3), so they can be OR-ed
 * together.  NOTE(review): judging by the names, they presumably tell the
 * fold-equality comparison that s1 / s2 has already been case-folded --
 * confirm against the function that consumes them. */
#define FOLDEQ_S1_ALREADY_FOLDED (1 << 2)
#define FOLDEQ_S2_ALREADY_FOLDED (1 << 3)
-/*
-=for apidoc ibcmp_utf8
-
-This is a synonym for (! foldEQ_utf8())
-
-=cut
-*/
/* ibcmp_utf8(): the logical negation of foldEQ_utf8() called with the same
 * eight arguments, with the result passed through cBOOL().  The arguments are
 * forwarded unchanged and each appears exactly once in the expansion. */
#define ibcmp_utf8(s1, pe1, l1, u1, s2, pe2, l2, u2) \
cBOOL(! foldEQ_utf8(s1, pe1, l1, u1, s2, pe2, l2, u2))
#error UTF8_MAXBYTES must be at least 12
#endif
+/* ^? is defined to be DEL on ASCII systems; QUESTION_MARK_CTRL is the name
+ * used for that character and expands to DEL_NATIVE.  See the definition of
+ * toCTRL() for more details. */
+#define QUESTION_MARK_CTRL DEL_NATIVE
+
/* 0x7FF is the largest value that fits in the 11 payload bits (5 + 6) of a
 * two-byte sequence in standard UTF-8. */
#define MAX_UTF8_TWO_BYTE 0x7FF
/* Alias: expands to UTF8_MAXBYTES.  NOTE(review): presumably the buffer size
 * to allow for the result of a case change -- confirm against its users. */
#define UTF8_MAXBYTES_CASE UTF8_MAXBYTES
/* Non-zero when the HINT_BYTES bit is set in the hints that CopHINTS_get()
 * returns for PL_curcop.  The value is the masked bit, not necessarily 1. */
#define IN_BYTES (CopHINTS_get(PL_curcop) & HINT_BYTES)
/* True when SvUTF8(sv) is true and IN_BYTES is not in effect. */
#define DO_UTF8(sv) (SvUTF8(sv) && !IN_BYTES)
/* IN_UNI_8_BIT is true only when IN_BYTES is false and, in the hints of
 * PL_curcop, either HINT_UNI_8_BIT is set, or HINT_LOCALE_PARTIAL is set and
 * _is_in_locale_category(FALSE, -1) returns true.  (This describes the
 * replacement definition on the '+' lines; the '-' lines instead tested
 * HINT_UNI_8_BIT|HINT_LOCALE_NOT_CHARS together with ! IN_LOCALE_RUNTIME.) */
#define IN_UNI_8_BIT \
- (CopHINTS_get(PL_curcop) & (HINT_UNI_8_BIT|HINT_LOCALE_NOT_CHARS) \
- && ! IN_LOCALE_RUNTIME && ! IN_BYTES)
+ (((CopHINTS_get(PL_curcop) & (HINT_UNI_8_BIT)) \
+ || (CopHINTS_get(PL_curcop) & HINT_LOCALE_PARTIAL \
+ /* -1 below is for :not_characters */ \
+ && _is_in_locale_category(FALSE, -1))) \
+ && ! IN_BYTES)
#define UTF8_ALLOW_EMPTY 0x0001 /* Bit 0: allow a zero-length string */
* U+10FFFF: \xF4\x8F\xBF\xBF \xF9\xA1\xBF\xBF\xBF max legal Unicode
* U+110000: \xF4\x90\x80\x80 \xF9\xA2\xA0\xA0\xA0
* U+110001: \xF4\x90\x80\x81 \xF9\xA2\xA0\xA0\xA1
- */
+ *
+ * BE AWARE that this test doesn't rule out malformed code points, in
+ * particular overlongs */
#ifdef EBCDIC /* Both versions assume well-formed UTF8 */
# define UTF8_IS_SUPER(s) (NATIVE_UTF8_TO_I8(* (U8*) (s)) >= 0xF9 \
&& (NATIVE_UTF8_TO_I8(* (U8*) (s)) > 0xF9 \