# define USE_UTF8_IN_NAMES (PL_hints & HINT_UTF8)
#endif
+#define to_uni_fold(c, p, lenp) _to_uni_fold_flags(c, p, lenp, 1)
+#define to_utf8_fold(c, p, lenp) _to_utf8_fold_flags(c, p, lenp, 1)
+
/* Source backward compatibility. */
#define uvuni_to_utf8(d, uv) uvuni_to_utf8_flags(d, uv, 0)
#define is_utf8_string_loc(s, len, ep) is_utf8_string_loclen(s, len, ep, 0)
+#define foldEQ_utf8(s1, pe1, l1, u1, s2, pe2, l2, u2) \
+ foldEQ_utf8_flags(s1, pe1, l1, u1, s2, pe2, l2, u2, 0)
+#define FOLDEQ_UTF8_NOMIX_ASCII (1 << 0)
+#define FOLDEQ_UTF8_LOCALE (1 << 1)
+
/*
=for apidoc ibcmp_utf8
* version. An example of maximal expansion is the U+03B0 which
* uppercases to U+03C5 U+0308 U+0301. The Unicode databases that
* tell these things are UnicodeData.txt, CaseFolding.txt, and
- * SpecialCasing.txt. */
-#define UTF8_MAXBYTES_CASE 6
+ * SpecialCasing.txt. The value is 6 for strict Unicode characters, but it has
+ * to be as big as Perl allows for a single character */
+#define UTF8_MAXBYTES_CASE UTF8_MAXBYTES
+
+/* A Unicode character can fold to up to 3 characters */
+#define UTF8_MAX_FOLD_CHAR_EXPAND 3
#define IN_BYTES (CopHINTS_get(PL_curcop) & HINT_BYTES)
#define DO_UTF8(sv) (SvUTF8(sv) && !IN_BYTES)
#define UTF8_ALLOW_FFFF 0
#define UTF8_ALLOW_SURROGATE 0
-#define UTF8_DISALLOW_ILLEGAL_INTERCHANGE \
- (UTF8_DISALLOW_SUPER|UTF8_DISALLOW_NONCHAR|UTF8_DISALLOW_SURROGATE)
+#define UTF8_DISALLOW_ILLEGAL_INTERCHANGE (UTF8_DISALLOW_SUPER|UTF8_DISALLOW_NONCHAR|UTF8_DISALLOW_SURROGATE|UTF8_DISALLOW_FE_FF)
#define UTF8_WARN_ILLEGAL_INTERCHANGE \
- (UTF8_WARN_SUPER|UTF8_WARN_NONCHAR|UTF8_WARN_SURROGATE)
+ (UTF8_WARN_SUPER|UTF8_WARN_NONCHAR|UTF8_WARN_SURROGATE|UTF8_WARN_FE_FF)
#define UTF8_ALLOW_ANY \
(~(UTF8_DISALLOW_ILLEGAL_INTERCHANGE|UTF8_WARN_ILLEGAL_INTERCHANGE))
#define UTF8_ALLOW_ANYUV \
#define UNICODE_GREEK_SMALL_LETTER_FINAL_SIGMA 0x03C2
#define UNICODE_GREEK_SMALL_LETTER_SIGMA 0x03C3
#define GREEK_SMALL_LETTER_MU 0x03BC
+#define GREEK_CAPITAL_LETTER_MU 0x039C /* Upper and title case of MICRON */
+#define LATIN_CAPITAL_LETTER_Y_WITH_DIAERESIS 0x0178 /* Also is title case */
+#define LATIN_CAPITAL_LETTER_SHARP_S 0x1E9E
#define UNI_DISPLAY_ISPRINT 0x0001
#define UNI_DISPLAY_BACKSLASH 0x0002
# define LATIN_SMALL_LETTER_SHARP_S 0x00DF
# define LATIN_SMALL_LETTER_Y_WITH_DIAERESIS 0x00FF
# define MICRO_SIGN 0x00B5
+# define LATIN_CAPITAL_LETTER_A_WITH_RING_ABOVE 0x00C5
+# define LATIN_SMALL_LETTER_A_WITH_RING_ABOVE 0x00E5
#endif
#define ANYOF_FOLD_SHARP_S(node, input, end) \
(ANYOF_BITMAP_TEST(node, LATIN_SMALL_LETTER_SHARP_S) && \
- (ANYOF_FLAGS(node) & ANYOF_NONBITMAP) && \
+ (ANYOF_NONBITMAP(node)) && \
(ANYOF_FLAGS(node) & ANYOF_LOC_NONBITMAP_FOLD) && \
((end) > (input) + 1) && \
toLOWER((input)[0]) == 's' && \