+#define UNICODE_GREEK_CAPITAL_LETTER_SIGMA 0x03A3 /* Each macro in this group is the code point of the character it names */
+#define UNICODE_GREEK_SMALL_LETTER_FINAL_SIGMA 0x03C2 /* Final form of the small sigma */
+#define UNICODE_GREEK_SMALL_LETTER_SIGMA 0x03C3 /* Non-final form of the small sigma */
+#define GREEK_SMALL_LETTER_MU 0x03BC /* Distinct from MICRO SIGN, which has its own code point */
+#define GREEK_CAPITAL_LETTER_MU 0x039C /* Upper and title case of MICRO SIGN */
+#define LATIN_CAPITAL_LETTER_Y_WITH_DIAERESIS 0x0178 /* Also is title case */
+#define LATIN_CAPITAL_LETTER_SHARP_S 0x1E9E /* Capital counterpart of the small sharp s */
+/* UNI_DISPLAY_* bit flags.  ISPRINT and BACKSLASH occupy separate bits;
+ * QQ and REGEX are both defined as the union of those two bits and so
+ * currently have the same value.  What each flag selects is decided by
+ * the code that consumes them, which is outside this chunk. */
+#define UNI_DISPLAY_ISPRINT 0x0001
+#define UNI_DISPLAY_BACKSLASH 0x0002
+#define UNI_DISPLAY_QQ (UNI_DISPLAY_ISPRINT|UNI_DISPLAY_BACKSLASH)
+#define UNI_DISPLAY_REGEX (UNI_DISPLAY_ISPRINT|UNI_DISPLAY_BACKSLASH)
+/* Code points below U+0100 that are given here only when EBCDIC is not
+ * defined.  NOTE(review): presumably EBCDIC builds obtain these names from
+ * another header with platform-specific values -- confirm. */
+#ifndef EBCDIC
+# define LATIN_SMALL_LETTER_SHARP_S 0x00DF
+# define LATIN_SMALL_LETTER_Y_WITH_DIAERESIS 0x00FF
+# define MICRO_SIGN 0x00B5
+# define LATIN_CAPITAL_LETTER_A_WITH_RING_ABOVE 0x00C5
+# define LATIN_SMALL_LETTER_A_WITH_RING_ABOVE 0x00E5
+#endif /* !EBCDIC */
+/* ANYOF_FOLD_SHARP_S(node, input, end) is true only when all of these hold:
+ *   - ANYOF_BITMAP_TEST() reports LATIN_SMALL_LETTER_SHARP_S for node,
+ *   - ANYOF_NONBITMAP(node) is true,
+ *   - ANYOF_FLAGS(node) has ANYOF_LOC_NONBITMAP_FOLD set,
+ *   - at least two elements lie between input and end, and
+ *   - toLOWER() of each of the first two elements at input equals 's'.
+ * The length test comes before the element reads, so (input)[1] is not
+ * touched when fewer than two elements remain.  node and input are each
+ * expanded more than once; pass expressions without side effects.
+ * NOTE(review): in this chunk LATIN_SMALL_LETTER_SHARP_S is defined only
+ * under #ifndef EBCDIC -- confirm where EBCDIC builds get it. */
+#define ANYOF_FOLD_SHARP_S(node, input, end) \
+ (ANYOF_BITMAP_TEST(node, LATIN_SMALL_LETTER_SHARP_S) && \
+ (ANYOF_NONBITMAP(node)) && \
+ (ANYOF_FLAGS(node) & ANYOF_LOC_NONBITMAP_FOLD) && \
+ ((end) > (input) + 1) && \
+ toLOWER((input)[0]) == 's' && \
+ toLOWER((input)[1]) == 's')
+#define SHARP_S_SKIP 2 /* presumably the number of input elements ("ss") covered by the test above -- confirm against its users */
+
+#ifndef EBCDIC
+# define IS_UTF8_CHAR_1(p) /* one byte: accepted when (p)[0] <= 0x7F */ \
+ ((p)[0] <= 0x7F)
+# define IS_UTF8_CHAR_2(p) /* two bytes: lead 0xC2..0xDF, then one byte in 0x80..0xBF */ \
+ ((p)[0] >= 0xC2 && (p)[0] <= 0xDF && /* 0xC0 and 0xC1 are not accepted as lead bytes */ \
+ (p)[1] >= 0x80 && (p)[1] <= 0xBF) /* NOTE(review): the range tests assume p yields unsigned bytes -- confirm at call sites */
+# define IS_UTF8_CHAR_3a(p) /* lead 0xE0: second byte restricted to 0xA0..0xBF, third byte 0x80..0xBF */ \
+ ((p)[0] == 0xE0 && \
+ (p)[1] >= 0xA0 && (p)[1] <= 0xBF && \
+ (p)[2] >= 0x80 && (p)[2] <= 0xBF)
+# define IS_UTF8_CHAR_3b(p) /* leads 0xE1..0xEC: both following bytes in 0x80..0xBF */ \
+ ((p)[0] >= 0xE1 && (p)[0] <= 0xEC && \
+ (p)[1] >= 0x80 && (p)[1] <= 0xBF && \
+ (p)[2] >= 0x80 && (p)[2] <= 0xBF)
+# define IS_UTF8_CHAR_3c(p) /* lead 0xED: both following bytes in 0x80..0xBF */ \
+ ((p)[0] == 0xED && \
+ (p)[1] >= 0x80 && (p)[1] <= 0xBF && \
+ (p)[2] >= 0x80 && (p)[2] <= 0xBF)
+ /* In IS_UTF8_CHAR_3c(p) one could use
+ * (p)[1] >= 0x80 && (p)[1] <= 0x9F
+ * if one wanted to exclude surrogates.  As written, the full
+ * 0x80..0xBF range is accepted for the second byte, so they are
+ * not excluded. */
+# define IS_UTF8_CHAR_3d(p) /* leads 0xEE..0xEF: both following bytes in 0x80..0xBF */ \
+ ((p)[0] >= 0xEE && (p)[0] <= 0xEF && \
+ (p)[1] >= 0x80 && (p)[1] <= 0xBF && \
+ (p)[2] >= 0x80 && (p)[2] <= 0xBF)
+# define IS_UTF8_CHAR_4a(p) /* lead 0xF0: second byte restricted to 0x90..0xBF, the other two 0x80..0xBF */ \
+ ((p)[0] == 0xF0 && \
+ (p)[1] >= 0x90 && (p)[1] <= 0xBF && \
+ (p)[2] >= 0x80 && (p)[2] <= 0xBF && \
+ (p)[3] >= 0x80 && (p)[3] <= 0xBF)
+# define IS_UTF8_CHAR_4b(p) /* leads 0xF1..0xF3: all three following bytes in 0x80..0xBF */ \
+ ((p)[0] >= 0xF1 && (p)[0] <= 0xF3 && \
+ (p)[1] >= 0x80 && (p)[1] <= 0xBF && \
+ (p)[2] >= 0x80 && (p)[2] <= 0xBF && \
+ (p)[3] >= 0x80 && (p)[3] <= 0xBF)
+/* In IS_UTF8_CHAR_4c(p) one could use
+ * (p)[0] == 0xF4
+ * if one wanted to stop at the Unicode limit U+10FFFF.
+ * The 0xF7 allows us to go to 0x1fffff (0x200000 would
+ * require five bytes). Not doing any further code points
+ * since that is not needed (and that would not be strict
+ * UTF-8, anyway). The "slow path" in Perl_is_utf8_char()
+ * will take care of the "extended UTF-8". */
+# define IS_UTF8_CHAR_4c(p) /* leads 0xF4..0xF7: all three following bytes in 0x80..0xBF */ \
+ ((p)[0] >= 0xF4 && (p)[0] <= 0xF7 && \
+ (p)[1] >= 0x80 && (p)[1] <= 0xBF && \
+ (p)[2] >= 0x80 && (p)[2] <= 0xBF && \
+ (p)[3] >= 0x80 && (p)[3] <= 0xBF)
+/* IS_UTF8_CHAR_3(p) and IS_UTF8_CHAR_4(p) OR together the per-lead-byte
+ * sub-cases (3a..3d and 4a..4c): the result is true when any one of them
+ * accepts the bytes at p.  p is expanded into every sub-case, so it can
+ * be evaluated many times and must have no side effects. */
+# define IS_UTF8_CHAR_3(p) \
+ (IS_UTF8_CHAR_3a(p) || \
+ IS_UTF8_CHAR_3b(p) || \
+ IS_UTF8_CHAR_3c(p) || \
+ IS_UTF8_CHAR_3d(p))
+# define IS_UTF8_CHAR_4(p) \
+ (IS_UTF8_CHAR_4a(p) || \
+ IS_UTF8_CHAR_4b(p) || \
+ IS_UTF8_CHAR_4c(p))
+
+/* IS_UTF8_CHAR(p, n) is strictly speaking wrong (not UTF-8) because
+ * (1) it allows UTF-8 encoded UTF-16 surrogates, and
+ * (2) it allows code points past U+10FFFF.
+ * The Perl_is_utf8_char() full "slow" code will handle the Perl
+ * "extended UTF-8".
+ *
+ * n selects which fixed-length check is applied to the bytes at p:
+ * 1, 2, 3 or 4 dispatch to IS_UTF8_CHAR_1 .. IS_UTF8_CHAR_4, and any
+ * other n yields 0.  Both p and n may be evaluated more than once. */
+# define IS_UTF8_CHAR(p, n) \
+ ((n) == 1 ? IS_UTF8_CHAR_1(p) : \
+ (n) == 2 ? IS_UTF8_CHAR_2(p) : \
+ (n) == 3 ? IS_UTF8_CHAR_3(p) : \
+ (n) == 4 ? IS_UTF8_CHAR_4(p) : 0)
+/* IS_UTF8_CHAR_FAST(n) is true when n <= 4, which covers every length
+ * IS_UTF8_CHAR() has a case for.  It has no lower bound, so n <= 0 also
+ * satisfies it. */
+# define IS_UTF8_CHAR_FAST(n) ((n) <= 4)
+
+#else /* EBCDIC */
+
+/* This is an attempt to port IS_UTF8_CHAR to EBCDIC based on eyeballing;
+ * it is untested. To exclude surrogates and above-Unicode code points,
+ * see the definitions of UTF8_IS_SURROGATE and UTF8_IS_SUPER. */
+# define IS_UTF8_CHAR_1(p) /* one byte: accepted when NATIVE_TO_ASCII((p)[0]) <= 0x9F */ \
+ (NATIVE_TO_ASCII((p)[0]) <= 0x9F) /* NOTE(review): the multi-byte cases translate with NATIVE_TO_I8 instead -- confirm the difference is intended */
+# define IS_UTF8_CHAR_2(p) /* two bytes, compared after NATIVE_TO_I8: lead 0xC5..0xDF, then one byte in 0xA0..0xBF */ \
+ (NATIVE_TO_I8((p)[0]) >= 0xC5 && NATIVE_TO_I8((p)[0]) <= 0xDF && \
+ NATIVE_TO_I8((p)[1]) >= 0xA0 && NATIVE_TO_I8((p)[1]) <= 0xBF)
+/* IS_UTF8_CHAR_3(p): true when the three bytes at p form a three-byte
+ * UTF-EBCDIC character.  After translation with NATIVE_TO_I8 the lead
+ * byte must be 0xE1..0xEF and each of the two following bytes must be
+ * 0xA0..0xBF.  Lead 0xE0 is left out because with five payload bits per
+ * continuation byte it can only encode values that fit in two bytes.
+ * Both range bounds are applied to (p)[0], so every lead in the range is
+ * accepted.  p is expanded several times and must have no side effects. */
+# define IS_UTF8_CHAR_3(p) \
+ (NATIVE_TO_I8((p)[0]) >= 0xE1 && NATIVE_TO_I8((p)[0]) <= 0xEF && \
+ NATIVE_TO_I8((p)[1]) >= 0xA0 && NATIVE_TO_I8((p)[1]) <= 0xBF && \
+ NATIVE_TO_I8((p)[2]) >= 0xA0 && NATIVE_TO_I8((p)[2]) <= 0xBF)
+# define IS_UTF8_CHAR_4a(p) /* lead 0xF0 (after NATIVE_TO_I8): second byte restricted to 0xB0..0xBF, the other two 0xA0..0xBF */ \
+ (NATIVE_TO_I8((p)[0]) == 0xF0 && \
+ NATIVE_TO_I8((p)[1]) >= 0xB0 && NATIVE_TO_I8((p)[1]) <= 0xBF && \
+ NATIVE_TO_I8((p)[2]) >= 0xA0 && NATIVE_TO_I8((p)[2]) <= 0xBF && \
+ NATIVE_TO_I8((p)[3]) >= 0xA0 && NATIVE_TO_I8((p)[3]) <= 0xBF)
+# define IS_UTF8_CHAR_4b(p) /* leads 0xF1..0xF7 (after NATIVE_TO_I8): all three following bytes in 0xA0..0xBF */ \
+ (NATIVE_TO_I8((p)[0]) >= 0xF1 && NATIVE_TO_I8((p)[0]) <= 0xF7 && \
+ NATIVE_TO_I8((p)[1]) >= 0xA0 && NATIVE_TO_I8((p)[1]) <= 0xBF && \
+ NATIVE_TO_I8((p)[2]) >= 0xA0 && NATIVE_TO_I8((p)[2]) <= 0xBF && \
+ NATIVE_TO_I8((p)[3]) >= 0xA0 && NATIVE_TO_I8((p)[3]) <= 0xBF)
+/* IS_UTF8_CHAR_5a(p): true when the five bytes at p form a five-byte
+ * UTF-EBCDIC character whose lead byte, after NATIVE_TO_I8, is 0xF8.
+ * For this lead the second byte must be 0xA8..0xBF (a smaller second
+ * byte would encode a value that fits in four bytes); the remaining
+ * three bytes (p)[2], (p)[3] and (p)[4] must each be 0xA0..0xBF.
+ * Every one of the five bytes is checked exactly once.  p is expanded
+ * several times and must have no side effects. */
+# define IS_UTF8_CHAR_5a(p) \
+ (NATIVE_TO_I8((p)[0]) == 0xF8 && \
+ NATIVE_TO_I8((p)[1]) >= 0xA8 && NATIVE_TO_I8((p)[1]) <= 0xBF && \
+ NATIVE_TO_I8((p)[2]) >= 0xA0 && NATIVE_TO_I8((p)[2]) <= 0xBF && \
+ NATIVE_TO_I8((p)[3]) >= 0xA0 && NATIVE_TO_I8((p)[3]) <= 0xBF && \
+ NATIVE_TO_I8((p)[4]) >= 0xA0 && NATIVE_TO_I8((p)[4]) <= 0xBF)
+/* IS_UTF8_CHAR_5b(p): true when the five bytes at p form a five-byte
+ * UTF-EBCDIC character whose lead byte, after NATIVE_TO_I8, is in
+ * 0xF9..0xFB.  Both bounds are applied to (p)[0], and each of the four
+ * following bytes (p)[1]..(p)[4] must be 0xA0..0xBF.  p is expanded
+ * several times and must have no side effects. */
+# define IS_UTF8_CHAR_5b(p) \
+ (NATIVE_TO_I8((p)[0]) >= 0xF9 && NATIVE_TO_I8((p)[0]) <= 0xFB && \
+ NATIVE_TO_I8((p)[1]) >= 0xA0 && NATIVE_TO_I8((p)[1]) <= 0xBF && \
+ NATIVE_TO_I8((p)[2]) >= 0xA0 && NATIVE_TO_I8((p)[2]) <= 0xBF && \
+ NATIVE_TO_I8((p)[3]) >= 0xA0 && NATIVE_TO_I8((p)[3]) <= 0xBF && \
+ NATIVE_TO_I8((p)[4]) >= 0xA0 && NATIVE_TO_I8((p)[4]) <= 0xBF)
+/* EBCDIC combiners and dispatcher.  IS_UTF8_CHAR_4(p) and
+ * IS_UTF8_CHAR_5(p) are true when either of their two sub-cases accepts
+ * the bytes at p.  IS_UTF8_CHAR(p, n) applies the check selected by n
+ * (1..5) and yields 0 for any other n.  p and n may each be evaluated
+ * more than once. */
+# define IS_UTF8_CHAR_4(p) \
+ (IS_UTF8_CHAR_4a(p) || \
+ IS_UTF8_CHAR_4b(p))
+# define IS_UTF8_CHAR_5(p) \
+ (IS_UTF8_CHAR_5a(p) || \
+ IS_UTF8_CHAR_5b(p))
+# define IS_UTF8_CHAR(p, n) \
+ ((n) == 1 ? IS_UTF8_CHAR_1(p) : \
+ (n) == 2 ? IS_UTF8_CHAR_2(p) : \
+ (n) == 3 ? IS_UTF8_CHAR_3(p) : \
+ (n) == 4 ? IS_UTF8_CHAR_4(p) : \
+ (n) == 5 ? IS_UTF8_CHAR_5(p) : 0)
+/* IS_UTF8_CHAR_FAST(n) is true when n <= 5, which covers every length
+ * the EBCDIC IS_UTF8_CHAR() has a case for.  It has no lower bound, so
+ * n <= 0 also satisfies it. */
+# define IS_UTF8_CHAR_FAST(n) ((n) <= 5)
+
+#endif /* IS_UTF8_CHAR() for UTF-8 */
+
+/*
+ * Local variables:
+ * c-indentation-style: bsd
+ * c-basic-offset: 4
+ * indent-tabs-mode: t
+ * End:
+ *
+ * ex: set ts=8 sts=4 sw=4 noet:
+ */