toLOWER((input)[1]) == 's')
#define SHARP_S_SKIP 2
-#ifdef EBCDIC
-/* IS_UTF8_CHAR() is not ported to EBCDIC */
-#else
+#ifndef EBCDIC
# define IS_UTF8_CHAR_1(p) \
((p)[0] <= 0x7F)
# define IS_UTF8_CHAR_2(p) \
# define IS_UTF8_CHAR_FAST(n) ((n) <= 4)
+#else /* EBCDIC */
+
+/* This is an attempt to port IS_UTF8_CHAR to EBCDIC, based on eyeballing;
+ * it is untested. If you want to exclude surrogates and above-Unicode code
+ * points, see the definitions of UTF8_IS_SURROGATE and UTF8_IS_SUPER. */
+# define IS_UTF8_CHAR_1(p) /* 1 byte: NATIVE_TO_ASCII(p[0]) is at most 0x9F */ \
+ (0x9F >= NATIVE_TO_ASCII((p)[0]))
+# define IS_UTF8_CHAR_2(p) /* 2 bytes: I8 lead C5..DF, then A0..BF */ \
+ (0xC5 <= NATIVE_TO_I8((p)[0]) && NATIVE_TO_I8((p)[0]) <= 0xDF && \
+ 0xA0 <= NATIVE_TO_I8((p)[1]) && NATIVE_TO_I8((p)[1]) <= 0xBF)
+# define IS_UTF8_CHAR_3(p) /* 3 bytes: I8 lead E1..EF, then two bytes A0..BF */ \
+ (0xE1 <= NATIVE_TO_I8((p)[0]) && NATIVE_TO_I8((p)[0]) <= 0xEF && /* both bounds on p[0] */ \
+ 0xA0 <= NATIVE_TO_I8((p)[1]) && NATIVE_TO_I8((p)[1]) <= 0xBF && \
+ 0xA0 <= NATIVE_TO_I8((p)[2]) && NATIVE_TO_I8((p)[2]) <= 0xBF)
+# define IS_UTF8_CHAR_4a(p) /* 4 bytes, I8 lead F0: p[1] must be B0..BF */ \
+ (0xF0 == NATIVE_TO_I8((p)[0]) && \
+ 0xB0 <= NATIVE_TO_I8((p)[1]) && NATIVE_TO_I8((p)[1]) <= 0xBF && \
+ 0xA0 <= NATIVE_TO_I8((p)[2]) && NATIVE_TO_I8((p)[2]) <= 0xBF && \
+ 0xA0 <= NATIVE_TO_I8((p)[3]) && NATIVE_TO_I8((p)[3]) <= 0xBF)
+# define IS_UTF8_CHAR_4b(p) /* 4 bytes, I8 lead F1..F7, then three bytes A0..BF */ \
+ (0xF1 <= NATIVE_TO_I8((p)[0]) && NATIVE_TO_I8((p)[0]) <= 0xF7 && \
+ 0xA0 <= NATIVE_TO_I8((p)[1]) && NATIVE_TO_I8((p)[1]) <= 0xBF && \
+ 0xA0 <= NATIVE_TO_I8((p)[2]) && NATIVE_TO_I8((p)[2]) <= 0xBF && \
+ 0xA0 <= NATIVE_TO_I8((p)[3]) && NATIVE_TO_I8((p)[3]) <= 0xBF)
+# define IS_UTF8_CHAR_5a(p) /* 5 bytes, I8 lead F8: p[1] must be A8..BF */ \
+ (0xF8 == NATIVE_TO_I8((p)[0]) && \
+ 0xA8 <= NATIVE_TO_I8((p)[1]) && NATIVE_TO_I8((p)[1]) <= 0xBF && \
+ 0xA0 <= NATIVE_TO_I8((p)[2]) && NATIVE_TO_I8((p)[2]) <= 0xBF && \
+ 0xA0 <= NATIVE_TO_I8((p)[3]) && NATIVE_TO_I8((p)[3]) <= 0xBF && \
+ 0xA0 <= NATIVE_TO_I8((p)[4]) && NATIVE_TO_I8((p)[4]) <= 0xBF) /* last byte p[4] is validated too */
+# define IS_UTF8_CHAR_5b(p) /* 5 bytes, I8 lead F9..FB, then four bytes A0..BF */ \
+ (0xF9 <= NATIVE_TO_I8((p)[0]) && NATIVE_TO_I8((p)[0]) <= 0xFB && /* both bounds on p[0] */ \
+ 0xA0 <= NATIVE_TO_I8((p)[1]) && NATIVE_TO_I8((p)[1]) <= 0xBF && \
+ 0xA0 <= NATIVE_TO_I8((p)[2]) && NATIVE_TO_I8((p)[2]) <= 0xBF && \
+ 0xA0 <= NATIVE_TO_I8((p)[3]) && NATIVE_TO_I8((p)[3]) <= 0xBF && \
+ 0xA0 <= NATIVE_TO_I8((p)[4]) && NATIVE_TO_I8((p)[4]) <= 0xBF) /* last byte p[4] is validated too */
+
+# define IS_UTF8_CHAR_4(p) /* 4 bytes: either the F0-lead or the F1..F7-lead form */ \
+ (IS_UTF8_CHAR_4a(p) \
+ || IS_UTF8_CHAR_4b(p))
+# define IS_UTF8_CHAR_5(p) /* 5 bytes: either the F8-lead or the F9..FB-lead form */ \
+ (IS_UTF8_CHAR_5a(p) \
+ || IS_UTF8_CHAR_5b(p))
+# define IS_UTF8_CHAR(p, n) /* pick the check for byte length n; 0 unless n is 1..5 */ \
+ (((n) == 1) ? IS_UTF8_CHAR_1(p) : \
+ ((n) == 2) ? IS_UTF8_CHAR_2(p) : \
+ ((n) == 3) ? IS_UTF8_CHAR_3(p) : \
+ ((n) == 4) ? IS_UTF8_CHAR_4(p) : \
+ ((n) == 5) ? IS_UTF8_CHAR_5(p) : 0)
+
+# define IS_UTF8_CHAR_FAST(n) (5 >= (n)) /* true when n does not exceed 5, the longest length IS_UTF8_CHAR() handles here */
+
#endif /* IS_UTF8_CHAR() for UTF-8 */
/*