# define _CC_NON_FINAL_FOLD 21
# define _CC_IS_IN_SOME_FOLD 22
# define _CC_MNEMONIC_CNTRL 23
-/* Unused: 24-31
+
+/* This next group is used only on EBCDIC platforms, so in theory its bit
+ * numbers could be reused for something else that is ASCII-platform-only */
+# define _CC_UTF8_START_BYTE_IS_FOR_AT_LEAST_SURROGATE 28
+# define _CC_UTF8_IS_START 29
+# define _CC_UTF8_IS_DOWNGRADEABLE_START 30
+# define _CC_UTF8_IS_CONTINUATION 31
+/* Unused: 24-27
* If more bits are needed, one could add a second word for non-64bit
* QUAD_IS_INT systems, using some #ifdefs to distinguish between having a 2nd
* word or not. The IS_IN_SOME_FOLD bit is the most easily expendable, as it
* is used only for optimization (as of this writing), and differs in the
* Latin1 range from the ALPHA bit only in two relatively unimportant
* characters: the masculine and feminine ordinal indicators, so removing it
- * would just cause /i regexes which match them to run less efficiently */
+ * would just cause /i regexes which match them to run less efficiently.
+ * Similarly, the EBCDIC-only bits are used only for speed, and could be
+ * replaced by other means */
#if defined(PERL_CORE) || defined(PERL_EXT)
/* An enum version of the character class numbers, to help compilers
? _generic_isCC(*(p), classnum) \
: (UTF8_IS_DOWNGRADEABLE_START(*(p))) \
? _generic_isCC( \
- TWO_BYTE_UTF8_TO_NATIVE(*(p), \
+ EIGHT_BIT_UTF8_TO_NATIVE(*(p), \
*((p)+1 )), \
classnum) \
: utf8)
(UTF8_IS_INVARIANT(*(p)) \
? macro(*(p)) \
: (UTF8_IS_DOWNGRADEABLE_START(*(p))) \
- ? macro(TWO_BYTE_UTF8_TO_NATIVE(*(p), *((p)+1))) \
+ ? macro(EIGHT_BIT_UTF8_TO_NATIVE(*(p), *((p)+1)))\
: utf8)
#define _generic_LC_swash_utf8(macro, classnum, p) \