-/* Macros that differ between EBCDIC and ASCII. Where C89 defines a function,
- * that is used in the EBCDIC form, because in EBCDIC we do not do locales:
- * therefore can use native functions. For those where C89 doesn't define a
- * function, use our function, assuming that the EBCDIC code page is isomorphic
- * with Latin1, which the three currently recognized by Perl are. Some libc's
- * have an isblank(), but it's not guaranteed. */
-#ifdef EBCDIC
-# define isALPHA(c) isalpha(c)
-# define isALPHANUMERIC(c) isalnum(c)
-# define isBLANK(c) ((c) == ' ' || (c) == '\t' || NATIVE_TO_LATIN1(c) == 0xA0)
-# define isCNTRL(c) iscntrl(c)
-# define isDIGIT(c) isdigit(c)
-# define isGRAPH(c) isgraph(c)
-# define isIDFIRST(c) (isALPHA(c) || (c) == '_')
-# define isLOWER(c) islower(c)
-# define isPRINT(c) isprint(c)
-# define isPSXSPC(c) isspace(c)
-# define isPUNCT(c) ispunct(c)
-# define isSPACE(c) (isPSXSPC(c) /* && (c) != '\v' (Experimentally making
- these macros identical) */)
-# define isUPPER(c) isupper(c)
-# define isXDIGIT(c) isxdigit(c)
-# define isWORDCHAR(c) (isalnum(c) || (c) == '_')
-# define toLOWER(c) tolower(c)
-# define toUPPER(c) toupper(c)
-#else /* Not EBCDIC: ASCII-only matching */
-# define isALPHANUMERIC(c) isALPHANUMERIC_A(c)
-# define isALPHA(c) isALPHA_A(c)
-# define isBLANK(c) isBLANK_A(c)
-# define isCNTRL(c) isCNTRL_A(c)
-# define isDIGIT(c) isDIGIT_A(c)
-# define isGRAPH(c) isGRAPH_A(c)
-# define isIDFIRST(c) isIDFIRST_A(c)
-# define isLOWER(c) isLOWER_A(c)
-# define isPRINT(c) isPRINT_A(c)
-# define isPSXSPC(c) isPSXSPC_A(c)
-# define isPUNCT(c) isPUNCT_A(c)
-# define isSPACE(c) isSPACE_A(c)
-# define isUPPER(c) isUPPER_A(c)
-# define isWORDCHAR(c) isWORDCHAR_A(c)
-# define isXDIGIT(c) isXDIGIT_A(c)
-
- /* ASCII casing. These could also be written as
- #define toLOWER(c) (isASCII(c) ? toLOWER_LATIN1(c) : (c))
- #define toUPPER(c) (isASCII(c) ? toUPPER_LATIN1_MOD(c) : (c))
- which uses table lookup and mask instead of subtraction. (This would
- work because the _MOD does not apply in the ASCII range) */
-# define toLOWER(c) (isUPPER(c) ? (c) + ('a' - 'A') : (c))
-# define toUPPER(c) (isLOWER(c) ? (c) - ('a' - 'A') : (c))
-#endif
+# ifdef EBCDIC
+# define isASCII(c) _generic_isCC(c, _CC_ASCII) /* on EBCDIC builds, ASCII-ness
+ * is answered by the _CC_ASCII character class via _generic_isCC() */
+# endif
+
+ /* Participates in a single-character fold with a character above 255.
+ * As written: true for any 'c' that does not fit in 8 bits; otherwise
+ * true when the PL_charclass[] entry for 'c' has the
+ * _CC_NONLATIN1_SIMPLE_FOLD bit set. 'c' is evaluated twice. */
+# define _HAS_NONLATIN1_SIMPLE_FOLD_CLOSURE_ONLY_FOR_USE_BY_REGCOMP_DOT_C_AND_REGEXEC_DOT_C(c) ((! cBOOL(FITS_IN_8_BITS(c))) || (PL_charclass[(U8) (c)] & _CC_mask(_CC_NONLATIN1_SIMPLE_FOLD)))
+
+ /* Like the above, but also can be part of a multi-char fold. The test
+ * has the same shape, using the _CC_NONLATIN1_FOLD bit instead. */
+# define _HAS_NONLATIN1_FOLD_CLOSURE_ONLY_FOR_USE_BY_REGCOMP_DOT_C_AND_REGEXEC_DOT_C(c) ((! cBOOL(FITS_IN_8_BITS(c))) || (PL_charclass[(U8) (c)] & _CC_mask(_CC_NONLATIN1_FOLD)))
+
+# define _isQUOTEMETA(c) _generic_isCC(c, _CC_QUOTEMETA) /* This macro and the
+ * three that follow it are thin wrappers: each hands 'c' and a single
+ * character class number to _generic_isCC(). As their names say, the
+ * last three are meant for regcomp.c only. */
+# define _IS_NON_FINAL_FOLD_ONLY_FOR_USE_BY_REGCOMP_DOT_C(c) \
+ _generic_isCC(c, _CC_NON_FINAL_FOLD)
+# define _IS_IN_SOME_FOLD_ONLY_FOR_USE_BY_REGCOMP_DOT_C(c) \
+ _generic_isCC(c, _CC_IS_IN_SOME_FOLD)
+# define _IS_MNEMONIC_CNTRL_ONLY_FOR_USE_BY_REGCOMP_DOT_C(c) \
+ _generic_isCC(c, _CC_MNEMONIC_CNTRL)
+#else /* else we don't have perl.h H_PERL */
+
+ /* If we don't have perl.h, we are compiling a utility program. Below we
+ * hard-code various macro definitions that wouldn't otherwise be available
+ * to it. Most are coded based on first principles. These are written to
+ * avoid EBCDIC vs. ASCII #ifdef's as much as possible. */
+# define isDIGIT_A(c) inRANGE(c, '0', '9') /* one of the digits '0'..'9' */
+# define isBLANK_A(c) ((c) == ' ' || (c) == '\t') /* space or horizontal tab */
+# define isSPACE_A(c) (isBLANK_A(c) /* a blank, or one of: newline, carriage
+ return, vertical tab, form feed */ \
+ || (c) == '\n' \
+ || (c) == '\r' \
+ || (c) == '\v' \
+ || (c) == '\f')
+ /* On EBCDIC, there are gaps between 'i' and 'j'; 'r' and 's'. Same for
+ * uppercase. The tests for those aren't necessary on ASCII, but hurt only
+ * performance (if optimization isn't on), and allow the same code to be
+ * used for both platform types */
+ /* True if 'c' is one of the lowercase letters 'a' through 'z'. The test
+ * is split into three sub-ranges because the letters are not contiguous
+ * on EBCDIC. The expansion is fully parenthesized so the macro can be
+ * used as an operand, e.g. (! isLOWER_A(c)) or (isLOWER_A(c) && x),
+ * without the neighbouring operator binding to just one sub-range test.
+ * 'c' is evaluated more than once. */
+# define isLOWER_A(c) ( inRANGE((c), 'a', 'i') \
+ || inRANGE((c), 'j', 'r') \
+ || inRANGE((c), 's', 'z'))
+ /* True if 'c' is one of the uppercase letters 'A' through 'Z'. The test
+ * is split into three sub-ranges because the letters are not contiguous
+ * on EBCDIC. The expansion is fully parenthesized so the macro can be
+ * used as an operand, e.g. (! isUPPER_A(c)) or (isUPPER_A(c) && x),
+ * without the neighbouring operator binding to just one sub-range test.
+ * 'c' is evaluated more than once. */
+# define isUPPER_A(c) ( inRANGE((c), 'A', 'I') \
+ || inRANGE((c), 'J', 'R') \
+ || inRANGE((c), 'S', 'Z'))
+# define isALPHA_A(c) (isUPPER_A(c) || isLOWER_A(c)) /* any ASCII letter */
+# define isALPHANUMERIC_A(c) (isALPHA_A(c) || isDIGIT_A(c)) /* letter or digit */
+# define isWORDCHAR_A(c) (isALPHANUMERIC_A(c) || (c) == '_') /* letter, digit or underscore */
+# define isIDFIRST_A(c) (isALPHA_A(c) || (c) == '_') /* letter or underscore; no digits */
+ /* True if 'c' is an ASCII hexadecimal digit: '0'..'9', 'a'..'f' or
+ * 'A'..'F'. 'c' is evaluated more than once. */
+# define isXDIGIT_A(c) ( isDIGIT_A(c) \
+ || inRANGE((c), 'a', 'f') \
+ || inRANGE((c), 'A', 'F'))
+# define isPUNCT_A(c) ((c) == '-' || (c) == '!' || (c) == '"' /* 32 characters
+ in all, each compared as a character literal rather than by
+ numeric code point */ \
+ || (c) == '#' || (c) == '$' || (c) == '%' \
+ || (c) == '&' || (c) == '\'' || (c) == '(' \
+ || (c) == ')' || (c) == '*' || (c) == '+' \
+ || (c) == ',' || (c) == '.' || (c) == '/' \
+ || (c) == ':' || (c) == ';' || (c) == '<' \
+ || (c) == '=' || (c) == '>' || (c) == '?' \
+ || (c) == '@' || (c) == '[' || (c) == '\\' \
+ || (c) == ']' || (c) == '^' || (c) == '_' \
+ || (c) == '`' || (c) == '{' || (c) == '|' \
+ || (c) == '}' || (c) == '~')
+# define isGRAPH_A(c) (isALPHANUMERIC_A(c) || isPUNCT_A(c)) /* letter, digit or punctuation */
+# define isPRINT_A(c) (isGRAPH_A(c) || (c) == ' ') /* anything graphic, plus the space character */
+
+# ifdef EBCDIC
+ /* The below is accurate for the 3 EBCDIC code pages traditionally
+ * supported by perl. The only difference between them in the controls
+ * is the position of \n, and that is represented symbolically below.
+ * Controls that have a C escape sequence are tested with that escape;
+ * the others are tested by numeric code point, with the comment beside
+ * each test naming the control it stands for. */
+# define isCNTRL_A(c) ((c) == '\0' || (c) == '\a' || (c) == '\b' \
+ || (c) == '\f' || (c) == '\n' || (c) == '\r' \
+ || (c) == '\t' || (c) == '\v' \
+ || inRANGE((c), 1, 3) /* SOH, STX, ETX */ \
+ || (c) == 7 /* U+7F DEL */ \
+ || inRANGE((c), 0x0E, 0x13) /* SO SI DLE \
+ DC[1-3] */ \
+ || (c) == 0x18 /* U+18 CAN */ \
+ || (c) == 0x19 /* U+19 EOM */ \
+ || inRANGE((c), 0x1C, 0x1F) /* [FGRU]S */ \
+ || (c) == 0x26 /* U+17 ETB */ \
+ || (c) == 0x27 /* U+1B ESC */ \
+ || (c) == 0x2D /* U+05 ENQ */ \
+ || (c) == 0x2E /* U+06 ACK */ \
+ || (c) == 0x32 /* U+16 SYN */ \
+ || (c) == 0x37 /* U+04 EOT */ \
+ || (c) == 0x3C /* U+14 DC4 */ \
+ || (c) == 0x3D /* U+15 NAK */ \
+ || (c) == 0x3F)/* U+1A SUB */
+# define isASCII(c) (isCNTRL_A(c) || isPRINT_A(c)) /* here ASCII is built up
+ * as the union of the controls listed above and the printables */
+# else /* isASCII is already defined for ASCII platforms, so can use that to
+ define isCNTRL: an ASCII character that is not printable */
+# define isCNTRL_A(c) (isASCII(c) && ! isPRINT_A(c))
+# endif
+
+ /* The _L1 macros may be unnecessary for the utilities; I (khw) added them
+ * during debugging, and it seems best to keep them. We may be called
+ * without NATIVE_TO_LATIN1 being defined. On ASCII platforms, it doesn't
+ * do anything anyway, so make it not a problem */
+# if ! defined(EBCDIC) && ! defined(NATIVE_TO_LATIN1)
+# define NATIVE_TO_LATIN1(ch) (ch) /* identity fallback, supplied only when
+ * not on EBCDIC and nothing has defined the macro already */
+# endif
+# define isALPHA_L1(c) (isUPPER_L1(c) || isLOWER_L1(c)) /* upper- or lowercase by the _L1 tests */
+# define isALPHANUMERIC_L1(c) (isALPHA_L1(c) || isDIGIT_A(c)) /* the digit test stays the ASCII one */
+ /* True if 'c' is an ASCII blank, or fits in 8 bits and maps to Latin1
+ * 0xA0 (NO-BREAK SPACE). The argument is parenthesized inside the (U8)
+ * cast so that the cast applies to the whole argument expression, not
+ * just its first operand. 'c' is evaluated more than once. */
+# define isBLANK_L1(c) (isBLANK_A(c) \
+ || (FITS_IN_8_BITS(c) \
+ && NATIVE_TO_LATIN1((U8) (c)) == 0xA0))
+# define isCNTRL_L1(c) (FITS_IN_8_BITS(c) && (! isPRINT_L1(c))) /* any 8-bit value that is not printable */
+# define isGRAPH_L1(c) (isPRINT_L1(c) && (! isBLANK_L1(c))) /* printable, but not a blank */
+ /* True if 'c' is an ASCII lowercase letter, or fits in 8 bits and maps
+ * to one of these Latin1 code points: 0xDF and above except 0xF7
+ * (DIVISION SIGN), or 0xAA, 0xBA, 0xB5. The argument is parenthesized
+ * inside each (U8) cast so that the cast applies to the whole argument
+ * expression. 'c' is evaluated more than once. */
+# define isLOWER_L1(c) (isLOWER_A(c) \
+ || (FITS_IN_8_BITS(c) \
+ && (( NATIVE_TO_LATIN1((U8) (c)) >= 0xDF \
+ && NATIVE_TO_LATIN1((U8) (c)) != 0xF7) \
+ || NATIVE_TO_LATIN1((U8) (c)) == 0xAA \
+ || NATIVE_TO_LATIN1((U8) (c)) == 0xBA \
+ || NATIVE_TO_LATIN1((U8) (c)) == 0xB5)))
+ /* True if 'c' is ASCII-printable, or fits in 8 bits and maps to Latin1
+ * 0xA0 or above. The argument is parenthesized inside the (U8) cast so
+ * that the cast applies to the whole argument expression. 'c' is
+ * evaluated more than once. */
+# define isPRINT_L1(c) (isPRINT_A(c) \
+ || (FITS_IN_8_BITS(c) \
+ && NATIVE_TO_LATIN1((U8) (c)) >= 0xA0))
+ /* True if 'c' is ASCII punctuation, or fits in 8 bits and maps to one
+ * of the Latin1 code points 0xA1, 0xA7, 0xAB, 0xB6, 0xB7, 0xBB, 0xBF.
+ * The argument is parenthesized inside each (U8) cast so that the cast
+ * applies to the whole argument expression. 'c' is evaluated more than
+ * once. */
+# define isPUNCT_L1(c) (isPUNCT_A(c) \
+ || (FITS_IN_8_BITS(c) \
+ && ( NATIVE_TO_LATIN1((U8) (c)) == 0xA1 \
+ || NATIVE_TO_LATIN1((U8) (c)) == 0xA7 \
+ || NATIVE_TO_LATIN1((U8) (c)) == 0xAB \
+ || NATIVE_TO_LATIN1((U8) (c)) == 0xB6 \
+ || NATIVE_TO_LATIN1((U8) (c)) == 0xB7 \
+ || NATIVE_TO_LATIN1((U8) (c)) == 0xBB \
+ || NATIVE_TO_LATIN1((U8) (c)) == 0xBF)))
+ /* True if 'c' is ASCII white space, or fits in 8 bits and maps to
+ * Latin1 0x85 (NEXT LINE) or 0xA0 (NO-BREAK SPACE). The argument is
+ * parenthesized inside each (U8) cast so that the cast applies to the
+ * whole argument expression. 'c' is evaluated more than once. */
+# define isSPACE_L1(c) (isSPACE_A(c) \
+ || (FITS_IN_8_BITS(c) \
+ && ( NATIVE_TO_LATIN1((U8) (c)) == 0x85 \
+ || NATIVE_TO_LATIN1((U8) (c)) == 0xA0)))
+ /* True if 'c' is an ASCII uppercase letter, or fits in 8 bits and maps
+ * to a Latin1 code point in 0xC0..0xDE other than 0xD7 (MULTIPLICATION
+ * SIGN). The range test uses inRANGE(), the same helper as the other
+ * range tests in this group. The argument is parenthesized inside each
+ * (U8) cast so that the cast applies to the whole argument expression.
+ * 'c' is evaluated more than once. */
+# define isUPPER_L1(c) (isUPPER_A(c) \
+ || (FITS_IN_8_BITS(c) \
+ && ( inRANGE(NATIVE_TO_LATIN1((U8) (c)), \
+ 0xC0, 0xDE) \
+ && NATIVE_TO_LATIN1((U8) (c)) != 0xD7)))
+# define isWORDCHAR_L1(c) (isIDFIRST_L1(c) || isDIGIT_A(c)) /* identifier-start character, or a digit */
+ /* True if 'c' is alphabetic by the _L1 tests, or is an underscore. '_'
+ * is a native character literal, so it is compared with the native
+ * value of 'c' directly; translating 'c' to Latin1 first would fail to
+ * match underscore on EBCDIC. 'c' is evaluated more than once. */
+# define isIDFIRST_L1(c) (isALPHA_L1(c) || (c) == '_')
+# define isCHARNAME_CONT(c) (isWORDCHAR_L1(c) /* a word character or blank by
+ the _L1 tests, or one of: hyphen, left or
+ right parenthesis */ \
+ || isBLANK_L1(c) \
+ || (c) == '-' \
+ || (c) == '(' \
+ || (c) == ')')
+ /* The following are not fully accurate in the above-ASCII range. I (khw)
+ * don't think it's necessary to be so for the purposes where this gets
+ * compiled */
+# define _isQUOTEMETA(c) (FITS_IN_8_BITS(c) && ! isWORDCHAR_L1(c)) /* approximation: any 8-bit non-word character */
+# define _IS_IN_SOME_FOLD_ONLY_FOR_USE_BY_REGCOMP_DOT_C(c) isALPHA_L1(c) /* approximation: any _L1 alphabetic */
+
+ /* And these aren't accurate at all. They are useful only for above
+ * Latin1, which utilities and bootstrapping don't deal with. Each one
+ * ignores its argument and expands to the constant 0. */
+# define _IS_NON_FINAL_FOLD_ONLY_FOR_USE_BY_REGCOMP_DOT_C(c) 0
+# define _HAS_NONLATIN1_SIMPLE_FOLD_CLOSURE_ONLY_FOR_USE_BY_REGCOMP_DOT_C_AND_REGEXEC_DOT_C(c) 0
+# define _HAS_NONLATIN1_FOLD_CLOSURE_ONLY_FOR_USE_BY_REGCOMP_DOT_C_AND_REGEXEC_DOT_C(c) 0
+
+ /* Many of the macros later in this file are defined in terms of these. By
+ * implementing them with a function, which converts the class number into
+ * a call to the desired macro, all of the later ones work. However, that
+ * function won't be actually defined when building a utility program (no
+ * perl.h), and so a compiler error will be generated if one is attempted
+ * to be used. And the above-Latin1 code points require Unicode tables to
+ * be present, something unlikely to be the case when bootstrapping */
+# define _generic_isCC(c, classnum) /* false for anything wider than 8 bits;
+ otherwise defers to S_bootstrap_ctype() with TRUE as its last
+ argument. NOTE(review): that flag presumably selects the full
+ Latin1 range rather than ASCII only -- confirm against
+ S_bootstrap_ctype() */ \
+ (FITS_IN_8_BITS(c) && S_bootstrap_ctype((U8) (c), (classnum), TRUE))
+# define _generic_isCC_A(c, classnum) /* same, but passes FALSE as the last
+ argument */ \
+ (FITS_IN_8_BITS(c) && S_bootstrap_ctype((U8) (c), (classnum), FALSE))
+#endif /* End of no perl.h H_PERL */
+
+#define isALPHANUMERIC(c) isALPHANUMERIC_A(c) /* In this group, each
+ * unsuffixed name is a plain alias for its _A counterpart, so the
+ * unsuffixed tests match exactly what the _A tests match. */
+#define isALPHA(c) isALPHA_A(c)
+#define isASCII_A(c) isASCII(c) /* both suffixed isASCII forms are just isASCII */
+#define isASCII_L1(c) isASCII(c)
+#define isBLANK(c) isBLANK_A(c)
+#define isCNTRL(c) isCNTRL_A(c)
+#define isDIGIT(c) isDIGIT_A(c)
+#define isGRAPH(c) isGRAPH_A(c)
+#define isIDFIRST(c) isIDFIRST_A(c)
+#define isLOWER(c) isLOWER_A(c)
+#define isPRINT(c) isPRINT_A(c)
+#define isPSXSPC_A(c) isSPACE_A(c) /* the isPSXSPC forms are aliases for the matching isSPACE forms */
+#define isPSXSPC(c) isPSXSPC_A(c)
+#define isPSXSPC_L1(c) isSPACE_L1(c)
+#define isPUNCT(c) isPUNCT_A(c)
+#define isSPACE(c) isSPACE_A(c)
+#define isUPPER(c) isUPPER_A(c)
+#define isWORDCHAR(c) isWORDCHAR_A(c)
+#define isXDIGIT(c) isXDIGIT_A(c)
+
+/* ASCII casing. These could also be written as
+ #define toLOWER(c) (isASCII(c) ? toLOWER_LATIN1(c) : (c))
+ #define toUPPER(c) (isASCII(c) ? toUPPER_LATIN1_MOD(c) : (c))
+ which uses table lookup and mask instead of subtraction. (This would
+ work because the _MOD does not apply in the ASCII range).
+
+ These actually are UTF-8 invariant casing, not just ASCII, as any non-ASCII
+ UTF-8 invariants are neither upper nor lower. (Only on EBCDIC platforms are
+ there non-ASCII invariants, and all of them are controls.) */
+#define toLOWER(c) (isUPPER(c) ? (U8)((c) + ('a' - 'A')) : (c)) /* uppercase input is
+ * shifted by the 'a' - 'A' distance and narrowed to U8; anything else is
+ * returned unchanged. 'c' is evaluated more than once. */
+#define toUPPER(c) (isLOWER(c) ? (U8)((c) - ('a' - 'A')) : (c)) /* the mirror image:
+ * lowercase input is shifted the other way; anything else is returned
+ * unchanged. 'c' is evaluated more than once. */