-/* Macros that differ between EBCDIC and ASCII. Where C89 defines a function,
- * that is used in the EBCDIC form, because in EBCDIC we do not do locales:
- * therefore can use native functions. For those where C89 doesn't define a
- * function, use our function, assuming that the EBCDIC code page is isomorphic
- * with Latin1, which the three currently recognized by Perl are. Some libc's
- * have an isblank(), but it's not guaranteed. */
-#ifdef EBCDIC
-# define isALPHA(c) isalpha(c)
-# define isALPHANUMERIC(c) isalnum(c)
-# define isBLANK(c) ((c) == ' ' || (c) == '\t' || NATIVE_TO_LATIN1(c) == 0xA0)
-# define isCNTRL(c) iscntrl(c)
-# define isDIGIT(c) isdigit(c)
-# define isGRAPH(c) isgraph(c)
-# define isIDFIRST(c) (isALPHA(c) || (c) == '_')
-# define isLOWER(c) islower(c)
-# define isPRINT(c) isprint(c)
-# define isPSXSPC(c) isspace(c)
-# define isPUNCT(c) ispunct(c)
-# define isSPACE(c) (isPSXSPC(c) /* && (c) != '\v' (Experimentally making
- these macros identical) */)
-# define isUPPER(c) isupper(c)
-# define isXDIGIT(c) isxdigit(c)
-# define isWORDCHAR(c) (isalnum(c) || (c) == '_')
-# define toLOWER(c) tolower(c)
-# define toUPPER(c) toupper(c)
-#else /* Not EBCDIC: ASCII-only matching */
-# define isALPHANUMERIC(c) isALPHANUMERIC_A(c)
-# define isALPHA(c) isALPHA_A(c)
-# define isBLANK(c) isBLANK_A(c)
-# define isCNTRL(c) isCNTRL_A(c)
-# define isDIGIT(c) isDIGIT_A(c)
-# define isGRAPH(c) isGRAPH_A(c)
-# define isIDFIRST(c) isIDFIRST_A(c)
-# define isLOWER(c) isLOWER_A(c)
-# define isPRINT(c) isPRINT_A(c)
-# define isPSXSPC(c) isPSXSPC_A(c)
-# define isPUNCT(c) isPUNCT_A(c)
-# define isSPACE(c) isSPACE_A(c)
-# define isUPPER(c) isUPPER_A(c)
-# define isWORDCHAR(c) isWORDCHAR_A(c)
-# define isXDIGIT(c) isXDIGIT_A(c)
-
- /* ASCII casing. These could also be written as
- #define toLOWER(c) (isASCII(c) ? toLOWER_LATIN1(c) : (c))
- #define toUPPER(c) (isASCII(c) ? toUPPER_LATIN1_MOD(c) : (c))
- which uses table lookup and mask instead of subtraction. (This would
- work because the _MOD does not apply in the ASCII range) */
-# define toLOWER(c) (isUPPER(c) ? (c) + ('a' - 'A') : (c))
-# define toUPPER(c) (isLOWER(c) ? (c) - ('a' - 'A') : (c))
-#endif
+# ifdef EBCDIC
+# define isASCII(c) _generic_isCC(c, _CC_ASCII)
+# endif
+
+ /* Participates in a single-character fold with a character above 255. Any c for which FITS_IN_8_BITS is false yields true without a lookup; otherwise the _CC_NONLATIN1_SIMPLE_FOLD bit in PL_charclass decides */
+# define _HAS_NONLATIN1_SIMPLE_FOLD_CLOSURE_ONLY_FOR_USE_BY_REGCOMP_DOT_C_AND_REGEXEC_DOT_C(c) ((! cBOOL(FITS_IN_8_BITS(c))) || (PL_charclass[(U8) (c)] & _CC_mask(_CC_NONLATIN1_SIMPLE_FOLD)))
+
+ /* Like the above, but also can be part of a multi-char fold; same shape, testing the _CC_NONLATIN1_FOLD bit instead */
+# define _HAS_NONLATIN1_FOLD_CLOSURE_ONLY_FOR_USE_BY_REGCOMP_DOT_C_AND_REGEXEC_DOT_C(c) ((! cBOOL(FITS_IN_8_BITS(c))) || (PL_charclass[(U8) (c)] & _CC_mask(_CC_NONLATIN1_FOLD)))
+
+# define _isQUOTEMETA(c) _generic_isCC(c, _CC_QUOTEMETA)
+# define _IS_NON_FINAL_FOLD_ONLY_FOR_USE_BY_REGCOMP_DOT_C(c) \
+ _generic_isCC(c, _CC_NON_FINAL_FOLD)
+# define _IS_IN_SOME_FOLD_ONLY_FOR_USE_BY_REGCOMP_DOT_C(c) \
+ _generic_isCC(c, _CC_IS_IN_SOME_FOLD)
+# define _IS_MNEMONIC_CNTRL_ONLY_FOR_USE_BY_REGCOMP_DOT_C(c) \
+ _generic_isCC(c, _CC_MNEMONIC_CNTRL)
+#else /* else we don't have perl.h H_PERL */
+
+ /* If we don't have perl.h, we are compiling a utility program. Below we
+ * hard-code various macro definitions that wouldn't otherwise be available
+ * to it. Most are coded based on first principles. First, some that are
+ * common to both ASCII and EBCDIC */
+# define isDIGIT_A(c) ((c) <= '9' && (c) >= '0') /* '0' through '9' */
+# define isBLANK_A(c) ((c) == ' ' || (c) == '\t') /* space or horizontal tab */
+# define isSPACE_A(c) (isBLANK_A(c) /* blanks, plus NL, CR, VT and FF */ \
+ || (c) == '\n' \
+ || (c) == '\r' \
+ || (c) == '\v' \
+ || (c) == '\f')
+# ifdef EBCDIC /* Letters aren't contiguous here: there are gaps between
+ 'i' and 'j'; 'r' and 's'. Same for uppercase. The outer
+ range test comes first so as to exclude most things early */
+# define isLOWER_A(c) ((c) >= 'a' && (c) <= 'z' \
+ && ((c) <= 'i' \
+ || ((c) >= 'j' && (c) <= 'r') \
+ || (c) >= 's'))
+# define isUPPER_A(c) ((c) >= 'A' && (c) <= 'Z' \
+ && ((c) <= 'I' \
+ || ((c) >= 'J' && (c) <= 'R') \
+ || (c) >= 'S'))
+# else /* ASCII platform: a single range test per case suffices */
+# define isLOWER_A(c) ((c) >= 'a' && (c) <= 'z')
+# define isUPPER_A(c) ((c) <= 'Z' && (c) >= 'A')
+# endif
+
+ /* More definitions common to ASCII and EBCDIC platforms, built from the macros above */
+# define isALPHA_A(c) (isUPPER_A(c) || isLOWER_A(c))
+# define isALPHANUMERIC_A(c) (isALPHA_A(c) || isDIGIT_A(c))
+# define isWORDCHAR_A(c) (isALPHANUMERIC_A(c) || (c) == '_')
+# define isIDFIRST_A(c) (isALPHA_A(c) || (c) == '_') /* like isWORDCHAR_A, minus the digits */
+# define isXDIGIT_A(c) (isDIGIT_A(c) \
+ || ((c) >= 'a' && (c) <= 'f') \
+ || ((c) <= 'F' && (c) >= 'A'))
+
+# ifdef EBCDIC
+# define isPUNCT_A(c) ((c) == '-' || (c) == '!' || (c) == '"' \
+ || (c) == '#' || (c) == '$' || (c) == '%' \
+ || (c) == '&' || (c) == '\'' || (c) == '(' \
+ || (c) == ')' || (c) == '*' || (c) == '+' \
+ || (c) == ',' || (c) == '.' || (c) == '/' \
+ || (c) == ':' || (c) == ';' || (c) == '<' \
+ || (c) == '=' || (c) == '>' || (c) == '?' \
+ || (c) == '@' || (c) == '[' || (c) == '\\' \
+ || (c) == ']' || (c) == '^' || (c) == '_' \
+ || (c) == '`' || (c) == '{' || (c) == '|' \
+ || (c) == '}' || (c) == '~')
+# define isGRAPH_A(c) (isALPHANUMERIC_A(c) || isPUNCT_A(c))
+# define isPRINT_A(c) (isGRAPH_A(c) || (c) == ' ')
+
+# ifdef QUESTION_MARK_CTRL
+# define _isQMC(c) ((c) == QUESTION_MARK_CTRL)
+# else
+# define _isQMC(c) 0 /* QUESTION_MARK_CTRL not defined: never matches */
+# endif
+
+ /* I (khw) can't think of a way to define all the ASCII controls
+ * without resorting to a libc (locale-sensitive) call. But we know
+ * that all controls but the question-mark one are in the range 0-0x3f.
+ * This makes sure that all the controls that have names are included,
+ * and all controls that are also considered ASCII in the locale. This
+ * may include more or fewer than it actually should, but the
+ * wrong ones are less-important controls, so likely won't impact
+ * things (keep in mind that this is compiled only if perl.h isn't
+ * available). The question mark control is included if available */
+# define isCNTRL_A(c) (((c) < 0x40 && isascii(c)) \
+ || (c) == '\0' || (c) == '\a' || (c) == '\b' \
+ || (c) == '\f' || (c) == '\n' || (c) == '\r' \
+ || (c) == '\t' || (c) == '\v' || _isQMC(c))
+
+# define isASCII(c) (isCNTRL_A(c) || isPRINT_A(c)) /* derived here: a control or a printable */
+# else /* ASCII platform; things are simpler, and isASCII has already
+ been defined */
+# define isGRAPH_A(c) (((c) > ' ' && (c) < 127)) /* strictly between space and 127 */
+# define isPRINT_A(c) (isGRAPH_A(c) || (c) == ' ')
+# define isPUNCT_A(c) (isGRAPH_A(c) && (! isALPHANUMERIC_A(c)))
+# define isCNTRL_A(c) (isASCII(c) && (! isPRINT_A(c))) /* ASCII, but not printable */
+# endif
+
+ /* Latin1-range (_L1) classifiers for the utilities. They may be unnecessary;
+ * I (khw) added them during debugging, and it seems best to keep them. We
+ * may be called without NATIVE_TO_LATIN1 being defined. On ASCII platforms
+ * it is the identity, so supply that there. 'c' may be evaluated repeatedly */
+# if ! defined(EBCDIC) && ! defined(NATIVE_TO_LATIN1)
+# define NATIVE_TO_LATIN1(ch) (ch)
+# endif
+# define isALPHA_L1(c) (isUPPER_L1(c) || isLOWER_L1(c))
+# define isALPHANUMERIC_L1(c) (isALPHA_L1(c) || isDIGIT_A(c))
+# define isBLANK_L1(c) (isBLANK_A(c) \
+ || (FITS_IN_8_BITS(c) \
+ && NATIVE_TO_LATIN1((U8) (c)) == 0xA0))
+# define isCNTRL_L1(c) (FITS_IN_8_BITS(c) && (! isPRINT_L1(c)))
+# define isGRAPH_L1(c) (isPRINT_L1(c) && (! isBLANK_L1(c)))
+# define isLOWER_L1(c) (isLOWER_A(c) \
+ || (FITS_IN_8_BITS(c) \
+ && ((NATIVE_TO_LATIN1((U8) (c)) >= 0xDF \
+ && NATIVE_TO_LATIN1((U8) (c)) != 0xF7) \
+ || NATIVE_TO_LATIN1((U8) (c)) == 0xAA \
+ || NATIVE_TO_LATIN1((U8) (c)) == 0xBA \
+ || NATIVE_TO_LATIN1((U8) (c)) == 0xB5)))
+# define isPRINT_L1(c) (isPRINT_A(c) \
+ || (FITS_IN_8_BITS(c) \
+ && NATIVE_TO_LATIN1((U8) (c)) >= 0xA0))
+# define isPUNCT_L1(c) (isPUNCT_A(c) \
+ || (FITS_IN_8_BITS(c) \
+ && (NATIVE_TO_LATIN1((U8) (c)) == 0xA1 \
+ || NATIVE_TO_LATIN1((U8) (c)) == 0xA7 \
+ || NATIVE_TO_LATIN1((U8) (c)) == 0xAB \
+ || NATIVE_TO_LATIN1((U8) (c)) == 0xB6 \
+ || NATIVE_TO_LATIN1((U8) (c)) == 0xB7 \
+ || NATIVE_TO_LATIN1((U8) (c)) == 0xBB \
+ || NATIVE_TO_LATIN1((U8) (c)) == 0xBF)))
+# define isSPACE_L1(c) (isSPACE_A(c) \
+ || (FITS_IN_8_BITS(c) \
+ && (NATIVE_TO_LATIN1((U8) (c)) == 0x85 \
+ || NATIVE_TO_LATIN1((U8) (c)) == 0xA0)))
+# define isUPPER_L1(c) (isUPPER_A(c) \
+ || (FITS_IN_8_BITS(c) \
+ && (NATIVE_TO_LATIN1((U8) (c)) >= 0xC0 \
+ && NATIVE_TO_LATIN1((U8) (c)) <= 0xDE \
+ && NATIVE_TO_LATIN1((U8) (c)) != 0xD7)))
+# define isWORDCHAR_L1(c) (isIDFIRST_L1(c) || isDIGIT_A(c))
+# define isIDFIRST_L1(c) (isALPHA_L1(c) || (c) == '_') /* '_' is a native literal, so compare the native value */
+# define isCHARNAME_CONT(c) (isWORDCHAR_L1(c) \
+ || isBLANK_L1(c) \
+ || (c) == '-' \
+ || (c) == '(' \
+ || (c) == ')')
+ /* The following are not fully accurate in the above-ASCII range. I (khw)
+ * don't think it's necessary to be so for the purposes where this gets
+ * compiled */
+# define _isQUOTEMETA(c) (FITS_IN_8_BITS(c) && ! isWORDCHAR_L1(c)) /* approximated as: passes FITS_IN_8_BITS and is not a word char */
+# define _IS_IN_SOME_FOLD_ONLY_FOR_USE_BY_REGCOMP_DOT_C(c) isALPHA_L1(c) /* approximated as: any _L1 alphabetic */
+
+ /* And these aren't accurate at all. They are useful only for above
+ * Latin1, which utilities and bootstrapping don't deal with */
+# define _IS_NON_FINAL_FOLD_ONLY_FOR_USE_BY_REGCOMP_DOT_C(c) 0 /* always false here */
+# define _HAS_NONLATIN1_SIMPLE_FOLD_CLOSURE_ONLY_FOR_USE_BY_REGCOMP_DOT_C_AND_REGEXEC_DOT_C(c) 0
+# define _HAS_NONLATIN1_FOLD_CLOSURE_ONLY_FOR_USE_BY_REGCOMP_DOT_C_AND_REGEXEC_DOT_C(c) 0
+
+ /* Many of the macros later in this file are defined in terms of these. By
+ * implementing them with a function, which converts the class number into
+ * a call to the desired macro, all of the later ones work. However, that
+ * function won't actually be defined when building a utility program (no
+ * perl.h), and so a compiler error will be generated if an attempt is made
+ * to use one. And the above-Latin1 code points require Unicode tables to
+ * be present, something unlikely to be the case when bootstrapping */
+# define _generic_isCC(c, classnum) \
+ (FITS_IN_8_BITS(c) && S_bootstrap_ctype((U8) (c), (classnum), TRUE))
+# define _generic_isCC_A(c, classnum) \
+ (FITS_IN_8_BITS(c) && S_bootstrap_ctype((U8) (c), (classnum), FALSE)) /* NOTE(review): the final FALSE presumably restricts matching to ASCII -- confirm in S_bootstrap_ctype */
+#endif /* End of no perl.h H_PERL */
+
+#define isALPHANUMERIC(c) isALPHANUMERIC_A(c) /* the unsuffixed names below are aliases for the _A forms */
+#define isALPHA(c) isALPHA_A(c)
+#define isASCII_A(c) isASCII(c) /* the _A and _L1 spellings of isASCII both map to plain isASCII */
+#define isASCII_L1(c) isASCII(c)
+#define isBLANK(c) isBLANK_A(c)
+#define isCNTRL(c) isCNTRL_A(c)
+#define isDIGIT(c) isDIGIT_A(c)
+#define isGRAPH(c) isGRAPH_A(c)
+#define isIDFIRST(c) isIDFIRST_A(c)
+#define isLOWER(c) isLOWER_A(c)
+#define isPRINT(c) isPRINT_A(c)
+#define isPSXSPC_A(c) isSPACE_A(c) /* the PSXSPC names expand to the corresponding SPACE ones */
+#define isPSXSPC(c) isPSXSPC_A(c)
+#define isPSXSPC_L1(c) isSPACE_L1(c)
+#define isPUNCT(c) isPUNCT_A(c)
+#define isSPACE(c) isSPACE_A(c)
+#define isUPPER(c) isUPPER_A(c)
+#define isWORDCHAR(c) isWORDCHAR_A(c)
+#define isXDIGIT(c) isXDIGIT_A(c)
+
+/* ASCII casing: only what isUPPER/isLOWER accept is changed (result cast to U8); anything else is returned as-is. These could also be written as
+ #define toLOWER(c) (isASCII(c) ? toLOWER_LATIN1(c) : (c))
+ #define toUPPER(c) (isASCII(c) ? toUPPER_LATIN1_MOD(c) : (c))
+ which uses table lookup and mask instead of subtraction. (This would
+ work because the _MOD does not apply in the ASCII range) */
+#define toLOWER(c) (isUPPER(c) ? (U8)((c) + ('a' - 'A')) : (c))
+#define toUPPER(c) (isLOWER(c) ? (U8)((c) - ('a' - 'A')) : (c))