|| NATIVE_TO_UNI((U8) c) == 0xAA \
|| NATIVE_TO_UNI((U8) c) == 0xB5 \
|| NATIVE_TO_UNI((U8) c) == 0xBA)))
-# define isCHARNAME_CONT(c) (isALNUM_L1(c) || (c) == ' ' || (c) == '-' || (c) == '(' || (c) == ')' || (c) == ':' || NATIVE_TO_UNI((U8) c) == 0xA0)
+/* True when 'c' satisfies isWORDCHAR_L1(), or is one of ' ', '-', '(', ')',
+ * ':', or converts through NATIVE_TO_UNI() to 0xA0. The operand of the U8
+ * cast is parenthesized so that an expression argument is cast as a whole
+ * rather than only its first operand. */
+# define isCHARNAME_CONT(c) (isWORDCHAR_L1(c) || (c) == ' ' || (c) == '-' || (c) == '(' || (c) == ')' || (c) == ':' || NATIVE_TO_UNI((U8) (c)) == 0xA0)
#endif
/* Macros for backwards compatibility and for completeness when the ASCII and
# define isPRINT(c) isprint(c)
# define isPSXSPC(c) isspace(c)
# define isPUNCT(c) ispunct(c)
-# define isSPACE(c) (isPSXSPC(c) && (c) != '\v')
+# define isSPACE(c) (isPSXSPC(c) /* && (c) != '\v' -- this exclusion is disabled:
+ isSPACE and isPSXSPC are experimentally being made identical */)
# define isUPPER(c) isupper(c)
# define isXDIGIT(c) isxdigit(c)
# define isWORDCHAR(c) (isalnum(c) || (c) == '_')
#ifdef USE_NEXT_CTYPE
-# define isALNUM_LC(c) \
+# define isWORDCHAR_LC(c) \
(NXIsAlNum((unsigned int)(c)) || (char)(c) == '_')
+# define isALNUM_LC(c) isWORDCHAR_LC(c) /* former name, now an alias for isWORDCHAR_LC */
# define isIDFIRST_LC(c) \
(NXIsAlpha((unsigned int)(c)) || (char)(c) == '_')
# define isALPHA_LC(c) NXIsAlpha((unsigned int)(c))
# if defined(CTYPE256) || (!defined(isascii) && !defined(HAS_ISASCII))
-/* Note that the foo_LC() macros in this case generally are defined only on
- * code points 0-256, and give undefined, unwarned results if called with
- * values outside that range */
+/* Each foo_LC() macro in this branch first tests FITS_IN_8_BITS(c) and only
+ * then hands (unsigned char)(c) to the C library, so an input that fails the
+ * test gives false from the is...() macros and comes back unchanged from the
+ * to...() ones. Use foo_LC_uvchr() instead of these for beyond the Latin1
+ * range */
-# define isALNUM_LC(c) (isalnum((unsigned char)(c)) || (char)(c) == '_')
-# define isIDFIRST_LC(c) (isalpha((unsigned char)(c)) || (char)(c) == '_')
-# define isALPHA_LC(c) isalpha((unsigned char)(c))
+# define isWORDCHAR_LC(c) (FITS_IN_8_BITS(c) \
+ && (isalnum((unsigned char)(c)) || (char)(c) == '_'))
+# define isALNUM_LC(c) isWORDCHAR_LC(c) /* former name, now an alias for isWORDCHAR_LC */
+# define isIDFIRST_LC(c) (FITS_IN_8_BITS(c) \
+ && (isalpha((unsigned char)(c)) || (char)(c) == '_'))
+# define isALPHA_LC(c) (FITS_IN_8_BITS(c) && isalpha((unsigned char)(c)))
# ifdef HAS_ISASCII
-# define isASCII_LC(c) isascii((unsigned char)(c))
+# define isASCII_LC(c) (FITS_IN_8_BITS(c) && isascii((unsigned char)(c)))
# else
-# define isASCII_LC(c) isASCII((unsigned char)(c))
+# define isASCII_LC(c) (FITS_IN_8_BITS(c) && isASCII((unsigned char)(c))) /* HAS_ISASCII not defined: use isASCII() */
# endif
# ifdef HAS_ISBLANK
-# define isBLANK_LC(c) isblank((unsigned char)(c))
+# define isBLANK_LC(c) (FITS_IN_8_BITS(c) && isblank((unsigned char)(c)))
# else
-# define isBLANK_LC(c) isBLANK((unsigned char)(c))
+# define isBLANK_LC(c) (FITS_IN_8_BITS(c) && isBLANK((unsigned char)(c))) /* HAS_ISBLANK not defined: use isBLANK() */
# endif
-# define isSPACE_LC(c) isspace((unsigned char)(c))
-# define isDIGIT_LC(c) isdigit((unsigned char)(c))
-# define isUPPER_LC(c) isupper((unsigned char)(c))
-# define isLOWER_LC(c) islower((unsigned char)(c))
-# define isALNUMC_LC(c) isalnum((unsigned char)(c))
-# define isCNTRL_LC(c) iscntrl((unsigned char)(c))
-# define isGRAPH_LC(c) isgraph((unsigned char)(c))
-# define isPRINT_LC(c) isprint((unsigned char)(c))
-# define isPUNCT_LC(c) ispunct((unsigned char)(c))
-# define toUPPER_LC(c) toupper((unsigned char)(c))
-# define toLOWER_LC(c) tolower((unsigned char)(c))
+# define isSPACE_LC(c) (FITS_IN_8_BITS(c) && isspace((unsigned char)(c)))
+# define isDIGIT_LC(c) (FITS_IN_8_BITS(c) && isdigit((unsigned char)(c)))
+# define isUPPER_LC(c) (FITS_IN_8_BITS(c) && isupper((unsigned char)(c)))
+# define isLOWER_LC(c) (FITS_IN_8_BITS(c) && islower((unsigned char)(c)))
+# define isALNUMC_LC(c) (FITS_IN_8_BITS(c) && isalnum((unsigned char)(c)))
+# define isCNTRL_LC(c) (FITS_IN_8_BITS(c) && iscntrl((unsigned char)(c)))
+# define isGRAPH_LC(c) (FITS_IN_8_BITS(c) && isgraph((unsigned char)(c)))
+# define isPRINT_LC(c) (FITS_IN_8_BITS(c) && isprint((unsigned char)(c)))
+# define isPUNCT_LC(c) (FITS_IN_8_BITS(c) && ispunct((unsigned char)(c)))
+# define toUPPER_LC(c) (FITS_IN_8_BITS(c) ? toupper((unsigned char)(c)) : (c)) /* yields c itself when FITS_IN_8_BITS(c) is false */
+# define toLOWER_LC(c) (FITS_IN_8_BITS(c) ? tolower((unsigned char)(c)) : (c)) /* yields c itself when FITS_IN_8_BITS(c) is false */
# else
-# define isALNUM_LC(c) (isascii(c) && (isalnum(c) || (c) == '_'))
+# define isWORDCHAR_LC(c) (isascii(c) && (isalnum(c) || (c) == '_'))
+# define isALNUM_LC(c) isWORDCHAR_LC(c) /* former name, now an alias for isWORDCHAR_LC */
# define isIDFIRST_LC(c) (isascii(c) && (isalpha(c) || (c) == '_'))
# define isALPHA_LC(c) (isascii(c) && isalpha(c))
# define isASCII_LC(c) isascii(c)
# define isGRAPH_LC(c) (isascii(c) && isgraph(c))
# define isPRINT_LC(c) (isascii(c) && isprint(c))
# define isPUNCT_LC(c) (isascii(c) && ispunct(c))
-# define toUPPER_LC(c) toupper(c)
-# define toLOWER_LC(c) tolower(c)
+# define toUPPER_LC(c) (isascii(c) ? toupper(c) : (c)) /* yields c itself when isascii(c) is false */
+# define toLOWER_LC(c) (isascii(c) ? tolower(c) : (c)) /* yields c itself when isascii(c) is false */
# endif
#endif /* USE_NEXT_CTYPE */
#define toLOWER_uni(c,s,l) to_uni_lower(c,s,l)
#define toFOLD_uni(c,s,l) to_uni_fold(c,s,l)
-#define isALNUM_LC_uvchr(c) (c < 256 ? isALNUM_LC(c) : is_uni_alnum_lc(c))
-#define isIDFIRST_LC_uvchr(c) (c < 256 ? isIDFIRST_LC(c) : is_uni_idfirst_lc(c))
-#define isALPHA_LC_uvchr(c) (c < 256 ? isALPHA_LC(c) : is_uni_alpha_lc(c))
-#define isSPACE_LC_uvchr(c) (c < 256 ? isSPACE_LC(c) : is_uni_space_lc(c))
-#define isDIGIT_LC_uvchr(c) (c < 256 ? isDIGIT_LC(c) : is_uni_digit_lc(c))
-#define isUPPER_LC_uvchr(c) (c < 256 ? isUPPER_LC(c) : is_uni_upper_lc(c))
-#define isLOWER_LC_uvchr(c) (c < 256 ? isLOWER_LC(c) : is_uni_lower_lc(c))
-#define isCNTRL_LC_uvchr(c) (c < 256 ? isCNTRL_LC(c) : is_uni_cntrl_lc(c))
-#define isGRAPH_LC_uvchr(c) (c < 256 ? isGRAPH_LC(c) : is_uni_graph_lc(c))
-#define isPRINT_LC_uvchr(c) (c < 256 ? isPRINT_LC(c) : is_uni_print_lc(c))
-#define isPUNCT_LC_uvchr(c) (c < 256 ? isPUNCT_LC(c) : is_uni_punct_lc(c))
-
-#define isPSXSPC_LC_uni(c) (isSPACE_LC_uni(c) ||(c) == '\f')
+/* Pick the classifier by the size of 'c': a value below 256 is given to the
+ * 'latin1' macro, anything else is given to 'above_latin1' after passing
+ * through NATIVE_TO_UNI(). 'c' is parenthesized in the comparison so that an
+ * expression argument is compared as a whole; note that 'c' is still
+ * evaluated more than once. */
+#define _gnrc_is_LC_uvchr(latin1, above_latin1, c) \
+ ((c) < 256 ? latin1(c) : above_latin1(NATIVE_TO_UNI(c)))
+#define isWORDCHAR_LC_uvchr(c) _gnrc_is_LC_uvchr(isWORDCHAR_LC, \
+ is_uni_alnum_lc, c)
+#define isALNUM_LC_uvchr(c) isWORDCHAR_LC_uvchr(c)
+#define isIDFIRST_LC_uvchr(c) _gnrc_is_LC_uvchr(isIDFIRST_LC, \
+ is_uni_idfirst_lc, c)
+#define isALPHA_LC_uvchr(c) _gnrc_is_LC_uvchr(isALPHA_LC, is_uni_alpha_lc, c)
+#define isSPACE_LC_uvchr(c) _gnrc_is_LC_uvchr(isSPACE_LC, \
+ is_XPERLSPACE_cp_high, c)
+#define isDIGIT_LC_uvchr(c) _gnrc_is_LC_uvchr(isDIGIT_LC, is_uni_digit_lc, c)
+#define isUPPER_LC_uvchr(c) _gnrc_is_LC_uvchr(isUPPER_LC, is_uni_upper_lc, c)
+#define isLOWER_LC_uvchr(c) _gnrc_is_LC_uvchr(isLOWER_LC, is_uni_lower_lc, c)
+#define isCNTRL_LC_uvchr(c) ((c) < 256 ? isCNTRL_LC(c) : 0) /* always 0 for c >= 256 */
+#define isGRAPH_LC_uvchr(c) _gnrc_is_LC_uvchr(isGRAPH_LC, is_uni_graph_lc, c)
+#define isPRINT_LC_uvchr(c) _gnrc_is_LC_uvchr(isPRINT_LC, is_uni_print_lc, c)
+#define isPUNCT_LC_uvchr(c) _gnrc_is_LC_uvchr(isPUNCT_LC, is_uni_punct_lc, c)
+
#define isBLANK_LC_uni(c) isBLANK(c) /* could be wrong */
/* For internal core Perl use only. If the input is in the Latin1 range, use
* Latin1 */
#define isPSXSPC_utf8(p) _generic_utf8(_CC_PSXSPC, is_XPERLSPACE_high, p)
-#define isALNUM_LC_utf8(p) isALNUM_LC_uvchr(valid_utf8_to_uvchr(p, 0))
-#define isIDFIRST_LC_utf8(p) isIDFIRST_LC_uvchr(valid_utf8_to_uvchr(p, 0))
-#define isALPHA_LC_utf8(p) isALPHA_LC_uvchr(valid_utf8_to_uvchr(p, 0))
-#define isBLANK_LC_utf8(p) isBLANK_LC_uvchr(valid_utf8_to_uvchr(p, 0))
-#define isSPACE_LC_utf8(p) isSPACE_LC_uvchr(valid_utf8_to_uvchr(p, 0))
-#define isDIGIT_LC_utf8(p) isDIGIT_LC_uvchr(valid_utf8_to_uvchr(p, 0))
-#define isUPPER_LC_utf8(p) isUPPER_LC_uvchr(valid_utf8_to_uvchr(p, 0))
-#define isLOWER_LC_utf8(p) isLOWER_LC_uvchr(valid_utf8_to_uvchr(p, 0))
-#define isALNUMC_LC_utf8(p) isALNUMC_LC_uvchr(valid_utf8_to_uvchr(p, 0))
-#define isCNTRL_LC_utf8(p) isCNTRL_LC_uvchr(valid_utf8_to_uvchr(p, 0))
-#define isGRAPH_LC_utf8(p) isGRAPH_LC_uvchr(valid_utf8_to_uvchr(p, 0))
-#define isPRINT_LC_utf8(p) isPRINT_LC_uvchr(valid_utf8_to_uvchr(p, 0))
-#define isPUNCT_LC_utf8(p) isPUNCT_LC_uvchr(valid_utf8_to_uvchr(p, 0))
+/* For internal core Perl use only. 'p' is a pointer to a UTF-8 string. If
+ * its first byte passes UTF8_IS_INVARIANT, the macro 'macro' is applied to
+ * that byte. If instead the first byte passes UTF8_IS_DOWNGRADEABLE_START
+ * (the input is in the Latin1 range), 'macro' is applied to the value that
+ * TWO_BYTE_UTF8_TO_UNI builds from the first two bytes. Otherwise
+ * use the value given by the 'utf8' parameter. This relies on the fact that
+ * ASCII characters have the same representation whether utf8 or not. Note
+ * that it assumes that the utf8 has been validated, and ignores 'use bytes' */
+#define _generic_LC_utf8_utf8(macro, p, utf8) \
+ (UTF8_IS_INVARIANT(*(p)) \
+ ? macro(*(p)) \
+ : (UTF8_IS_DOWNGRADEABLE_START(*(p))) \
+ ? macro(TWO_BYTE_UTF8_TO_UNI(*(p), *((p)+1))) \
+ : utf8)
+
+#define _generic_LC_utf8(macro, utf8_func, p) \
+ _generic_LC_utf8_utf8(macro, p, utf8_func(p)) /* same dispatch; the fallback value is utf8_func(p) */
+
+#define isWORDCHAR_LC_utf8(p) _generic_LC_utf8(isWORDCHAR_LC, is_utf8_alnum, p)
+#define isALNUM_LC_utf8(p) isWORDCHAR_LC_utf8(p) /* alias for isWORDCHAR_LC_utf8 */
+#define isIDFIRST_LC_utf8(p) _generic_LC_utf8(isIDFIRST_LC, \
+ _is_utf8_perl_idstart, p)
+#define isALPHA_LC_utf8(p) _generic_LC_utf8(isALPHA_LC, is_utf8_alpha, p)
+#define isBLANK_LC_utf8(p) _generic_LC_utf8(isBLANK_LC, is_HORIZWS_high, p)
+#define isSPACE_LC_utf8(p) _generic_LC_utf8(isSPACE_LC, is_XPERLSPACE_high, p)
+#define isDIGIT_LC_utf8(p) _generic_LC_utf8(isDIGIT_LC, is_utf8_digit, p)
+#define isUPPER_LC_utf8(p) _generic_LC_utf8(isUPPER_LC, is_utf8_upper, p)
+#define isLOWER_LC_utf8(p) _generic_LC_utf8(isLOWER_LC, is_utf8_lower, p)
+#define isALNUMC_LC_utf8(p) _generic_LC_utf8(isALNUMC_LC, is_utf8_alnumc, p)
+#define isCNTRL_LC_utf8(p) _generic_LC_utf8_utf8(isCNTRL_LC, p, 0) /* fallback value is the constant 0 */
+#define isGRAPH_LC_utf8(p) _generic_LC_utf8(isGRAPH_LC, is_utf8_graph, p)
+#define isPRINT_LC_utf8(p) _generic_LC_utf8(isPRINT_LC, is_utf8_print, p)
+#define isPUNCT_LC_utf8(p) _generic_LC_utf8(isPUNCT_LC, is_utf8_punct, p)
#define isPSXSPC_LC_utf8(c) (isSPACE_LC_utf8(c) ||(c) == '\f')