This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
handy.h: Add synonym for some macros
[perl5.git] / handy.h
diff --git a/handy.h b/handy.h
index 5134c9d..f6701f1 100644 (file)
--- a/handy.h
+++ b/handy.h
@@ -753,7 +753,7 @@ EXTCONST U32 PL_charclass[];
            || NATIVE_TO_UNI((U8) c) == 0xAA \
            || NATIVE_TO_UNI((U8) c) == 0xB5 \
            || NATIVE_TO_UNI((U8) c) == 0xBA)))
-#   define isCHARNAME_CONT(c) (isALNUM_L1(c) || (c) == ' ' || (c) == '-' || (c) == '(' || (c) == ')' || (c) == ':' || NATIVE_TO_UNI((U8) c) == 0xA0)
+#   define isCHARNAME_CONT(c) (isWORDCHAR_L1(c) || (c) == ' ' || (c) == '-' || (c) == '(' || (c) == ')' || (c) == ':' || NATIVE_TO_UNI((U8) c) == 0xA0)
 #endif
 
 /* Macros for backwards compatibility and for completeness when the ASCII and
@@ -784,7 +784,8 @@ EXTCONST U32 PL_charclass[];
 #   define isPRINT(c)  isprint(c)
 #   define isPSXSPC(c) isspace(c)
 #   define isPUNCT(c)  ispunct(c)
-#   define isSPACE(c)   (isPSXSPC(c) && (c) != '\v')
+#   define isSPACE(c)   (isPSXSPC(c) /* && (c) != '\v' (Experimentally making
+                                        these macros identical) */)
 #   define isUPPER(c)  isupper(c)
 #   define isXDIGIT(c) isxdigit(c)
 #   define isWORDCHAR(c) (isalnum(c) || (c) == '_')
@@ -833,8 +834,9 @@ EXTCONST U32 PL_charclass[];
 
 #ifdef USE_NEXT_CTYPE
 
-#  define isALNUM_LC(c) \
+#  define isWORDCHAR_LC(c) \
        (NXIsAlNum((unsigned int)(c)) || (char)(c) == '_')
+#  define isALNUM_LC(c) isWORDCHAR_LC(c)
 #  define isIDFIRST_LC(c) \
        (NXIsAlpha((unsigned int)(c)) || (char)(c) == '_')
 #  define isALPHA_LC(c)                NXIsAlpha((unsigned int)(c))
@@ -856,38 +858,40 @@ EXTCONST U32 PL_charclass[];
 
 #  if defined(CTYPE256) || (!defined(isascii) && !defined(HAS_ISASCII))
 
-/* Note that the foo_LC() macros in this case generally are defined only on
- * code points 0-256, and give undefined, unwarned results if called with
- * values outside that range */
+/* Use foo_LC_uvchr() instead for code points beyond the Latin1 range */
 
-#    define isALNUM_LC(c)   (isalnum((unsigned char)(c)) || (char)(c) == '_')
-#    define isIDFIRST_LC(c) (isalpha((unsigned char)(c)) || (char)(c) == '_')
-#    define isALPHA_LC(c)      isalpha((unsigned char)(c))
+#    define isWORDCHAR_LC(c) (FITS_IN_8_BITS(c)                                \
+                           && (isalnum((unsigned char)(c)) || (char)(c) == '_'))
+#    define isALNUM_LC(c) isWORDCHAR_LC(c)
+#    define isIDFIRST_LC(c) (FITS_IN_8_BITS(c)                                 \
+                           && (isalpha((unsigned char)(c)) || (char)(c) == '_'))
+#    define isALPHA_LC(c)   (FITS_IN_8_BITS(c) && isalpha((unsigned char)(c)))
 #    ifdef HAS_ISASCII
-#      define isASCII_LC(c)    isascii((unsigned char)(c))
+#      define isASCII_LC(c) (FITS_IN_8_BITS(c) && isascii((unsigned char)(c)))
 #    else
-#      define isASCII_LC(c)    isASCII((unsigned char)(c))
+#      define isASCII_LC(c) (FITS_IN_8_BITS(c) && isASCII((unsigned char)(c)))
 #    endif
 #    ifdef HAS_ISBLANK
-#      define isBLANK_LC(c)    isblank((unsigned char)(c))
+#      define isBLANK_LC(c) (FITS_IN_8_BITS(c) && isblank((unsigned char)(c)))
 #    else
-#      define isBLANK_LC(c)    isBLANK((unsigned char)(c))
+#      define isBLANK_LC(c) (FITS_IN_8_BITS(c) && isBLANK((unsigned char)(c)))
 #    endif
-#    define isSPACE_LC(c)      isspace((unsigned char)(c))
-#    define isDIGIT_LC(c)      isdigit((unsigned char)(c))
-#    define isUPPER_LC(c)      isupper((unsigned char)(c))
-#    define isLOWER_LC(c)      islower((unsigned char)(c))
-#    define isALNUMC_LC(c)     isalnum((unsigned char)(c))
-#    define isCNTRL_LC(c)      iscntrl((unsigned char)(c))
-#    define isGRAPH_LC(c)      isgraph((unsigned char)(c))
-#    define isPRINT_LC(c)      isprint((unsigned char)(c))
-#    define isPUNCT_LC(c)      ispunct((unsigned char)(c))
-#    define toUPPER_LC(c)      toupper((unsigned char)(c))
-#    define toLOWER_LC(c)      tolower((unsigned char)(c))
+#    define isSPACE_LC(c)    (FITS_IN_8_BITS(c) && isspace((unsigned char)(c)))
+#    define isDIGIT_LC(c)    (FITS_IN_8_BITS(c) && isdigit((unsigned char)(c)))
+#    define isUPPER_LC(c)    (FITS_IN_8_BITS(c) && isupper((unsigned char)(c)))
+#    define isLOWER_LC(c)    (FITS_IN_8_BITS(c) && islower((unsigned char)(c)))
+#    define isALNUMC_LC(c)   (FITS_IN_8_BITS(c) && isalnum((unsigned char)(c)))
+#    define isCNTRL_LC(c)    (FITS_IN_8_BITS(c) && iscntrl((unsigned char)(c)))
+#    define isGRAPH_LC(c)    (FITS_IN_8_BITS(c) && isgraph((unsigned char)(c)))
+#    define isPRINT_LC(c)    (FITS_IN_8_BITS(c) && isprint((unsigned char)(c)))
+#    define isPUNCT_LC(c)    (FITS_IN_8_BITS(c) && ispunct((unsigned char)(c)))
+#    define toUPPER_LC(c) (FITS_IN_8_BITS(c) ? toupper((unsigned char)(c)) : (c))
+#    define toLOWER_LC(c) (FITS_IN_8_BITS(c) ? tolower((unsigned char)(c)) : (c))
 
 #  else
 
-#    define isALNUM_LC(c)      (isascii(c) && (isalnum(c) || (c) == '_'))
+#    define isWORDCHAR_LC(c)   (isascii(c) && (isalnum(c) || (c) == '_'))
+#    define isALNUM_LC(c)      isWORDCHAR_LC(c)
 #    define isIDFIRST_LC(c)    (isascii(c) && (isalpha(c) || (c) == '_'))
 #    define isALPHA_LC(c)      (isascii(c) && isalpha(c))
 #    define isASCII_LC(c)      isascii(c)
@@ -905,8 +909,8 @@ EXTCONST U32 PL_charclass[];
 #    define isGRAPH_LC(c)      (isascii(c) && isgraph(c))
 #    define isPRINT_LC(c)      (isascii(c) && isprint(c))
 #    define isPUNCT_LC(c)      (isascii(c) && ispunct(c))
-#    define toUPPER_LC(c)      toupper(c)
-#    define toLOWER_LC(c)      tolower(c)
+#    define toUPPER_LC(c)      (isascii(c) ? toupper(c) : (c))
+#    define toLOWER_LC(c)      (isascii(c) ? tolower(c) : (c))
 
 #  endif
 #endif /* USE_NEXT_CTYPE */
@@ -953,19 +957,24 @@ EXTCONST U32 PL_charclass[];
 #define toLOWER_uni(c,s,l)     to_uni_lower(c,s,l)
 #define toFOLD_uni(c,s,l)      to_uni_fold(c,s,l)
 
-#define isALNUM_LC_uvchr(c)    (c < 256 ? isALNUM_LC(c) : is_uni_alnum_lc(c))
-#define isIDFIRST_LC_uvchr(c)  (c < 256 ? isIDFIRST_LC(c) : is_uni_idfirst_lc(c))
-#define isALPHA_LC_uvchr(c)    (c < 256 ? isALPHA_LC(c) : is_uni_alpha_lc(c))
-#define isSPACE_LC_uvchr(c)    (c < 256 ? isSPACE_LC(c) : is_uni_space_lc(c))
-#define isDIGIT_LC_uvchr(c)    (c < 256 ? isDIGIT_LC(c) : is_uni_digit_lc(c))
-#define isUPPER_LC_uvchr(c)    (c < 256 ? isUPPER_LC(c) : is_uni_upper_lc(c))
-#define isLOWER_LC_uvchr(c)    (c < 256 ? isLOWER_LC(c) : is_uni_lower_lc(c))
-#define isCNTRL_LC_uvchr(c)    (c < 256 ? isCNTRL_LC(c) : is_uni_cntrl_lc(c))
-#define isGRAPH_LC_uvchr(c)    (c < 256 ? isGRAPH_LC(c) : is_uni_graph_lc(c))
-#define isPRINT_LC_uvchr(c)    (c < 256 ? isPRINT_LC(c) : is_uni_print_lc(c))
-#define isPUNCT_LC_uvchr(c)    (c < 256 ? isPUNCT_LC(c) : is_uni_punct_lc(c))
-
-#define isPSXSPC_LC_uni(c)     (isSPACE_LC_uni(c) ||(c) == '\f')
+#define _gnrc_is_LC_uvchr(latin1, above_latin1, c) /* NB: evaluates c twice */ \
+                      ((c) < 256 ? latin1(c) : above_latin1(NATIVE_TO_UNI(c)))
+#define isWORDCHAR_LC_uvchr(c)  _gnrc_is_LC_uvchr(isWORDCHAR_LC,              \
+                                                        is_uni_alnum_lc, c)
+#define isALNUM_LC_uvchr(c)  isWORDCHAR_LC_uvchr(c) /* synonym */
+#define isIDFIRST_LC_uvchr(c)  _gnrc_is_LC_uvchr(isIDFIRST_LC,                 \
+                                                        is_uni_idfirst_lc, c)
+#define isALPHA_LC_uvchr(c)  _gnrc_is_LC_uvchr(isALPHA_LC, is_uni_alpha_lc, c)
+#define isSPACE_LC_uvchr(c)  _gnrc_is_LC_uvchr(isSPACE_LC,                     \
+                                                       is_XPERLSPACE_cp_high, c)
+#define isDIGIT_LC_uvchr(c)  _gnrc_is_LC_uvchr(isDIGIT_LC, is_uni_digit_lc, c)
+#define isUPPER_LC_uvchr(c)  _gnrc_is_LC_uvchr(isUPPER_LC, is_uni_upper_lc, c)
+#define isLOWER_LC_uvchr(c)  _gnrc_is_LC_uvchr(isLOWER_LC, is_uni_lower_lc, c)
+#define isCNTRL_LC_uvchr(c)  ((c) < 256 ? isCNTRL_LC(c) : 0) /* 0 if c > 255 */
+#define isGRAPH_LC_uvchr(c)  _gnrc_is_LC_uvchr(isGRAPH_LC, is_uni_graph_lc, c)
+#define isPRINT_LC_uvchr(c)  _gnrc_is_LC_uvchr(isPRINT_LC, is_uni_print_lc, c)
+#define isPUNCT_LC_uvchr(c)  _gnrc_is_LC_uvchr(isPUNCT_LC, is_uni_punct_lc, c)
+
 #define isBLANK_LC_uni(c)      isBLANK(c) /* could be wrong */
 
 /* For internal core Perl use only.  If the input is in the Latin1 range, use
@@ -1023,19 +1032,36 @@ EXTCONST U32 PL_charclass[];
  * Latin1 */
 #define isPSXSPC_utf8(p)        _generic_utf8(_CC_PSXSPC, is_XPERLSPACE_high, p)
 
-#define isALNUM_LC_utf8(p)     isALNUM_LC_uvchr(valid_utf8_to_uvchr(p,  0))
-#define isIDFIRST_LC_utf8(p)   isIDFIRST_LC_uvchr(valid_utf8_to_uvchr(p,  0))
-#define isALPHA_LC_utf8(p)     isALPHA_LC_uvchr(valid_utf8_to_uvchr(p,  0))
-#define isBLANK_LC_utf8(p)     isBLANK_LC_uvchr(valid_utf8_to_uvchr(p,  0))
-#define isSPACE_LC_utf8(p)     isSPACE_LC_uvchr(valid_utf8_to_uvchr(p,  0))
-#define isDIGIT_LC_utf8(p)     isDIGIT_LC_uvchr(valid_utf8_to_uvchr(p,  0))
-#define isUPPER_LC_utf8(p)     isUPPER_LC_uvchr(valid_utf8_to_uvchr(p,  0))
-#define isLOWER_LC_utf8(p)     isLOWER_LC_uvchr(valid_utf8_to_uvchr(p,  0))
-#define isALNUMC_LC_utf8(p)    isALNUMC_LC_uvchr(valid_utf8_to_uvchr(p,  0))
-#define isCNTRL_LC_utf8(p)     isCNTRL_LC_uvchr(valid_utf8_to_uvchr(p,  0))
-#define isGRAPH_LC_utf8(p)     isGRAPH_LC_uvchr(valid_utf8_to_uvchr(p,  0))
-#define isPRINT_LC_utf8(p)     isPRINT_LC_uvchr(valid_utf8_to_uvchr(p,  0))
-#define isPUNCT_LC_utf8(p)     isPUNCT_LC_uvchr(valid_utf8_to_uvchr(p,  0))
+/* For internal core Perl use only.  'p' points to a UTF-8 string.  If its
+ * first character is in the Latin1 range, apply 'macro' to that character's
+ * code point; otherwise use the value of the 'utf8' parameter.  Relies on
+ * ASCII characters having the same representation whether utf8 or not.  Note
+ * that it assumes the utf8 has been validated, and ignores 'use bytes' */
+#define _generic_LC_utf8_utf8(macro, p, utf8) \
+                         (UTF8_IS_INVARIANT(*(p))                          \
+                         ? macro(*(p))                                     \
+                         : (UTF8_IS_DOWNGRADEABLE_START(*(p)))             \
+                           ? macro(TWO_BYTE_UTF8_TO_UNI(*(p), *((p)+1)))   \
+                           : (utf8))
+
+#define _generic_LC_utf8(macro, utf8_func, p)                              \
+                            _generic_LC_utf8_utf8(macro, p, utf8_func(p))
+
+#define isWORDCHAR_LC_utf8(p) _generic_LC_utf8(isWORDCHAR_LC, is_utf8_alnum, p)
+#define isALNUM_LC_utf8(p)   isWORDCHAR_LC_utf8(p) /* synonym */
+#define isIDFIRST_LC_utf8(p) _generic_LC_utf8(isIDFIRST_LC,                \
+                                                    _is_utf8_perl_idstart, p)
+#define isALPHA_LC_utf8(p)   _generic_LC_utf8(isALPHA_LC, is_utf8_alpha, p)
+#define isBLANK_LC_utf8(p)   _generic_LC_utf8(isBLANK_LC, is_HORIZWS_high, p)
+#define isSPACE_LC_utf8(p)   _generic_LC_utf8(isSPACE_LC, is_XPERLSPACE_high, p)
+#define isDIGIT_LC_utf8(p)   _generic_LC_utf8(isDIGIT_LC, is_utf8_digit, p)
+#define isUPPER_LC_utf8(p)   _generic_LC_utf8(isUPPER_LC, is_utf8_upper, p)
+#define isLOWER_LC_utf8(p)   _generic_LC_utf8(isLOWER_LC, is_utf8_lower, p)
+#define isALNUMC_LC_utf8(p)  _generic_LC_utf8(isALNUMC_LC, is_utf8_alnumc, p)
+#define isCNTRL_LC_utf8(p)   _generic_LC_utf8_utf8(isCNTRL_LC, p, 0) /* 0 => false above Latin1 */
+#define isGRAPH_LC_utf8(p)   _generic_LC_utf8(isGRAPH_LC, is_utf8_graph, p)
+#define isPRINT_LC_utf8(p)   _generic_LC_utf8(isPRINT_LC, is_utf8_print, p)
+#define isPUNCT_LC_utf8(p)   _generic_LC_utf8(isPUNCT_LC, is_utf8_punct, p)
 
 #define isPSXSPC_LC_utf8(c)    (isSPACE_LC_utf8(c) ||(c) == '\f')