X-Git-Url: https://perl5.git.perl.org/perl5.git/blobdiff_plain/851131d3e04cc3e811d1777d97e35d66192b9ff7..9c3680f930b62475fe28e2cb48c344f7c4ef9eb8:/handy.h diff --git a/handy.h b/handy.h index df24bf5..3782788 100644 --- a/handy.h +++ b/handy.h @@ -590,6 +590,12 @@ patched there. The file as of this writing is cpan/Devel-PPPort/parts/inc/misc #define isASCII_A(c) isASCII(c) #define isASCII_L1(c) isASCII(c) +/* The lower 3 bits in both the ASCII and EBCDIC representations of '0' are 0, + * and the 8 possible permutations of those bits exactly comprise the 8 octal + * digits */ +#define isOCTAL_A(c) cBOOL(FITS_IN_8_BITS(c) && (0xF8 & (c)) == '0') + + /* ASCII range only */ #ifdef H_PERL /* If have access to perl.h, lookup in its table */ /* Bits for PL_charclass[]. These use names used in l1_char_class_tab.h but @@ -611,7 +617,6 @@ patched there. The file as of this writing is cpan/Devel-PPPort/parts/inc/misc # define _CC_IDFIRST_L1 (1<<13) # define _CC_LOWER_A (1<<14) # define _CC_LOWER_L1 (1<<15) -# define _CC_OCTAL_A (1<<16) # define _CC_PRINT_A (1<<17) # define _CC_PRINT_L1 (1<<18) # define _CC_PSXSPC_A (1<<19) @@ -626,12 +631,22 @@ patched there. The file as of this writing is cpan/Devel-PPPort/parts/inc/misc # define _CC_WORDCHAR_L1 (1<<28) # define _CC_XDIGIT_A (1<<29) # define _CC_NONLATIN1_FOLD (1<<30) -/* Unused - * (1<<31) +# define _CC_QUOTEMETA (1U<<31) /* 1U keeps Solaris from griping */ +/* Unused: (1<<16) * If more are needed, can give up some of the above. The first ones to go - * would be those that require just two tests to verify, either there are two - * code points, like BLANK_A, or occupy a single range like OCTAL_A, DIGIT_A, - * UPPER_A, and LOWER_A. + * would be those that require just two tests to verify; either there are two + * code points, like BLANK_A, or it occupies a single range like DIGIT_A, + * UPPER_A, and LOWER_A. 
Also consider the ones that can be replaced with two + * tests and an additional mask, so + * + * #define isCNTRL_A(c) cBOOL(FITS_IN_8_BITS(c) \ + * && (( ! (~0x1F & NATIVE_TO_UNI(c))) \ + * || UNLIKELY(NATIVE_TO_UNI(c) == 0x7f))) + * + * This takes advantage of the contiguous block of these with the first one's + * representation having the lower order bits all zero, except the DELETE must + * be tested specially. A similar pattern can be used for isCNTRL_L1, + * isPRINT_A, and isPRINT_L1 */ # ifdef DOINIT @@ -651,7 +666,6 @@ EXTCONST U32 PL_charclass[]; # define isGRAPH_A(c) cBOOL(FITS_IN_8_BITS(c) && (PL_charclass[(U8) NATIVE_TO_UNI(c)] & _CC_GRAPH_A)) # define isIDFIRST_A(c) cBOOL(FITS_IN_8_BITS(c) && (PL_charclass[(U8) NATIVE_TO_UNI(c)] & _CC_IDFIRST_A)) # define isLOWER_A(c) cBOOL(FITS_IN_8_BITS(c) && (PL_charclass[(U8) NATIVE_TO_UNI(c)] & _CC_LOWER_A)) -# define isOCTAL_A(c) cBOOL(FITS_IN_8_BITS(c) && (PL_charclass[(U8) NATIVE_TO_UNI(c)] & _CC_OCTAL_A)) # define isPRINT_A(c) cBOOL(FITS_IN_8_BITS(c) && (PL_charclass[(U8) NATIVE_TO_UNI(c)] & _CC_PRINT_A)) # define isPSXSPC_A(c) cBOOL(FITS_IN_8_BITS(c) && (PL_charclass[(U8) NATIVE_TO_UNI(c)] & _CC_PSXSPC_A)) # define isPUNCT_A(c) cBOOL(FITS_IN_8_BITS(c) && (PL_charclass[(U8) NATIVE_TO_UNI(c)] & _CC_PUNCT_A)) @@ -662,8 +676,8 @@ EXTCONST U32 PL_charclass[]; /* Either participates in a fold with a character above 255, or is a * multi-char fold */ # define _HAS_NONLATIN1_FOLD_CLOSURE_ONLY_FOR_USE_BY_REGCOMP_DOT_C_AND_REGEXEC_DOT_C(c) ((! cBOOL(FITS_IN_8_BITS(c))) || (PL_charclass[(U8) NATIVE_TO_UNI(c)] & _CC_NONLATIN1_FOLD)) +# define _isQUOTEMETA(c) cBOOL(FITS_IN_8_BITS(c) && (PL_charclass[(U8) NATIVE_TO_UNI(c)] & _CC_QUOTEMETA)) #else /* No perl.h. 
*/ -# define isOCTAL_A(c) ((c) <= '7' && (c) >= '0') # ifdef EBCDIC # define isALNUMC_A(c) (isASCII(c) && isALNUMC(c)) # define isALPHA_A(c) (isASCII(c) && isALPHA(c)) @@ -811,6 +825,8 @@ EXTCONST U32 PL_charclass[]; # define isIDFIRST_LC(c) \ (NXIsAlpha((unsigned int)(c)) || (char)(c) == '_') # define isALPHA_LC(c) NXIsAlpha((unsigned int)(c)) +# define isASCII_LC(c) isASCII((unsigned int)(c)) +# define isBLANK_LC(c) isBLANK((unsigned int)(c)) # define isSPACE_LC(c) NXIsSpace((unsigned int)(c)) # define isDIGIT_LC(c) NXIsDigit((unsigned int)(c)) # define isUPPER_LC(c) NXIsUpper((unsigned int)(c)) @@ -834,6 +850,16 @@ EXTCONST U32 PL_charclass[]; # define isALNUM_LC(c) (isalnum((unsigned char)(c)) || (char)(c) == '_') # define isIDFIRST_LC(c) (isalpha((unsigned char)(c)) || (char)(c) == '_') # define isALPHA_LC(c) isalpha((unsigned char)(c)) +# ifdef HAS_ISASCII +# define isASCII_LC(c) isascii((unsigned char)(c)) +# else +# define isASCII_LC(c) isASCII((unsigned char)(c)) +# endif +# ifdef HAS_ISBLANK +# define isBLANK_LC(c) isblank((unsigned char)(c)) +# else +# define isBLANK_LC(c) isBLANK((unsigned char)(c)) +# endif # define isSPACE_LC(c) isspace((unsigned char)(c)) # define isDIGIT_LC(c) isdigit((unsigned char)(c)) # define isUPPER_LC(c) isupper((unsigned char)(c)) @@ -851,6 +877,12 @@ EXTCONST U32 PL_charclass[]; # define isALNUM_LC(c) (isascii(c) && (isalnum(c) || (c) == '_')) # define isIDFIRST_LC(c) (isascii(c) && (isalpha(c) || (c) == '_')) # define isALPHA_LC(c) (isascii(c) && isalpha(c)) +# define isASCII_LC(c) isascii(c) +# ifdef HAS_ISBLANK +# define isBLANK_LC(c) (isascii(c) && isblank(c)) +# else +# define isBLANK_LC(c) isBLANK(c) +# endif # define isSPACE_LC(c) (isascii(c) && isspace(c)) # define isDIGIT_LC(c) (isascii(c) && isdigit(c)) # define isUPPER_LC(c) (isascii(c) && isupper(c)) @@ -867,7 +899,6 @@ EXTCONST U32 PL_charclass[]; #endif /* USE_NEXT_CTYPE */ #define isPSXSPC_LC(c) (isSPACE_LC(c) || (c) == '\v') -#define isBLANK_LC(c) isBLANK(c) 
/* could be wrong */ /* For use in the macros just below. If the input is Latin1, use the Latin1 * (_L1) version of the macro; otherwise use the function. Won't compile if @@ -931,7 +962,8 @@ EXTCONST U32 PL_charclass[]; *((p)+1))) \ : function(p)) -/* Note that all ignore 'use bytes' */ +/* Note that all assume that the utf8 has been validated, and ignore 'use + * bytes' */ #define isALNUM_utf8(p) generic_utf8(isWORDCHAR, is_utf8_alnum, p) /* To prevent S_scan_word in toke.c from hanging, we have to make sure that @@ -974,18 +1006,18 @@ EXTCONST U32 PL_charclass[]; : isSPACE_utf8(p))) #define isBLANK_utf8(c) isBLANK(c) /* could be wrong */ -#define isALNUM_LC_utf8(p) isALNUM_LC_uvchr(utf8_to_uvchr(p, 0)) -#define isIDFIRST_LC_utf8(p) isIDFIRST_LC_uvchr(utf8_to_uvchr(p, 0)) -#define isALPHA_LC_utf8(p) isALPHA_LC_uvchr(utf8_to_uvchr(p, 0)) -#define isSPACE_LC_utf8(p) isSPACE_LC_uvchr(utf8_to_uvchr(p, 0)) -#define isDIGIT_LC_utf8(p) isDIGIT_LC_uvchr(utf8_to_uvchr(p, 0)) -#define isUPPER_LC_utf8(p) isUPPER_LC_uvchr(utf8_to_uvchr(p, 0)) -#define isLOWER_LC_utf8(p) isLOWER_LC_uvchr(utf8_to_uvchr(p, 0)) -#define isALNUMC_LC_utf8(p) isALNUMC_LC_uvchr(utf8_to_uvchr(p, 0)) -#define isCNTRL_LC_utf8(p) isCNTRL_LC_uvchr(utf8_to_uvchr(p, 0)) -#define isGRAPH_LC_utf8(p) isGRAPH_LC_uvchr(utf8_to_uvchr(p, 0)) -#define isPRINT_LC_utf8(p) isPRINT_LC_uvchr(utf8_to_uvchr(p, 0)) -#define isPUNCT_LC_utf8(p) isPUNCT_LC_uvchr(utf8_to_uvchr(p, 0)) +#define isALNUM_LC_utf8(p) isALNUM_LC_uvchr(valid_utf8_to_uvchr(p, 0)) +#define isIDFIRST_LC_utf8(p) isIDFIRST_LC_uvchr(valid_utf8_to_uvchr(p, 0)) +#define isALPHA_LC_utf8(p) isALPHA_LC_uvchr(valid_utf8_to_uvchr(p, 0)) +#define isSPACE_LC_utf8(p) isSPACE_LC_uvchr(valid_utf8_to_uvchr(p, 0)) +#define isDIGIT_LC_utf8(p) isDIGIT_LC_uvchr(valid_utf8_to_uvchr(p, 0)) +#define isUPPER_LC_utf8(p) isUPPER_LC_uvchr(valid_utf8_to_uvchr(p, 0)) +#define isLOWER_LC_utf8(p) isLOWER_LC_uvchr(valid_utf8_to_uvchr(p, 0)) +#define isALNUMC_LC_utf8(p) 
isALNUMC_LC_uvchr(valid_utf8_to_uvchr(p, 0)) +#define isCNTRL_LC_utf8(p) isCNTRL_LC_uvchr(valid_utf8_to_uvchr(p, 0)) +#define isGRAPH_LC_utf8(p) isGRAPH_LC_uvchr(valid_utf8_to_uvchr(p, 0)) +#define isPRINT_LC_utf8(p) isPRINT_LC_uvchr(valid_utf8_to_uvchr(p, 0)) +#define isPUNCT_LC_utf8(p) isPUNCT_LC_uvchr(valid_utf8_to_uvchr(p, 0)) #define isPSXSPC_LC_utf8(c) (isSPACE_LC_utf8(c) ||(c) == '\f') #define isBLANK_LC_utf8(c) isBLANK(c) /* could be wrong */ @@ -1284,8 +1316,8 @@ void Perl_mem_log_del_sv(const SV *sv, const char *filename, const int linenumbe * Local variables: * c-indentation-style: bsd * c-basic-offset: 4 - * indent-tabs-mode: t + * indent-tabs-mode: nil * End: * - * ex: set ts=8 sts=4 sw=4 noet: + * ex: set ts=8 sts=4 sw=4 et: */