X-Git-Url: https://perl5.git.perl.org/perl5.git/blobdiff_plain/b257a28c3a214073e8f73ee768a25f96c841e422..05ba7c096a1637812610fe686e02f626fa5a39f0:/handy.h

Summary: parenthesizes the argument of isPOWER_OF_2(), adds the inRANGE()
helper macro, and rewrites the single-range character-class macros
(isDIGIT_A, isLOWER_A, isUPPER_A, isXDIGIT_A, isCNTRL_A, isUPPER_L1, ...)
in terms of it.

Note: relative to the blobdiff at the URL above, the added lines below
balance the parentheses of isXDIGIT_A, wrap the fallback isLOWER_A and
isUPPER_A expansions in parentheses, spell the helper used by isUPPER_L1
as inRANGE, and fix a comment typo.  Hunk line counts are unchanged.

diff --git a/handy.h b/handy.h
index 57ad62d..51f79ef 100644
--- a/handy.h
+++ b/handy.h
@@ -260,7 +260,7 @@ typedef U64TYPE U64;
  * If not, subtracting 1 would leave the uppermost bit set, so the & would
  * yield non-zero */
 #if defined(PERL_CORE) || defined(PERL_EXT)
-# define isPOWER_OF_2(n) (n && (n & (n-1)) == 0)
+# define isPOWER_OF_2(n) ((n) && ((n) & ((n)-1)) == 0)
 #endif
 
 /* This is a helper macro to avoid preprocessor issues, replaced by nothing
@@ -1094,6 +1094,28 @@ patched there. The file as of this writing is cpan/Devel-PPPort/parts/inc/misc
 #define FITS_IN_8_BITS(c) (1)
 #endif
 
+/* Returns true if c is in the range l..u, where 'l' is non-negative
+ * Written this way so that after optimization, only one conditional test is
+ * needed.
+ *
+ * This isn't fully general, except for the special cased 'signed char' (which
+ * should be resolved at compile time): It won't work if 'c' is negative, and
+ * 'l' is larger than the max for that signed type. Thus if 'c' is a negative
+ * int, and 'l' is larger than INT_MAX, it will fail. To protect against this
+ * happening, there is an assert that will generate a warning if c is larger
+ * than e.g. INT_MAX if it is an 'unsigned int'. This could be a false
+ * positive, but khw couldn't figure out a way to make it better. It's good
+ * enough so far */
+#define inRANGE(c, l, u) (__ASSERT_((l) >= 0) __ASSERT_((u) >= (l)) \
+ ((sizeof(c) == 1) \
+ ? (((WIDEST_UTYPE) ((((U8) (c))|0) - (l))) <= ((WIDEST_UTYPE) ((u) - (l)))) \
+ : (__ASSERT_( (((WIDEST_UTYPE) 1) << (CHARBITS * sizeof(c) - 1) & (c)) \
+ /* sign bit of c is 0 */ == 0 \
+ || (((~ ((WIDEST_UTYPE) 1) << ((CHARBITS * sizeof(c) - 1) - 1))\
+ /* l not larger than largest value in c's signed type */ \
+ & ~ ((WIDEST_UTYPE) 0)) & (l)) == 0) \
+ ((WIDEST_UTYPE) (((c) - (l)) | 0) <= ((WIDEST_UTYPE) ((u) - (l)))))))
+
 #ifdef EBCDIC
 # ifndef _ALL_SOURCE
 /* The native libc isascii() et.al. functions return the wrong results
@@ -1248,17 +1270,28 @@ END_EXTERN_C
  && ((PL_charclass[(U8) (c)] & _CC_mask_A(classnum)) \
  == _CC_mask_A(classnum)))
 
-# define isALPHA_A(c) _generic_isCC_A(c, _CC_ALPHA)
+/* On ASCII platforms certain classes form a single range. It's faster to
+ * special case these. isDIGIT is a single range on all platforms */
+# ifdef EBCDIC
+# define isALPHA_A(c) _generic_isCC_A(c, _CC_ALPHA)
+# define isGRAPH_A(c) _generic_isCC_A(c, _CC_GRAPH)
+# define isLOWER_A(c) _generic_isCC_A(c, _CC_LOWER)
+# define isPRINT_A(c) _generic_isCC_A(c, _CC_PRINT)
+# define isUPPER_A(c) _generic_isCC_A(c, _CC_UPPER)
+# else
+ /* By folding the upper and lowercase, we can use a single range */
+# define isALPHA_A(c) inRANGE((~('A' ^ 'a') & (c)), 'A', 'Z')
+# define isGRAPH_A(c) inRANGE(c, ' ' + 1, 0x7e)
+# define isLOWER_A(c) inRANGE(c, 'a', 'z')
+# define isPRINT_A(c) inRANGE(c, ' ', 0x7e)
+# define isUPPER_A(c) inRANGE(c, 'A', 'Z')
+# endif
 # define isALPHANUMERIC_A(c) _generic_isCC_A(c, _CC_ALPHANUMERIC)
 # define isBLANK_A(c) _generic_isCC_A(c, _CC_BLANK)
 # define isCNTRL_A(c) _generic_isCC_A(c, _CC_CNTRL)
-# define isDIGIT_A(c) _generic_isCC(c, _CC_DIGIT) /* No non-ASCII digits */
-# define isGRAPH_A(c) _generic_isCC_A(c, _CC_GRAPH)
-# define isLOWER_A(c) _generic_isCC_A(c, _CC_LOWER)
-# define isPRINT_A(c) _generic_isCC_A(c, _CC_PRINT)
+# define isDIGIT_A(c) inRANGE(c, '0', '9')
 # define isPUNCT_A(c) _generic_isCC_A(c, _CC_PUNCT)
 # define isSPACE_A(c) _generic_isCC_A(c, _CC_SPACE)
-# define isUPPER_A(c) _generic_isCC_A(c, _CC_UPPER)
 # define isWORDCHAR_A(c) _generic_isCC_A(c, _CC_WORDCHAR)
 # define isXDIGIT_A(c) _generic_isCC(c, _CC_XDIGIT) /* No non-ASCII xdigits */
 
@@ -1304,7 +1337,7 @@ END_EXTERN_C
  * hard-code various macro definitions that wouldn't otherwise be available
  * to it. Most are coded based on first principles. These are written to
  * avoid EBCDIC vs. ASCII #ifdef's as much as possible. */
-# define isDIGIT_A(c) ((c) <= '9' && (c) >= '0')
+# define isDIGIT_A(c) inRANGE(c, '0', '9')
 # define isBLANK_A(c) ((c) == ' ' || (c) == '\t')
 # define isSPACE_A(c) (isBLANK_A(c) \
  || (c) == '\n' \
@@ -1315,21 +1348,19 @@ END_EXTERN_C
  * uppercase. The tests for those aren't necessary on ASCII, but hurt only
  * performance (if optimization isn't on), and allow the same code to be
  * used for both platform types */
-# define isLOWER_A(c) ((c) >= 'a' && (c) <= 'z' \
- && ( (c) <= 'i' \
- || ((c) >= 'j' && (c) <= 'r') \
- || (c) >= 's'))
-# define isUPPER_A(c) ((c) >= 'A' && (c) <= 'Z' \
- && ( (c) <= 'I' \
- || ((c) >= 'J' && (c) <= 'R') \
- || (c) >= 'S'))
+# define isLOWER_A(c) ( inRANGE((c), 'a', 'i') \
+ || inRANGE((c), 'j', 'r') \
+ || inRANGE((c), 's', 'z'))
+# define isUPPER_A(c) ( inRANGE((c), 'A', 'I') \
+ || inRANGE((c), 'J', 'R') \
+ || inRANGE((c), 'S', 'Z'))
 # define isALPHA_A(c) (isUPPER_A(c) || isLOWER_A(c))
 # define isALPHANUMERIC_A(c) (isALPHA_A(c) || isDIGIT_A(c))
 # define isWORDCHAR_A(c) (isALPHANUMERIC_A(c) || (c) == '_')
 # define isIDFIRST_A(c) (isALPHA_A(c) || (c) == '_')
-# define isXDIGIT_A(c) (isDIGIT_A(c) \
- || ((c) >= 'a' && (c) <= 'f') \
- || ((c) <= 'F' && (c) >= 'A'))
+# define isXDIGIT_A(c) ( isDIGIT_A(c) \
+ || inRANGE((c), 'a', 'f') \
+ || inRANGE((c), 'A', 'F'))
 # define isPUNCT_A(c) ((c) == '-' || (c) == '!' || (c) == '"' \
  || (c) == '#' || (c) == '$' || (c) == '%' \
  || (c) == '&' || (c) == '\'' || (c) == '(' \
@@ -1351,13 +1382,13 @@ END_EXTERN_C
 # define isCNTRL_A(c) ((c) == '\0' || (c) == '\a' || (c) == '\b' \
  || (c) == '\f' || (c) == '\n' || (c) == '\r' \
  || (c) == '\t' || (c) == '\v' \
- || ((c) <= 3 && (c) >= 1) /* SOH, STX, ETX */ \
+ || inRANGE((c), 1, 3) /* SOH, STX, ETX */ \
  || (c) == 7 /* U+7F DEL */ \
- || ((c) <= 0x13 && (c) >= 0x0E) /* SO, SI */ \
- /* DLE, DC[1-3] */ \
+ || inRANGE((c), 0x0E, 0x13) /* SO SI DLE \
+ DC[1-3] */ \
  || (c) == 0x18 /* U+18 CAN */ \
  || (c) == 0x19 /* U+19 EOM */ \
- || ((c) <= 0x1F && (c) >= 0x1C) /* [FGRU]S */ \
+ || inRANGE((c), 0x1C, 0x1F) /* [FGRU]S */ \
  || (c) == 0x26 /* U+17 ETB */ \
  || (c) == 0x27 /* U+1B ESC */ \
  || (c) == 0x2D /* U+05 ENQ */ \
@@ -1412,8 +1443,8 @@ END_EXTERN_C
  || NATIVE_TO_LATIN1((U8) c) == 0xA0)))
 # define isUPPER_L1(c) (isUPPER_A(c) \
  || (FITS_IN_8_BITS(c) \
- && ( NATIVE_TO_LATIN1((U8) c) >= 0xC0 \
- && NATIVE_TO_LATIN1((U8) c) <= 0xDE \
+ && ( inRANGE(NATIVE_TO_LATIN1((U8) c), \
+ 0xC0, 0xDE) \
  && NATIVE_TO_LATIN1((U8) c) != 0xD7)))
 # define isWORDCHAR_L1(c) (isIDFIRST_L1(c) || isDIGIT_A(c))
 # define isIDFIRST_L1(c) (isALPHA_L1(c) || NATIVE_TO_LATIN1(c) == '_')