* If not, subtracting 1 would leave the uppermost bit set, so the & would
* yield non-zero */
#if defined(PERL_CORE) || defined(PERL_EXT)
-# define isPOWER_OF_2(n) ((n & (n-1)) == 0)
+# define isPOWER_OF_2(n) ((n) && ((n) & ((n)-1)) == 0)
#endif
/* This is a helper macro to avoid preprocessor issues, replaced by nothing
#define strnNE(s1,s2,l) (strncmp(s1,s2,l) != 0)
#define strnEQ(s1,s2,l) (strncmp(s1,s2,l) == 0)
-#define memNE(s1,s2,l) (memcmp(s1,s2,l) != 0)
-#define memEQ(s1,s2,l) (memcmp(s1,s2,l) == 0)
+#define memEQ(s1,s2,l) (memcmp(((const void *) (s1)), ((const void *) (s2)), l) == 0)
+#define memNE(s1,s2,l) (! memEQ(s1,s2,l))
/* memEQ and memNE where second comparand is a string constant */
#define memEQs(s1, l, s2) \
#define FITS_IN_8_BITS(c) (1)
#endif
+/* Returns true if c is in the range l..u, where 'l' is non-negative.
+ * Written this way so that after optimization, only one conditional test is
+ * needed.
+ *
+ * This isn't fully general, except for the special cased 'signed char' (which
+ * should be resolved at compile time): It won't work if 'c' is negative, and
+ * 'l' is larger than the max for that signed type. Thus if 'c' is a negative
+ * int, and 'l' is larger than INT_MAX, it will fail. To protect against this
+ * happening, there is an assert that will generate a warning if c is larger
+ * than e.g. INT_MAX if it is an 'unsigned int'. This could be a false
+ * positive, but khw couldn't figure out a way to make it better. It's good
+ * enough so far */
+#define inRANGE(c, l, u) (__ASSERT_((l) >= 0) __ASSERT_((u) >= (l)) \
+ ((sizeof(c) == 1) \
+ ? (((WIDEST_UTYPE) ((((U8) (c))|0) - (l))) <= ((WIDEST_UTYPE) ((u) - (l)))) \
+ : (__ASSERT_( (((WIDEST_UTYPE) 1) << (CHARBITS * sizeof(c) - 1) & (c)) \
+ /* sign bit of c is 0 */ == 0 \
+ || (((~ ((WIDEST_UTYPE) 1) << ((CHARBITS * sizeof(c) - 1) - 1))\
+ /* l not larger than largest value in c's signed type */ \
+ & ~ ((WIDEST_UTYPE) 0)) & (l)) == 0) \
+ ((WIDEST_UTYPE) (((c) - (l)) | 0) <= ((WIDEST_UTYPE) ((u) - (l)))))))
+
#ifdef EBCDIC
# ifndef _ALL_SOURCE
/* The native libc isascii() et.al. functions return the wrong results
&& ((PL_charclass[(U8) (c)] & _CC_mask_A(classnum)) \
== _CC_mask_A(classnum)))
-# define isALPHA_A(c) _generic_isCC_A(c, _CC_ALPHA)
+/* On ASCII platforms certain classes form a single range. It's faster to
+ * special case these. isDIGIT is a single range on all platforms */
+# ifdef EBCDIC
+# define isALPHA_A(c) _generic_isCC_A(c, _CC_ALPHA)
+# define isGRAPH_A(c) _generic_isCC_A(c, _CC_GRAPH)
+# define isLOWER_A(c) _generic_isCC_A(c, _CC_LOWER)
+# define isPRINT_A(c) _generic_isCC_A(c, _CC_PRINT)
+# define isUPPER_A(c) _generic_isCC_A(c, _CC_UPPER)
+# else
+ /* By folding the upper and lowercase, we can use a single range */
+# define isALPHA_A(c) inRANGE((~('A' ^ 'a') & (c)), 'A', 'Z')
+# define isGRAPH_A(c) inRANGE(c, ' ' + 1, 0x7e)
+# define isLOWER_A(c) inRANGE(c, 'a', 'z')
+# define isPRINT_A(c) inRANGE(c, ' ', 0x7e)
+# define isUPPER_A(c) inRANGE(c, 'A', 'Z')
+# endif
# define isALPHANUMERIC_A(c) _generic_isCC_A(c, _CC_ALPHANUMERIC)
# define isBLANK_A(c) _generic_isCC_A(c, _CC_BLANK)
# define isCNTRL_A(c) _generic_isCC_A(c, _CC_CNTRL)
-# define isDIGIT_A(c) _generic_isCC(c, _CC_DIGIT) /* No non-ASCII digits */
-# define isGRAPH_A(c) _generic_isCC_A(c, _CC_GRAPH)
-# define isLOWER_A(c) _generic_isCC_A(c, _CC_LOWER)
-# define isPRINT_A(c) _generic_isCC_A(c, _CC_PRINT)
+# define isDIGIT_A(c) inRANGE(c, '0', '9')
# define isPUNCT_A(c) _generic_isCC_A(c, _CC_PUNCT)
# define isSPACE_A(c) _generic_isCC_A(c, _CC_SPACE)
-# define isUPPER_A(c) _generic_isCC_A(c, _CC_UPPER)
# define isWORDCHAR_A(c) _generic_isCC_A(c, _CC_WORDCHAR)
# define isXDIGIT_A(c) _generic_isCC(c, _CC_XDIGIT) /* No non-ASCII xdigits
*/
* hard-code various macro definitions that wouldn't otherwise be available
* to it. Most are coded based on first principles. These are written to
* avoid EBCDIC vs. ASCII #ifdef's as much as possible. */
-# define isDIGIT_A(c) ((c) <= '9' && (c) >= '0')
+# define isDIGIT_A(c) inRANGE(c, '0', '9')
# define isBLANK_A(c) ((c) == ' ' || (c) == '\t')
# define isSPACE_A(c) (isBLANK_A(c) \
|| (c) == '\n' \
* uppercase. The tests for those aren't necessary on ASCII, but hurt only
* performance (if optimization isn't on), and allow the same code to be
* used for both platform types */
-# define isLOWER_A(c) ((c) >= 'a' && (c) <= 'z' \
- && ( (c) <= 'i' \
- || ((c) >= 'j' && (c) <= 'r') \
- || (c) >= 's'))
-# define isUPPER_A(c) ((c) >= 'A' && (c) <= 'Z' \
- && ( (c) <= 'I' \
- || ((c) >= 'J' && (c) <= 'R') \
- || (c) >= 'S'))
+/* True if c falls in one of the three runs 'a'..'i', 'j'..'r', 's'..'z'.
+ * The whole expansion is parenthesized so that the macro stays a single
+ * operand when used inside a larger expression (e.g. !isLOWER_A(c)). */
+# define isLOWER_A(c) ( inRANGE((c), 'a', 'i') \
+ || inRANGE((c), 'j', 'r') \
+ || inRANGE((c), 's', 'z'))
+/* True if c falls in one of the three runs 'A'..'I', 'J'..'R', 'S'..'Z'.
+ * The whole expansion is parenthesized so that the macro stays a single
+ * operand when used inside a larger expression (e.g. !isUPPER_A(c)). */
+# define isUPPER_A(c) ( inRANGE((c), 'A', 'I') \
+ || inRANGE((c), 'J', 'R') \
+ || inRANGE((c), 'S', 'Z'))
# define isALPHA_A(c) (isUPPER_A(c) || isLOWER_A(c))
# define isALPHANUMERIC_A(c) (isALPHA_A(c) || isDIGIT_A(c))
# define isWORDCHAR_A(c) (isALPHANUMERIC_A(c) || (c) == '_')
# define isIDFIRST_A(c) (isALPHA_A(c) || (c) == '_')
-# define isXDIGIT_A(c) (isDIGIT_A(c) \
- || ((c) >= 'a' && (c) <= 'f') \
- || ((c) <= 'F' && (c) >= 'A'))
+/* True if c is a digit per isDIGIT_A, or lies in 'a'..'f' or 'A'..'F'.
+ * The outer parentheses must balance: the expansion is one operand. */
+# define isXDIGIT_A(c) ( isDIGIT_A(c) \
+ || inRANGE((c), 'a', 'f') \
+ || inRANGE((c), 'A', 'F'))
# define isPUNCT_A(c) ((c) == '-' || (c) == '!' || (c) == '"' \
|| (c) == '#' || (c) == '$' || (c) == '%' \
|| (c) == '&' || (c) == '\'' || (c) == '(' \
# define isCNTRL_A(c) ((c) == '\0' || (c) == '\a' || (c) == '\b' \
|| (c) == '\f' || (c) == '\n' || (c) == '\r' \
|| (c) == '\t' || (c) == '\v' \
- || ((c) <= 3 && (c) >= 1) /* SOH, STX, ETX */ \
+ || inRANGE((c), 1, 3) /* SOH, STX, ETX */ \
|| (c) == 7 /* U+7F DEL */ \
- || ((c) <= 0x13 && (c) >= 0x0E) /* SO, SI */ \
- /* DLE, DC[1-3] */ \
+ || inRANGE((c), 0x0E, 0x13) /* SO SI DLE \
+ DC[1-3] */ \
|| (c) == 0x18 /* U+18 CAN */ \
|| (c) == 0x19 /* U+19 EOM */ \
- || ((c) <= 0x1F && (c) >= 0x1C) /* [FGRU]S */ \
+ || inRANGE((c), 0x1C, 0x1F) /* [FGRU]S */ \
|| (c) == 0x26 /* U+17 ETB */ \
|| (c) == 0x27 /* U+1B ESC */ \
|| (c) == 0x2D /* U+05 ENQ */ \
|| NATIVE_TO_LATIN1((U8) c) == 0xA0)))
+/* True if c satisfies isUPPER_A, or if it fits in 8 bits and its
+ * NATIVE_TO_LATIN1 value is in 0xC0..0xDE but is not 0xD7.  Uses the
+ * inRANGE macro, the same range helper used by the other is*_A macros. */
# define isUPPER_L1(c) (isUPPER_A(c) \
|| (FITS_IN_8_BITS(c) \
- && ( NATIVE_TO_LATIN1((U8) c) >= 0xC0 \
- && NATIVE_TO_LATIN1((U8) c) <= 0xDE \
+ && ( inRANGE(NATIVE_TO_LATIN1((U8) c), \
+ 0xC0, 0xDE) \
&& NATIVE_TO_LATIN1((U8) c) != 0xD7)))
# define isWORDCHAR_L1(c) (isIDFIRST_L1(c) || isDIGIT_A(c))
# define isIDFIRST_L1(c) (isALPHA_L1(c) || NATIVE_TO_LATIN1(c) == '_')
|| (char)(c) == '_'))
/* These next three are also for internal core Perl use only: case-change
- * helper macros */
+ * helper macros. The reason for using the PL_latin arrays is in case the
+ * system function is defective; it ensures uniform results that conform to the
+ * Unicode standard. It does not handle the anomalies in UTF-8 Turkic locales */
#define _generic_toLOWER_LC(c, function, cast) (! FITS_IN_8_BITS(c) \
? (c) \
: (IN_UTF8_CTYPE_LOCALE) \
? PL_latin1_lc[ (U8) (c) ] \
- : (cast)function((cast)(c)))
+ : (cast)function((cast)(c)))
/* Note that the result can be larger than a byte in a UTF-8 locale. It
* returns a single value, so can't adequately return the upper case of LATIN
* SMALL LETTER SHARP S in a UTF-8 locale (which should be a string of two
* values "SS"); instead it asserts against that under DEBUGGING, and
- * otherwise returns its input */
+ * otherwise returns its input. It does not handle the anomalies in UTF-8
+ * Turkic locales. */
#define _generic_toUPPER_LC(c, function, cast) \
(! FITS_IN_8_BITS(c) \
? (c) \
* returns a single value, so can't adequately return the fold case of LATIN
* SMALL LETTER SHARP S in a UTF-8 locale (which should be a string of two
* values "ss"); instead it asserts against that under DEBUGGING, and
- * otherwise returns its input */
+ * otherwise returns its input. It does not handle the anomalies in UTF-8
+ * Turkic locales */
#define _generic_toFOLD_LC(c, function, cast) \
((UNLIKELY((c) == MICRO_SIGN) && IN_UTF8_CTYPE_LOCALE) \
? GREEK_SMALL_LETTER_MU \