* XXX Similarly, a Configure probe for __FILE__ and __LINE__ is needed. */
#if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L) || (defined(__SUNPRO_C)) /* C99 or close enough. */
# define FUNCTION__ __func__
-#elif (defined(USING_MSVC6)) || /* MSVC6 has neither __func__ nor __FUNCTION and no good workarounds, either. */ \
- (defined(__DECC_VER)) /* Tru64 or VMS, and strict C89 being used, but not modern enough cc (in Tur64, -c99 not known, only -std1). */
+#elif (defined(__DECC_VER)) /* Tru64 or VMS, and strict C89 being used, but not modern enough cc (in Tru64, -c99 not known, only -std1). */
# define FUNCTION__ ""
#else
# define FUNCTION__ __FUNCTION__ /* Common extension. */
checks often check for things that Really Cannot Happen, and Coverity
detects that and gets all excited. */
-#if defined(DEBUGGING) && !defined(__COVERITY__)
+#if defined(DEBUGGING) && !defined(__COVERITY__) \
+ && ! defined(PERL_SMALL_MACRO_BUFFER)
# define __ASSERT_(statement) assert(statement),
#else
# define __ASSERT_(statement)
C<l1> gives the number of bytes in C<s1>.
Returns zero if equal, or non-zero if non-equal.
+=for apidoc Am|const char *|memCHRs|"list"|char c
+Returns the position of the first occurrence of the byte C<c> in the literal
+string C<"list">, or NULL if C<c> doesn't appear in C<"list">. All bytes are
+treated as unsigned char. Thus this macro can be used to determine if C<c> is
+in a set of particular characters. Unlike L<strchr(3)>, it works even if C<c>
+is C<NUL> (and the set doesn't include C<NUL>).
+
=cut
New macros should use the following conventions for their names (which are
#define memGT(s1,s2,l) (memcmp(s1,s2,l) > 0)
#define memGE(s1,s2,l) (memcmp(s1,s2,l) >= 0)
+#define memCHRs(s1,c) ((const char *) memchr("" s1 "" , c, sizeof(s1)-1))
+
/*
* Character classes.
*
=for apidoc Amh|bool|isALPHA_L1|int ch
=for apidoc Amh|bool|isALPHA_uvchr|int ch
=for apidoc Amh|bool|isALPHA_utf8_safe|U8 * s|U8 * end
-=for apidoc Amh|bool|isALPHA_utf8|U8 * s
+=for apidoc Amh|bool|isALPHA_utf8|U8 * s|U8 * end
=for apidoc Amh|bool|isALPHA_LC|int ch
=for apidoc Amh|bool|isALPHA_LC_uvchr|int ch
=for apidoc Amh|bool|isALPHA_LC_utf8_safe|U8 * s| U8 *end
=for apidoc Amh|bool|isALPHANUMERIC_L1|int ch
=for apidoc Amh|bool|isALPHANUMERIC_uvchr|int ch
=for apidoc Amh|bool|isALPHANUMERIC_utf8_safe|U8 * s|U8 * end
-=for apidoc Amh|bool|isALPHANUMERIC_utf8|U8 * s
+=for apidoc Amh|bool|isALPHANUMERIC_utf8|U8 * s|U8 * end
=for apidoc Amh|bool|isALPHANUMERIC_LC|int ch
=for apidoc Amh|bool|isALPHANUMERIC_LC_uvchr|int ch
=for apidoc Amh|bool|isALPHANUMERIC_LC_utf8_safe|U8 * s| U8 *end
=for apidoc Amh|bool|isASCII_L1|int ch
=for apidoc Amh|bool|isASCII_uvchr|int ch
=for apidoc Amh|bool|isASCII_utf8_safe|U8 * s|U8 * end
-=for apidoc Amh|bool|isASCII_utf8|U8 * s
+=for apidoc Amh|bool|isASCII_utf8|U8 * s|U8 * end
=for apidoc Amh|bool|isASCII_LC|int ch
=for apidoc Amh|bool|isASCII_LC_uvchr|int ch
=for apidoc Amh|bool|isASCII_LC_utf8_safe|U8 * s| U8 *end
=for apidoc Amh|bool|isBLANK_L1|int ch
=for apidoc Amh|bool|isBLANK_uvchr|int ch
=for apidoc Amh|bool|isBLANK_utf8_safe|U8 * s|U8 * end
-=for apidoc Amh|bool|isBLANK_utf8|U8 * s
+=for apidoc Amh|bool|isBLANK_utf8|U8 * s|U8 * end
=for apidoc Amh|bool|isBLANK_LC|int ch
=for apidoc Amh|bool|isBLANK_LC_uvchr|int ch
=for apidoc Amh|bool|isBLANK_LC_utf8_safe|U8 * s| U8 *end
=for apidoc Amh|bool|isCNTRL_L1|int ch
=for apidoc Amh|bool|isCNTRL_uvchr|int ch
=for apidoc Amh|bool|isCNTRL_utf8_safe|U8 * s|U8 * end
-=for apidoc Amh|bool|isCNTRL_utf8|U8 * s
+=for apidoc Amh|bool|isCNTRL_utf8|U8 * s|U8 * end
=for apidoc Amh|bool|isCNTRL_LC|int ch
=for apidoc Amh|bool|isCNTRL_LC_uvchr|int ch
=for apidoc Amh|bool|isCNTRL_LC_utf8_safe|U8 * s| U8 *end
=for apidoc Amh|bool|isDIGIT_L1|int ch
=for apidoc Amh|bool|isDIGIT_uvchr|int ch
=for apidoc Amh|bool|isDIGIT_utf8_safe|U8 * s|U8 * end
-=for apidoc Amh|bool|isDIGIT_utf8|U8 * s
+=for apidoc Amh|bool|isDIGIT_utf8|U8 * s|U8 * end
=for apidoc Amh|bool|isDIGIT_LC|int ch
=for apidoc Amh|bool|isDIGIT_LC_uvchr|int ch
=for apidoc Amh|bool|isDIGIT_LC_utf8_safe|U8 * s| U8 *end
=for apidoc Amh|bool|isGRAPH_L1|int ch
=for apidoc Amh|bool|isGRAPH_uvchr|int ch
=for apidoc Amh|bool|isGRAPH_utf8_safe|U8 * s|U8 * end
-=for apidoc Amh|bool|isGRAPH_utf8|U8 * s
+=for apidoc Amh|bool|isGRAPH_utf8|U8 * s|U8 * end
=for apidoc Amh|bool|isGRAPH_LC|int ch
=for apidoc Amh|bool|isGRAPH_LC_uvchr|int ch
=for apidoc Amh|bool|isGRAPH_LC_utf8_safe|U8 * s| U8 *end
=for apidoc Amh|bool|isLOWER_L1|int ch
=for apidoc Amh|bool|isLOWER_uvchr|int ch
=for apidoc Amh|bool|isLOWER_utf8_safe|U8 * s|U8 * end
-=for apidoc Amh|bool|isLOWER_utf8|U8 * s
+=for apidoc Amh|bool|isLOWER_utf8|U8 * s|U8 * end
=for apidoc Amh|bool|isLOWER_LC|int ch
=for apidoc Amh|bool|isLOWER_LC_uvchr|int ch
=for apidoc Amh|bool|isLOWER_LC_utf8_safe|U8 * s| U8 *end
=for apidoc Amh|bool|isPUNCT_L1|int ch
=for apidoc Amh|bool|isPUNCT_uvchr|int ch
=for apidoc Amh|bool|isPUNCT_utf8_safe|U8 * s|U8 * end
-=for apidoc Amh|bool|isPUNCT_utf8|U8 * s
+=for apidoc Amh|bool|isPUNCT_utf8|U8 * s|U8 * end
=for apidoc Amh|bool|isPUNCT_LC|int ch
=for apidoc Amh|bool|isPUNCT_LC_uvchr|int ch
=for apidoc Amh|bool|isPUNCT_LC_utf8_safe|U8 * s| U8 *end
=for apidoc Amh|bool|isSPACE_L1|int ch
=for apidoc Amh|bool|isSPACE_uvchr|int ch
=for apidoc Amh|bool|isSPACE_utf8_safe|U8 * s|U8 * end
-=for apidoc Amh|bool|isSPACE_utf8|U8 * s
+=for apidoc Amh|bool|isSPACE_utf8|U8 * s|U8 * end
=for apidoc Amh|bool|isSPACE_LC|int ch
=for apidoc Amh|bool|isSPACE_LC_uvchr|int ch
=for apidoc Amh|bool|isSPACE_LC_utf8_safe|U8 * s| U8 *end
=for apidoc Amh|bool|isPSXSPC_L1|int ch
=for apidoc Amh|bool|isPSXSPC_uvchr|int ch
=for apidoc Amh|bool|isPSXSPC_utf8_safe|U8 * s|U8 * end
-=for apidoc Amh|bool|isPSXSPC_utf8|U8 * s
+=for apidoc Amh|bool|isPSXSPC_utf8|U8 * s|U8 * end
=for apidoc Amh|bool|isPSXSPC_LC|int ch
=for apidoc Amh|bool|isPSXSPC_LC_uvchr|int ch
=for apidoc Amh|bool|isPSXSPC_LC_utf8_safe|U8 * s| U8 *end
=for apidoc Amh|bool|isUPPER_L1|int ch
=for apidoc Amh|bool|isUPPER_uvchr|int ch
=for apidoc Amh|bool|isUPPER_utf8_safe|U8 * s|U8 * end
-=for apidoc Amh|bool|isUPPER_utf8|U8 * s
+=for apidoc Amh|bool|isUPPER_utf8|U8 * s|U8 * end
=for apidoc Amh|bool|isUPPER_LC|int ch
=for apidoc Amh|bool|isUPPER_LC_uvchr|int ch
=for apidoc Amh|bool|isUPPER_LC_utf8_safe|U8 * s| U8 *end
=for apidoc Amh|bool|isPRINT_L1|int ch
=for apidoc Amh|bool|isPRINT_uvchr|int ch
=for apidoc Amh|bool|isPRINT_utf8_safe|U8 * s|U8 * end
-=for apidoc Amh|bool|isPRINT_utf8|U8 * s
+=for apidoc Amh|bool|isPRINT_utf8|U8 * s|U8 * end
=for apidoc Amh|bool|isPRINT_LC|int ch
=for apidoc Amh|bool|isPRINT_LC_uvchr|int ch
=for apidoc Amh|bool|isPRINT_LC_utf8_safe|U8 * s| U8 *end
=for apidoc Amh|bool|isWORDCHAR_L1|int ch
=for apidoc Amh|bool|isWORDCHAR_uvchr|int ch
=for apidoc Amh|bool|isWORDCHAR_utf8_safe|U8 * s|U8 * end
-=for apidoc Amh|bool|isWORDCHAR_utf8|U8 * s
+=for apidoc Amh|bool|isWORDCHAR_utf8|U8 * s|U8 * end
=for apidoc Amh|bool|isWORDCHAR_LC|int ch
=for apidoc Amh|bool|isWORDCHAR_LC_uvchr|int ch
=for apidoc Amh|bool|isWORDCHAR_LC_utf8_safe|U8 * s| U8 *end
=for apidoc Amh|bool|isXDIGIT_L1|int ch
=for apidoc Amh|bool|isXDIGIT_uvchr|int ch
=for apidoc Amh|bool|isXDIGIT_utf8_safe|U8 * s|U8 * end
-=for apidoc Amh|bool|isXDIGIT_utf8|U8 * s
+=for apidoc Amh|bool|isXDIGIT_utf8|U8 * s|U8 * end
=for apidoc Amh|bool|isXDIGIT_LC|int ch
=for apidoc Amh|bool|isXDIGIT_LC_uvchr|int ch
=for apidoc Amh|bool|isXDIGIT_LC_utf8_safe|U8 * s| U8 *end
=for apidoc Amh|bool|isIDFIRST_L1|int ch
=for apidoc Amh|bool|isIDFIRST_uvchr|int ch
=for apidoc Amh|bool|isIDFIRST_utf8_safe|U8 * s|U8 * end
-=for apidoc Amh|bool|isIDFIRST_utf8|U8 * s
+=for apidoc Amh|bool|isIDFIRST_utf8|U8 * s|U8 * end
=for apidoc Amh|bool|isIDFIRST_LC|int ch
=for apidoc Amh|bool|isIDFIRST_LC_uvchr|int ch
=for apidoc Amh|bool|isIDFIRST_LC_utf8_safe|U8 * s| U8 *end
=for apidoc Amh|bool|isIDCONT_L1|int ch
=for apidoc Amh|bool|isIDCONT_uvchr|int ch
=for apidoc Amh|bool|isIDCONT_utf8_safe|U8 * s|U8 * end
-=for apidoc Amh|bool|isIDCONT_utf8|U8 * s
+=for apidoc Amh|bool|isIDCONT_utf8|U8 * s|U8 * end
=for apidoc Amh|bool|isIDCONT_LC|int ch
=for apidoc Amh|bool|isIDCONT_LC_uvchr|int ch
=for apidoc Amh|bool|isIDCONT_LC_utf8_safe|U8 * s| U8 *end
# define _CC_QUOTEMETA 20
# define _CC_NON_FINAL_FOLD 21
# define _CC_IS_IN_SOME_FOLD 22
-# define _CC_MNEMONIC_CNTRL 23
+# define _CC_BINDIGIT 23
+# define _CC_OCTDIGIT 24
+# define _CC_MNEMONIC_CNTRL 25
/* This next group is only used on EBCDIC platforms, so theoretically could be
* shared with something entirely different that's only on ASCII platforms */
_generic_isCC(c, _CC_NON_FINAL_FOLD)
# define _IS_IN_SOME_FOLD_ONLY_FOR_USE_BY_REGCOMP_DOT_C(c) \
_generic_isCC(c, _CC_IS_IN_SOME_FOLD)
-# define _IS_MNEMONIC_CNTRL_ONLY_FOR_USE_BY_REGCOMP_DOT_C(c) \
- _generic_isCC(c, _CC_MNEMONIC_CNTRL)
+
+/* is c a control character for which we have a mnemonic? */
+# if defined(PERL_CORE) || defined(PERL_EXT)
+# define isMNEMONIC_CNTRL(c) _generic_isCC(c, _CC_MNEMONIC_CNTRL)
+# endif
#else /* else we don't have perl.h H_PERL */
/* If we don't have perl.h, we are compiling a utility program. Below we
|| (c) == '\f' || (c) == '\n' || (c) == '\r' \
|| (c) == '\t' || (c) == '\v' \
|| inRANGE((c), 1, 3) /* SOH, STX, ETX */ \
- || (c) == 7 /* U+7F DEL */ \
+ || (c) == 0x07 /* U+7F DEL, which is 0x07 in EBCDIC */ \
|| inRANGE((c), 0x0E, 0x13) /* SO SI DLE \
DC[1-3] */ \
|| (c) == 0x18 /* U+18 CAN */ \
* number defined earlier in this file. _generic_uvchr() is used for POSIX
* classes where there is a macro or function 'above_latin1' that takes the
* single argument 'c' and returns the desired value. These exist for those
- * classes which have simple definitions, avoiding the overhead of a hash
- * lookup or inversion list binary search. _generic_swash_uvchr() can be used
+ * classes which have simple definitions, avoiding the overhead of an inversion
+ * list binary search. _generic_invlist_uvchr() can be used
* for classes where that overhead is faster than a direct lookup.
* _generic_uvchr() won't compile if 'c' isn't unsigned, as it won't match the
* 'above_latin1' prototype. _generic_isCC() macro does bounds checking, so
#define _generic_uvchr(classnum, above_latin1, c) ((c) < 256 \
? _generic_isCC(c, classnum) \
: above_latin1(c))
-#define _generic_swash_uvchr(classnum, c) ((c) < 256 \
+#define _generic_invlist_uvchr(classnum, c) ((c) < 256 \
? _generic_isCC(c, classnum) \
: _is_uni_FOO(classnum, c))
-#define isALPHA_uvchr(c) _generic_swash_uvchr(_CC_ALPHA, c)
-#define isALPHANUMERIC_uvchr(c) _generic_swash_uvchr(_CC_ALPHANUMERIC, c)
+#define isALPHA_uvchr(c) _generic_invlist_uvchr(_CC_ALPHA, c)
+#define isALPHANUMERIC_uvchr(c) _generic_invlist_uvchr(_CC_ALPHANUMERIC, c)
#define isASCII_uvchr(c) isASCII(c)
#define isBLANK_uvchr(c) _generic_uvchr(_CC_BLANK, is_HORIZWS_cp_high, c)
#define isCNTRL_uvchr(c) isCNTRL_L1(c) /* All controls are in Latin1 */
-#define isDIGIT_uvchr(c) _generic_swash_uvchr(_CC_DIGIT, c)
-#define isGRAPH_uvchr(c) _generic_swash_uvchr(_CC_GRAPH, c)
+#define isDIGIT_uvchr(c) _generic_invlist_uvchr(_CC_DIGIT, c)
+#define isGRAPH_uvchr(c) _generic_invlist_uvchr(_CC_GRAPH, c)
#define isIDCONT_uvchr(c) \
_generic_uvchr(_CC_WORDCHAR, _is_uni_perl_idcont, c)
#define isIDFIRST_uvchr(c) \
_generic_uvchr(_CC_IDFIRST, _is_uni_perl_idstart, c)
-#define isLOWER_uvchr(c) _generic_swash_uvchr(_CC_LOWER, c)
-#define isPRINT_uvchr(c) _generic_swash_uvchr(_CC_PRINT, c)
+#define isLOWER_uvchr(c) _generic_invlist_uvchr(_CC_LOWER, c)
+#define isPRINT_uvchr(c) _generic_invlist_uvchr(_CC_PRINT, c)
-#define isPUNCT_uvchr(c) _generic_swash_uvchr(_CC_PUNCT, c)
+#define isPUNCT_uvchr(c) _generic_invlist_uvchr(_CC_PUNCT, c)
#define isSPACE_uvchr(c) _generic_uvchr(_CC_SPACE, is_XPERLSPACE_cp_high, c)
#define isPSXSPC_uvchr(c) isSPACE_uvchr(c)
-#define isUPPER_uvchr(c) _generic_swash_uvchr(_CC_UPPER, c)
+#define isUPPER_uvchr(c) _generic_invlist_uvchr(_CC_UPPER, c)
#define isVERTWS_uvchr(c) _generic_uvchr(_CC_VERTSPACE, is_VERTWS_cp_high, c)
-#define isWORDCHAR_uvchr(c) _generic_swash_uvchr(_CC_WORDCHAR, c)
+#define isWORDCHAR_uvchr(c) _generic_invlist_uvchr(_CC_WORDCHAR, c)
#define isXDIGIT_uvchr(c) _generic_uvchr(_CC_XDIGIT, is_XDIGIT_cp_high, c)
#define toFOLD_uvchr(c,s,l) to_uni_fold(c,s,l)
* _generic_uvchr, so see it for more info. */
/* Evaluates to latin1(c) when 'c' is below 256, otherwise to above_latin1(c).
 * 'c' is parenthesized in the range test so that an expression argument
 * (e.g. 'a | b') is compared as a whole. */
#define _generic_LC_uvchr(latin1, above_latin1, c) \
((c) < 256 ? latin1(c) : above_latin1(c))
/* Evaluates to latin1(c) when 'c' is below 256, otherwise to
 * _is_uni_FOO(classnum, c).  'c' is parenthesized in the range test so that an
 * expression argument is compared as a whole. */
-#define _generic_LC_swash_uvchr(latin1, classnum, c) \
+#define _generic_LC_invlist_uvchr(latin1, classnum, c) \
((c) < 256 ? latin1(c) : _is_uni_FOO(classnum, c))
-#define isALPHA_LC_uvchr(c) _generic_LC_swash_uvchr(isALPHA_LC, _CC_ALPHA, c)
-#define isALPHANUMERIC_LC_uvchr(c) _generic_LC_swash_uvchr(isALPHANUMERIC_LC, \
+#define isALPHA_LC_uvchr(c) _generic_LC_invlist_uvchr(isALPHA_LC, _CC_ALPHA, c)
+#define isALPHANUMERIC_LC_uvchr(c) _generic_LC_invlist_uvchr(isALPHANUMERIC_LC, \
_CC_ALPHANUMERIC, c)
#define isASCII_LC_uvchr(c) isASCII_LC(c)
#define isBLANK_LC_uvchr(c) _generic_LC_uvchr(isBLANK_LC, \
is_HORIZWS_cp_high, c)
/* Code points of 256 and above always yield 0 here; below that the locale
 * test isCNTRL_LC() decides.  'c' is parenthesized in the range test so that
 * an expression argument is compared as a whole. */
#define isCNTRL_LC_uvchr(c) ((c) < 256 ? isCNTRL_LC(c) : 0)
-#define isDIGIT_LC_uvchr(c) _generic_LC_swash_uvchr(isDIGIT_LC, _CC_DIGIT, c)
-#define isGRAPH_LC_uvchr(c) _generic_LC_swash_uvchr(isGRAPH_LC, _CC_GRAPH, c)
+#define isDIGIT_LC_uvchr(c) _generic_LC_invlist_uvchr(isDIGIT_LC, _CC_DIGIT, c)
+#define isGRAPH_LC_uvchr(c) _generic_LC_invlist_uvchr(isGRAPH_LC, _CC_GRAPH, c)
#define isIDCONT_LC_uvchr(c) _generic_LC_uvchr(isIDCONT_LC, \
_is_uni_perl_idcont, c)
#define isIDFIRST_LC_uvchr(c) _generic_LC_uvchr(isIDFIRST_LC, \
_is_uni_perl_idstart, c)
-#define isLOWER_LC_uvchr(c) _generic_LC_swash_uvchr(isLOWER_LC, _CC_LOWER, c)
-#define isPRINT_LC_uvchr(c) _generic_LC_swash_uvchr(isPRINT_LC, _CC_PRINT, c)
+#define isLOWER_LC_uvchr(c) _generic_LC_invlist_uvchr(isLOWER_LC, _CC_LOWER, c)
+#define isPRINT_LC_uvchr(c) _generic_LC_invlist_uvchr(isPRINT_LC, _CC_PRINT, c)
#define isPSXSPC_LC_uvchr(c) isSPACE_LC_uvchr(c)
-#define isPUNCT_LC_uvchr(c) _generic_LC_swash_uvchr(isPUNCT_LC, _CC_PUNCT, c)
+#define isPUNCT_LC_uvchr(c) _generic_LC_invlist_uvchr(isPUNCT_LC, _CC_PUNCT, c)
#define isSPACE_LC_uvchr(c) _generic_LC_uvchr(isSPACE_LC, \
is_XPERLSPACE_cp_high, c)
-#define isUPPER_LC_uvchr(c) _generic_LC_swash_uvchr(isUPPER_LC, _CC_UPPER, c)
-#define isWORDCHAR_LC_uvchr(c) _generic_LC_swash_uvchr(isWORDCHAR_LC, \
+#define isUPPER_LC_uvchr(c) _generic_LC_invlist_uvchr(isUPPER_LC, _CC_UPPER, c)
+#define isWORDCHAR_LC_uvchr(c) _generic_LC_invlist_uvchr(isWORDCHAR_LC, \
_CC_WORDCHAR, c)
#define isXDIGIT_LC_uvchr(c) _generic_LC_uvchr(isXDIGIT_LC, \
is_XDIGIT_cp_high, c)
* 'above_latin1' can be a macro */
#define _generic_func_utf8_safe(classnum, above_latin1, p, e) \
_generic_utf8_safe(classnum, p, e, above_latin1(p, e))
-#define _generic_non_swash_utf8_safe(classnum, above_latin1, p, e) \
+#define _generic_non_invlist_utf8_safe(classnum, above_latin1, p, e) \
_generic_utf8_safe(classnum, p, e, \
(UNLIKELY((e) - (p) < UTF8SKIP(p)) \
? (_force_out_malformed_utf8_message( \
(U8 *) (p), (U8 *) (e), 0, 1), 0) \
: above_latin1(p)))
-/* Like the above, but passes classnum to _isFOO_utf8_with_len(), instead of
- * having an 'above_latin1' parameter */
-#define _generic_swash_utf8_safe(classnum, p, e) \
-_generic_utf8_safe(classnum, p, e, _is_utf8_FOO_with_len(classnum, p, e))
+/* Like the above, but passes classnum to _is_utf8_FOO(), instead of having an
+ * 'above_latin1' parameter */
+#define _generic_invlist_utf8_safe(classnum, p, e) \
+ _generic_utf8_safe(classnum, p, e, _is_utf8_FOO(classnum, p, e))
/* Like the above, but should be used only when it is known that there are no
* characters in the upper-Latin1 range (128-255 on ASCII platforms) which the
#define isWORDCHAR_utf8(p, e) isWORDCHAR_utf8_safe(p, e)
#define isXDIGIT_utf8(p, e) isXDIGIT_utf8_safe(p, e)
-#define isALPHA_utf8_safe(p, e) _generic_swash_utf8_safe(_CC_ALPHA, p, e)
+#define isALPHA_utf8_safe(p, e) _generic_invlist_utf8_safe(_CC_ALPHA, p, e)
#define isALPHANUMERIC_utf8_safe(p, e) \
- _generic_swash_utf8_safe(_CC_ALPHANUMERIC, p, e)
+ _generic_invlist_utf8_safe(_CC_ALPHANUMERIC, p, e)
#define isASCII_utf8_safe(p, e) \
/* Because ASCII is invariant under utf8, the non-utf8 macro \
* works */ \
(__ASSERT_(_utf8_safe_assert(p, e)) isASCII(*(p)))
#define isBLANK_utf8_safe(p, e) \
- _generic_non_swash_utf8_safe(_CC_BLANK, is_HORIZWS_high, p, e)
+ _generic_non_invlist_utf8_safe(_CC_BLANK, is_HORIZWS_high, p, e)
#ifdef EBCDIC
/* Because all controls are UTF-8 invariants in EBCDIC, we can use this
#define isDIGIT_utf8_safe(p, e) \
_generic_utf8_safe_no_upper_latin1(_CC_DIGIT, p, e, \
- _is_utf8_FOO_with_len(_CC_DIGIT, p, e))
-#define isGRAPH_utf8_safe(p, e) _generic_swash_utf8_safe(_CC_GRAPH, p, e)
+ _is_utf8_FOO(_CC_DIGIT, p, e))
+#define isGRAPH_utf8_safe(p, e) _generic_invlist_utf8_safe(_CC_GRAPH, p, e)
#define isIDCONT_utf8_safe(p, e) _generic_func_utf8_safe(_CC_WORDCHAR, \
- _is_utf8_perl_idcont_with_len, p, e)
+ _is_utf8_perl_idcont, p, e)
/* To prevent S_scan_word in toke.c from hanging, we have to make sure that
* IDFIRST is an alnum. See
- * https://rt.perl.org/rt3/Ticket/Display.html?id=74022 for more detail than you
+ * https://github.com/Perl/perl5/issues/10275 for more detail than you
* ever wanted to know about. (In the ASCII range, there isn't a difference.)
* This used to be not the XID version, but we decided to go with the more
* modern Unicode definition */
#define isIDFIRST_utf8_safe(p, e) \
_generic_func_utf8_safe(_CC_IDFIRST, \
- _is_utf8_perl_idstart_with_len, (U8 *) (p), (U8 *) (e))
+ _is_utf8_perl_idstart, (U8 *) (p), (U8 *) (e))
-#define isLOWER_utf8_safe(p, e) _generic_swash_utf8_safe(_CC_LOWER, p, e)
-#define isPRINT_utf8_safe(p, e) _generic_swash_utf8_safe(_CC_PRINT, p, e)
+#define isLOWER_utf8_safe(p, e) _generic_invlist_utf8_safe(_CC_LOWER, p, e)
+#define isPRINT_utf8_safe(p, e) _generic_invlist_utf8_safe(_CC_PRINT, p, e)
#define isPSXSPC_utf8_safe(p, e) isSPACE_utf8_safe(p, e)
-#define isPUNCT_utf8_safe(p, e) _generic_swash_utf8_safe(_CC_PUNCT, p, e)
+#define isPUNCT_utf8_safe(p, e) _generic_invlist_utf8_safe(_CC_PUNCT, p, e)
#define isSPACE_utf8_safe(p, e) \
- _generic_non_swash_utf8_safe(_CC_SPACE, is_XPERLSPACE_high, p, e)
-#define isUPPER_utf8_safe(p, e) _generic_swash_utf8_safe(_CC_UPPER, p, e)
+ _generic_non_invlist_utf8_safe(_CC_SPACE, is_XPERLSPACE_high, p, e)
+#define isUPPER_utf8_safe(p, e) _generic_invlist_utf8_safe(_CC_UPPER, p, e)
#define isVERTWS_utf8_safe(p, e) \
- _generic_non_swash_utf8_safe(_CC_VERTSPACE, is_VERTWS_high, p, e)
+ _generic_non_invlist_utf8_safe(_CC_VERTSPACE, is_VERTWS_high, p, e)
#define isWORDCHAR_utf8_safe(p, e) \
- _generic_swash_utf8_safe(_CC_WORDCHAR, p, e)
+ _generic_invlist_utf8_safe(_CC_WORDCHAR, p, e)
#define isXDIGIT_utf8_safe(p, e) \
_generic_utf8_safe_no_upper_latin1(_CC_XDIGIT, p, e, \
(UNLIKELY((e) - (p) < UTF8SKIP(p)) \
(U8 *) (p), (U8 *) (e), 0, 1), 0)) \
: above_latin1))
-#define _generic_LC_swash_utf8_safe(macro, classnum, p, e) \
+#define _generic_LC_invlist_utf8_safe(macro, classnum, p, e) \
_generic_LC_utf8_safe(macro, p, e, \
- _is_utf8_FOO_with_len(classnum, p, e))
+ _is_utf8_FOO(classnum, p, e))
#define _generic_LC_func_utf8_safe(macro, above_latin1, p, e) \
_generic_LC_utf8_safe(macro, p, e, above_latin1(p, e))
-#define _generic_LC_non_swash_utf8_safe(classnum, above_latin1, p, e) \
+#define _generic_LC_non_invlist_utf8_safe(classnum, above_latin1, p, e) \
_generic_LC_utf8_safe(classnum, p, e, \
(UNLIKELY((e) - (p) < UTF8SKIP(p)) \
? (_force_out_malformed_utf8_message( \
: above_latin1(p)))
#define isALPHANUMERIC_LC_utf8_safe(p, e) \
- _generic_LC_swash_utf8_safe(isALPHANUMERIC_LC, \
+ _generic_LC_invlist_utf8_safe(isALPHANUMERIC_LC, \
_CC_ALPHANUMERIC, p, e)
#define isALPHA_LC_utf8_safe(p, e) \
- _generic_LC_swash_utf8_safe(isALPHA_LC, _CC_ALPHA, p, e)
+ _generic_LC_invlist_utf8_safe(isALPHA_LC, _CC_ALPHA, p, e)
#define isASCII_LC_utf8_safe(p, e) \
(__ASSERT_(_utf8_safe_assert(p, e)) isASCII_LC(*(p)))
#define isBLANK_LC_utf8_safe(p, e) \
- _generic_LC_non_swash_utf8_safe(isBLANK_LC, is_HORIZWS_high, p, e)
+ _generic_LC_non_invlist_utf8_safe(isBLANK_LC, is_HORIZWS_high, p, e)
#define isCNTRL_LC_utf8_safe(p, e) \
_generic_LC_utf8_safe(isCNTRL_LC, p, e, 0)
#define isDIGIT_LC_utf8_safe(p, e) \
- _generic_LC_swash_utf8_safe(isDIGIT_LC, _CC_DIGIT, p, e)
+ _generic_LC_invlist_utf8_safe(isDIGIT_LC, _CC_DIGIT, p, e)
#define isGRAPH_LC_utf8_safe(p, e) \
- _generic_LC_swash_utf8_safe(isGRAPH_LC, _CC_GRAPH, p, e)
+ _generic_LC_invlist_utf8_safe(isGRAPH_LC, _CC_GRAPH, p, e)
#define isIDCONT_LC_utf8_safe(p, e) \
_generic_LC_func_utf8_safe(isIDCONT_LC, \
- _is_utf8_perl_idcont_with_len, p, e)
+ _is_utf8_perl_idcont, p, e)
#define isIDFIRST_LC_utf8_safe(p, e) \
_generic_LC_func_utf8_safe(isIDFIRST_LC, \
- _is_utf8_perl_idstart_with_len, p, e)
+ _is_utf8_perl_idstart, p, e)
#define isLOWER_LC_utf8_safe(p, e) \
- _generic_LC_swash_utf8_safe(isLOWER_LC, _CC_LOWER, p, e)
+ _generic_LC_invlist_utf8_safe(isLOWER_LC, _CC_LOWER, p, e)
#define isPRINT_LC_utf8_safe(p, e) \
- _generic_LC_swash_utf8_safe(isPRINT_LC, _CC_PRINT, p, e)
+ _generic_LC_invlist_utf8_safe(isPRINT_LC, _CC_PRINT, p, e)
#define isPSXSPC_LC_utf8_safe(p, e) isSPACE_LC_utf8_safe(p, e)
#define isPUNCT_LC_utf8_safe(p, e) \
- _generic_LC_swash_utf8_safe(isPUNCT_LC, _CC_PUNCT, p, e)
+ _generic_LC_invlist_utf8_safe(isPUNCT_LC, _CC_PUNCT, p, e)
#define isSPACE_LC_utf8_safe(p, e) \
- _generic_LC_non_swash_utf8_safe(isSPACE_LC, is_XPERLSPACE_high, p, e)
+ _generic_LC_non_invlist_utf8_safe(isSPACE_LC, is_XPERLSPACE_high, p, e)
#define isUPPER_LC_utf8_safe(p, e) \
- _generic_LC_swash_utf8_safe(isUPPER_LC, _CC_UPPER, p, e)
+ _generic_LC_invlist_utf8_safe(isUPPER_LC, _CC_UPPER, p, e)
#define isWORDCHAR_LC_utf8_safe(p, e) \
- _generic_LC_swash_utf8_safe(isWORDCHAR_LC, _CC_WORDCHAR, p, e)
+ _generic_LC_invlist_utf8_safe(isWORDCHAR_LC, _CC_WORDCHAR, p, e)
#define isXDIGIT_LC_utf8_safe(p, e) \
- _generic_LC_non_swash_utf8_safe(isXDIGIT_LC, is_XDIGIT_high, p, e)
+ _generic_LC_non_invlist_utf8_safe(isXDIGIT_LC, is_XDIGIT_high, p, e)
/* Macros for backwards compatibility and for completeness when the ASCII and
* Latin1 values are identical */
#define isALNUM_uni(c) isWORDCHAR_uni(c)
#define isALNUM_LC_uvchr(c) isWORDCHAR_LC_uvchr(c)
#define isALNUM_utf8(p,e) isWORDCHAR_utf8(p,e)
+#define isALNUM_utf8_safe(p,e) isWORDCHAR_utf8_safe(p,e)
#define isALNUM_LC_utf8(p,e)isWORDCHAR_LC_utf8(p,e)
+#define isALNUM_LC_utf8_safe(p,e)isWORDCHAR_LC_utf8_safe(p,e)
#define isALNUMC_A(c) isALPHANUMERIC_A(c) /* Mnemonic: "C's alnum" */
#define isALNUMC_L1(c) isALPHANUMERIC_L1(c)
#define isALNUMC(c) isALPHANUMERIC(c)
#define isALNUMC_uni(c) isALPHANUMERIC_uni(c)
#define isALNUMC_LC_uvchr(c) isALPHANUMERIC_LC_uvchr(c)
#define isALNUMC_utf8(p,e) isALPHANUMERIC_utf8(p,e)
-#define isALNUMC_LC_utf8(p,e) isALPHANUMERIC_LC_utf8(p,e)
+#define isALNUMC_utf8_safe(p,e) isALPHANUMERIC_utf8_safe(p,e)
+#define isALNUMC_LC_utf8_safe(p,e) isALPHANUMERIC_LC_utf8_safe(p,e)
/* On EBCDIC platforms, CTRL-@ is 0, CTRL-A is 1, etc, just like on ASCII,
* except that they don't necessarily mean the same characters, e.g. CTRL-D is
} \
return a;
-/* Converts a character known to represent a hexadecimal digit (0-9, A-F, or
- * a-f) to its numeric value. READ_XDIGIT's argument is a string pointer,
- * which is advanced. The input is validated only by an assert() in DEBUGGING
- * builds. In both ASCII and EBCDIC the last 4 bits of the digits are 0-9; and
- * the last 4 bits of A-F and a-f are 1-6, so adding 9 yields 10-15 */
-#define XDIGIT_VALUE(c) (__ASSERT_(isXDIGIT(c)) (0xf & (isDIGIT(c) \
- ? (c) \
- : ((c) + 9))))
-#define READ_XDIGIT(s) (__ASSERT_(isXDIGIT(*s)) (0xf & (isDIGIT(*(s)) \
- ? (*(s)++) \
- : (*(s)++ + 9))))
+/* Converts a character KNOWN to represent a hexadecimal digit (0-9, A-F, or
+ * a-f) to its numeric value without using any branches. The input is
+ * validated only by an assert() in DEBUGGING builds.
+ *
+ * It works by right shifting and isolating the bit that is 0 for the digits,
+ * and 1 for at least the alphas A-F, a-f. The bit is shifted to the ones
+ * position, and then to the eights position. Both are added together to form
+ * 0 if the input is '0'-'9' and to form 9 if alpha. This is added to the
+ * final four bits of the input to form the correct value. */
+#define XDIGIT_VALUE(c) (__ASSERT_(isXDIGIT(c)) \
+ ((NATIVE_TO_LATIN1(c) >> 6) & 1) /* 1 if alpha; 0 if not */ \
+ + ((NATIVE_TO_LATIN1(c) >> 3) & 8) /* 8 if alpha; 0 if not */ \
+ + ((c) & 0xF)) /* low nibble: 0-9 for a digit; 1-6 for A-F, a-f */
+
+/* Evaluates to the numeric value of the hexadecimal digit that 's' points to,
+ * and advances 's' past it.  The argument is a string pointer that must be
+ * modifiable; it appears more than once in the expansion, so it must not have
+ * side effects.  The digit is converted by XDIGIT_VALUE(). */
+#define READ_XDIGIT(s) ((s)++, XDIGIT_VALUE(*((s) - 1)))
/* Converts a character known to represent an octal digit (0-7) to its numeric
* value. The input is validated only by an assert() in DEBUGGING builds. In