X-Git-Url: https://perl5.git.perl.org/perl5.git/blobdiff_plain/375f5f0648cdf36c13cb11499b332c99c710d138..091fed7c37756f670487d9d498015f5670416e37:/handy.h?ds=sidebyside diff --git a/handy.h b/handy.h index 351ffcd..c5c4d4b 100644 --- a/handy.h +++ b/handy.h @@ -96,19 +96,6 @@ Null SV pointer. (No longer available when C is defined.) # endif #endif -/* The NeXT dynamic loader headers will not build with the bool macro - So declare them now to clear confusion. -*/ -#if defined(NeXT) || defined(__NeXT__) -# undef FALSE -# undef TRUE - typedef enum bool { FALSE = 0, TRUE = 1 } bool; -# define ENUM_BOOL 1 -# ifndef HAS_BOOL -# define HAS_BOOL 1 -# endif /* !HAS_BOOL */ -#endif /* NeXT || __NeXT__ */ - #ifndef HAS_BOOL # ifdef bool # undef bool @@ -506,7 +493,7 @@ C). /* -=head1 Character classes +=head1 Character classification This section is about functions (really macros) that classify characters into types, such as punctuation versus alphabetic, etc. Most of these are analogous to regular expression character classes. (See @@ -562,14 +549,16 @@ is tested. =for apidoc Am|bool|isALPHA|char ch Returns a boolean indicating whether the specified character is an alphabetic character, analogous to C. -See the L for an explanation of variants +See the L for an explanation of +variants C, C, C, C, C, C, and C. =for apidoc Am|bool|isALPHANUMERIC|char ch Returns a boolean indicating whether the specified character is a either an alphabetic character or decimal digit, analogous to C. -See the L for an explanation of variants +See the L for an explanation of +variants C, C, C, C, C, C, and C. @@ -580,7 +569,8 @@ characters in the ASCII character set, analogous to C. On non-ASCII platforms, it returns TRUE iff this character corresponds to an ASCII character. Variants C and C are identical to C. -See the L for an explanation of variants +See the L for an explanation of +variants C, C, C, C, and C. Note, however, that some platforms do not have the C library routine C. In these cases, the variants whose names contain @@ -595,7 +585,8 @@ work properly on any string encoded or not in UTF-8. =for apidoc Am|bool|isBLANK|char ch Returns a boolean indicating whether the specified character is a character considered to be a blank, analogous to C. -See the L for an explanation of variants +See the L for an explanation of +variants C, C, C, C, C, C, and C. Note, however, that some platforms do not have the C library routine C. In these cases, the @@ -605,7 +596,8 @@ without. =for apidoc Am|bool|isCNTRL|char ch Returns a boolean indicating whether the specified character is a control character, analogous to C. -See the L for an explanation of variants +See the L for an explanation of +variants C, C, C, C, C, C, and C On EBCDIC platforms, you almost always want to use the C variant. @@ -614,21 +606,24 @@ On EBCDIC platforms, you almost always want to use the C variant. Returns a boolean indicating whether the specified character is a digit, analogous to C. Variants C and C are identical to C. -See the L for an explanation of variants +See the L for an explanation of +variants C, C, C, C, and C. =for apidoc Am|bool|isGRAPH|char ch Returns a boolean indicating whether the specified character is a graphic character, analogous to C. -See the L for an explanation of variants +See the L for an explanation of +variants C, C, C, C, C, C, and C. =for apidoc Am|bool|isLOWER|char ch Returns a boolean indicating whether the specified character is a lowercase character, analogous to C. -See the L for an explanation of variants +See the L for an explanation of +variants C, C, C, C, C, C, and C. @@ -644,7 +639,8 @@ punctuation character, analogous to C. Note that the definition of what is punctuation isn't as straightforward as one might desire. See L for details. -See the L for an explanation of variants +See the L for an explanation of +variants C, C, C, C, C, C, and C. @@ -659,7 +655,8 @@ locale forms of this macro (the ones with C in their names) matched precisely what C does. In those releases, the only difference, in the non-locale variants, was that C did not match a vertical tab. (See L for a macro that matches a vertical tab in all releases.) -See the L for an explanation of variants +See the L for an explanation of +variants C, C, C, C, C, C, and C. @@ -675,21 +672,24 @@ non-locale forms differ from their C forms only in that the C forms don't match a Vertical Tab, and the C forms do. Otherwise they are identical. Thus this macro is analogous to what C matches in a regular expression. -See the L for an explanation of variants +See the L for an explanation of +variants C, C, C, C, C, C, and C. =for apidoc Am|bool|isUPPER|char ch Returns a boolean indicating whether the specified character is an uppercase character, analogous to C. -See the L for an explanation of variants +See the L for an explanation of +variants C, C, C, C, C, C, and C. =for apidoc Am|bool|isPRINT|char ch Returns a boolean indicating whether the specified character is a printable character, analogous to C. -See the L for an explanation of variants +See the L for an explanation of +variants C, C, C, C, C, C, and C. @@ -702,7 +702,8 @@ a "mark" character that attaches to one of those (like some sort of accent). C is a synonym provided for backward compatibility, even though a word character includes more than the standard C language meaning of alphanumeric. -See the L for an explanation of variants +See the L for an explanation of +variants C, C, C, C, C, C, and C. @@ -710,7 +711,8 @@ C, C, and C. Returns a boolean indicating whether the specified character is a hexadecimal digit. In the ASCII range these are C<[0-9A-Fa-f]>. Variants C and C are identical to C. -See the L for an explanation of variants +See the L for an explanation of +variants C, C, C, C, and C. @@ -719,7 +721,8 @@ Returns a boolean indicating whether the specified character can be the first character of an identifier. This is very close to, but not quite the same as the official Unicode property C. The difference is that this returns true only if the input character also matches L. -See the L for an explanation of variants +See the L for an explanation of +variants C, C, C, C, C, C, and C. @@ -728,7 +731,8 @@ Returns a boolean indicating whether the specified character can be the second or succeeding character of an identifier. This is very close to, but not quite the same as the official Unicode property C. The difference is that this returns true only if the input character also matches -L. See the L for an +L. See the L for +an explanation of variants C, C, C, C, C, C, and C. @@ -959,8 +963,7 @@ patched there. The file as of this writing is cpan/Devel-PPPort/parts/inc/misc # define _CC_QUOTEMETA 21 # define _CC_NON_FINAL_FOLD 22 # define _CC_IS_IN_SOME_FOLD 23 -# define _CC_BACKSLASH_FOO_LBRACE_IS_META 31 /* temp, see mk_PL_charclass.pl */ -/* Unused: 24-30 +/* Unused: 24-31 * If more bits are needed, one could add a second word for non-64bit * QUAD_IS_INT systems, using some #ifdefs to distinguish between having a 2nd * word or not. The IS_IN_SOME_FOLD bit is the most easily expendable, as it @@ -1320,46 +1323,21 @@ EXTCONST U32 PL_charclass[]; _generic_toLOWER_LC(c, function, cast))) /* Use the libc versions for these if available. */ -#if defined(HAS_ISASCII) && ! defined(USE_NEXT_CTYPE) +#if defined(HAS_ISASCII) # define isASCII_LC(c) (FITS_IN_8_BITS(c) && isascii( (U8) (c))) #else # define isASCII_LC(c) isASCII(c) #endif -#if defined(HAS_ISBLANK) && ! defined(USE_NEXT_CTYPE) +#if defined(HAS_ISBLANK) # define isBLANK_LC(c) _generic_LC(c, _CC_BLANK, isblank) #else /* Unlike isASCII, varies if in a UTF-8 locale */ # define isBLANK_LC(c) (IN_UTF8_CTYPE_LOCALE) ? isBLANK_L1(c) : isBLANK(c) #endif -#ifdef USE_NEXT_CTYPE /* NeXT computers */ - -# define _LC_CAST unsigned int /* Needed by _generic_LC. NeXT functions - use this as their input type */ - -# define isALPHA_LC(c) _generic_LC(c, _CC_ALPHA, NXIsAlpha) -# define isALPHANUMERIC_LC(c) _generic_LC(c, _CC_ALPHANUMERIC, NXIsAlNum) -# define isCNTRL_LC(c) _generic_LC(c, _CC_CNTRL, NXIsCntrl) -# define isDIGIT_LC(c) _generic_LC(c, _CC_DIGIT, NXIsDigit) -# define isGRAPH_LC(c) _generic_LC(c, _CC_GRAPH, NXIsGraph) -# define isIDFIRST_LC(c) _generic_LC_underscore(c, _CC_IDFIRST, NXIsAlpha) -# define isLOWER_LC(c) _generic_LC(c, _CC_LOWER, NXIsLower) -# define isPRINT_LC(c) _generic_LC(c, _CC_PRINT, NXIsPrint) -# define isPUNCT_LC(c) _generic_LC(c, _CC_PUNCT, NXIsPunct) -# define isSPACE_LC(c) _generic_LC(c, _CC_SPACE, NXIsSpace) -# define isUPPER_LC(c) _generic_LC(c, _CC_UPPER, NXIsUpper) -# define isWORDCHAR_LC(c) _generic_LC_underscore(c, _CC_WORDCHAR, NXIsAlNum) -# define isXDIGIT_LC(c) _generic_LC(c, _CC_XDIGIT, NXIsXdigit) - -# define toLOWER_LC(c) _generic_toLOWER_LC((c), NXToLower, unsigned int) -# define toUPPER_LC(c) _generic_toUPPER_LC((c), NXToUpper, unsigned int) -# define toFOLD_LC(c) _generic_toFOLD_LC((c), NXToLower, unsigned int) +#define _LC_CAST U8 -#else /* !USE_NEXT_CTYPE */ - -# define _LC_CAST U8 - -# ifdef WIN32 +#ifdef WIN32 /* The Windows functions don't bother to follow the POSIX standard, which * for example says that something can't both be a printable and a control. * But Windows treats the \t control as a printable, and does such things @@ -1370,69 +1348,68 @@ EXTCONST U32 PL_charclass[]; * Not all possible weirdnesses are checked for, just the ones that were * detected on actual Microsoft code pages */ -# define isCNTRL_LC(c) _generic_LC(c, _CC_CNTRL, iscntrl) -# define isSPACE_LC(c) _generic_LC(c, _CC_SPACE, isspace) - -# define isALPHA_LC(c) (_generic_LC(c, _CC_ALPHA, isalpha) && isALPHANUMERIC_LC(c)) -# define isALPHANUMERIC_LC(c) (_generic_LC(c, _CC_ALPHANUMERIC, isalnum) && ! isPUNCT_LC(c)) -# define isDIGIT_LC(c) (_generic_LC(c, _CC_DIGIT, isdigit) && isALPHANUMERIC_LC(c)) -# define isGRAPH_LC(c) (_generic_LC(c, _CC_GRAPH, isgraph) && isPRINT_LC(c)) -# define isIDFIRST_LC(c) (((c) == '_') || (_generic_LC(c, _CC_IDFIRST, isalpha) && ! isPUNCT_LC(c))) -# define isLOWER_LC(c) (_generic_LC(c, _CC_LOWER, islower) && isALPHA_LC(c)) -# define isPRINT_LC(c) (_generic_LC(c, _CC_PRINT, isprint) && ! isCNTRL_LC(c)) -# define isPUNCT_LC(c) (_generic_LC(c, _CC_PUNCT, ispunct) && ! isCNTRL_LC(c)) -# define isUPPER_LC(c) (_generic_LC(c, _CC_UPPER, isupper) && isALPHA_LC(c)) -# define isWORDCHAR_LC(c) (((c) == '_') || isALPHANUMERIC_LC(c)) -# define isXDIGIT_LC(c) (_generic_LC(c, _CC_XDIGIT, isxdigit) && isALPHANUMERIC_LC(c)) - -# define toLOWER_LC(c) _generic_toLOWER_LC((c), tolower, U8) -# define toUPPER_LC(c) _generic_toUPPER_LC((c), toupper, U8) -# define toFOLD_LC(c) _generic_toFOLD_LC((c), tolower, U8) - -# elif defined(CTYPE256) || (!defined(isascii) && !defined(HAS_ISASCII)) +# define isCNTRL_LC(c) _generic_LC(c, _CC_CNTRL, iscntrl) +# define isSPACE_LC(c) _generic_LC(c, _CC_SPACE, isspace) + +# define isALPHA_LC(c) (_generic_LC(c, _CC_ALPHA, isalpha) && isALPHANUMERIC_LC(c)) +# define isALPHANUMERIC_LC(c) (_generic_LC(c, _CC_ALPHANUMERIC, isalnum) && ! isPUNCT_LC(c)) +# define isDIGIT_LC(c) (_generic_LC(c, _CC_DIGIT, isdigit) && isALPHANUMERIC_LC(c)) +# define isGRAPH_LC(c) (_generic_LC(c, _CC_GRAPH, isgraph) && isPRINT_LC(c)) +# define isIDFIRST_LC(c) (((c) == '_') || (_generic_LC(c, _CC_IDFIRST, isalpha) && ! isPUNCT_LC(c))) +# define isLOWER_LC(c) (_generic_LC(c, _CC_LOWER, islower) && isALPHA_LC(c)) +# define isPRINT_LC(c) (_generic_LC(c, _CC_PRINT, isprint) && ! isCNTRL_LC(c)) +# define isPUNCT_LC(c) (_generic_LC(c, _CC_PUNCT, ispunct) && ! isCNTRL_LC(c)) +# define isUPPER_LC(c) (_generic_LC(c, _CC_UPPER, isupper) && isALPHA_LC(c)) +# define isWORDCHAR_LC(c) (((c) == '_') || isALPHANUMERIC_LC(c)) +# define isXDIGIT_LC(c) (_generic_LC(c, _CC_XDIGIT, isxdigit) && isALPHANUMERIC_LC(c)) + +# define toLOWER_LC(c) _generic_toLOWER_LC((c), tolower, U8) +# define toUPPER_LC(c) _generic_toUPPER_LC((c), toupper, U8) +# define toFOLD_LC(c) _generic_toFOLD_LC((c), tolower, U8) + +#elif defined(CTYPE256) || (!defined(isascii) && !defined(HAS_ISASCII)) /* For most other platforms */ -# define isALPHA_LC(c) _generic_LC(c, _CC_ALPHA, isalpha) -# define isALPHANUMERIC_LC(c) _generic_LC(c, _CC_ALPHANUMERIC, isalnum) -# define isCNTRL_LC(c) _generic_LC(c, _CC_CNTRL, iscntrl) -# define isDIGIT_LC(c) _generic_LC(c, _CC_DIGIT, isdigit) -# define isGRAPH_LC(c) _generic_LC(c, _CC_GRAPH, isgraph) -# define isIDFIRST_LC(c) _generic_LC_underscore(c, _CC_IDFIRST, isalpha) -# define isLOWER_LC(c) _generic_LC(c, _CC_LOWER, islower) -# define isPRINT_LC(c) _generic_LC(c, _CC_PRINT, isprint) -# define isPUNCT_LC(c) _generic_LC(c, _CC_PUNCT, ispunct) -# define isSPACE_LC(c) _generic_LC(c, _CC_SPACE, isspace) -# define isUPPER_LC(c) _generic_LC(c, _CC_UPPER, isupper) -# define isWORDCHAR_LC(c) _generic_LC_underscore(c, _CC_WORDCHAR, isalnum) -# define isXDIGIT_LC(c) _generic_LC(c, _CC_XDIGIT, isxdigit) - - -# define toLOWER_LC(c) _generic_toLOWER_LC((c), tolower, U8) -# define toUPPER_LC(c) _generic_toUPPER_LC((c), toupper, U8) -# define toFOLD_LC(c) _generic_toFOLD_LC((c), tolower, U8) - -# else /* The final fallback position */ - -# define isALPHA_LC(c) (isascii(c) && isalpha(c)) -# define isALPHANUMERIC_LC(c) (isascii(c) && isalnum(c)) -# define isCNTRL_LC(c) (isascii(c) && iscntrl(c)) -# define isDIGIT_LC(c) (isascii(c) && isdigit(c)) -# define isGRAPH_LC(c) (isascii(c) && isgraph(c)) -# define isIDFIRST_LC(c) (isascii(c) && (isalpha(c) || (c) == '_')) -# define isLOWER_LC(c) (isascii(c) && islower(c)) -# define isPRINT_LC(c) (isascii(c) && isprint(c)) -# define isPUNCT_LC(c) (isascii(c) && ispunct(c)) -# define isSPACE_LC(c) (isascii(c) && isspace(c)) -# define isUPPER_LC(c) (isascii(c) && isupper(c)) -# define isWORDCHAR_LC(c) (isascii(c) && (isalnum(c) || (c) == '_')) -# define isXDIGIT_LC(c) (isascii(c) && isxdigit(c)) - -# define toLOWER_LC(c) (isascii(c) ? tolower(c) : (c)) -# define toUPPER_LC(c) (isascii(c) ? toupper(c) : (c)) -# define toFOLD_LC(c) (isascii(c) ? tolower(c) : (c)) +# define isALPHA_LC(c) _generic_LC(c, _CC_ALPHA, isalpha) +# define isALPHANUMERIC_LC(c) _generic_LC(c, _CC_ALPHANUMERIC, isalnum) +# define isCNTRL_LC(c) _generic_LC(c, _CC_CNTRL, iscntrl) +# define isDIGIT_LC(c) _generic_LC(c, _CC_DIGIT, isdigit) +# define isGRAPH_LC(c) _generic_LC(c, _CC_GRAPH, isgraph) +# define isIDFIRST_LC(c) _generic_LC_underscore(c, _CC_IDFIRST, isalpha) +# define isLOWER_LC(c) _generic_LC(c, _CC_LOWER, islower) +# define isPRINT_LC(c) _generic_LC(c, _CC_PRINT, isprint) +# define isPUNCT_LC(c) _generic_LC(c, _CC_PUNCT, ispunct) +# define isSPACE_LC(c) _generic_LC(c, _CC_SPACE, isspace) +# define isUPPER_LC(c) _generic_LC(c, _CC_UPPER, isupper) +# define isWORDCHAR_LC(c) _generic_LC_underscore(c, _CC_WORDCHAR, isalnum) +# define isXDIGIT_LC(c) _generic_LC(c, _CC_XDIGIT, isxdigit) + + +# define toLOWER_LC(c) _generic_toLOWER_LC((c), tolower, U8) +# define toUPPER_LC(c) _generic_toUPPER_LC((c), toupper, U8) +# define toFOLD_LC(c) _generic_toFOLD_LC((c), tolower, U8) + +#else /* The final fallback position */ + +# define isALPHA_LC(c) (isascii(c) && isalpha(c)) +# define isALPHANUMERIC_LC(c) (isascii(c) && isalnum(c)) +# define isCNTRL_LC(c) (isascii(c) && iscntrl(c)) +# define isDIGIT_LC(c) (isascii(c) && isdigit(c)) +# define isGRAPH_LC(c) (isascii(c) && isgraph(c)) +# define isIDFIRST_LC(c) (isascii(c) && (isalpha(c) || (c) == '_')) +# define isLOWER_LC(c) (isascii(c) && islower(c)) +# define isPRINT_LC(c) (isascii(c) && isprint(c)) +# define isPUNCT_LC(c) (isascii(c) && ispunct(c)) +# define isSPACE_LC(c) (isascii(c) && isspace(c)) +# define isUPPER_LC(c) (isascii(c) && isupper(c)) +# define isWORDCHAR_LC(c) (isascii(c) && (isalnum(c) || (c) == '_')) +# define isXDIGIT_LC(c) (isascii(c) && isxdigit(c)) + +# define toLOWER_LC(c) (isascii(c) ? tolower(c) : (c)) +# define toUPPER_LC(c) (isascii(c) ? toupper(c) : (c)) +# define toFOLD_LC(c) (isascii(c) ? tolower(c) : (c)) -# endif -#endif /* USE_NEXT_CTYPE */ +#endif #define isIDCONT(c) isWORDCHAR(c) #define isIDCONT_A(c) isWORDCHAR_A(c)