# endif
#endif
-/* The NeXT dynamic loader headers will not build with the bool macro
- So declare them now to clear confusion.
-*/
-#if defined(NeXT) || defined(__NeXT__)
-# undef FALSE
-# undef TRUE
- typedef enum bool { FALSE = 0, TRUE = 1 } bool;
-# define ENUM_BOOL 1
-# ifndef HAS_BOOL
-# define HAS_BOOL 1
-# endif /* !HAS_BOOL */
-#endif /* NeXT || __NeXT__ */
-
#ifndef HAS_BOOL
# ifdef bool
# undef bool
the published Unicode rules; otherwise, it uses the C library function that
gives the named classification. For example, C<isDIGIT_LC()> when not in a
UTF-8 locale returns the result of calling C<isdigit()>. FALSE is always
-returned if the input won't fit into an octet.
+returned if the input won't fit into an octet. On some platforms where the C
+library function is known to be defective, Perl changes its result to follow
+the POSIX standard's rules.
Variant C<isFOO_LC_uvchr> is like C<isFOO_LC>, but is defined on any UV. It
returns the same as C<isFOO_LC> for input code points less than 256, and
alphanumeric.
See the L<top of this section|/Character classification> for an explanation of
variants
-C<isWORDCHAR_A>, C<isWORDCHAR_L1>, C<isWORDCHAR_uni>, C<isWORDCHAR_utf8>,
-C<isWORDCHAR_LC>, C<isWORDCHAR_LC_uvchr>, and C<isWORDCHAR_LC_utf8>.
+C<isWORDCHAR_A>, C<isWORDCHAR_L1>, C<isWORDCHAR_uni>, and C<isWORDCHAR_utf8>.
+C<isWORDCHAR_LC>, C<isWORDCHAR_LC_uvchr>, and C<isWORDCHAR_LC_utf8> are also as
+described there, but additionally include the platform's native underscore.
=for apidoc Am|bool|isXDIGIT|char ch
Returns a boolean indicating whether the specified character is a hexadecimal
# define _CC_QUOTEMETA 21
# define _CC_NON_FINAL_FOLD 22
# define _CC_IS_IN_SOME_FOLD 23
-# define _CC_BACKSLASH_FOO_LBRACE_IS_META 31 /* temp, see mk_PL_charclass.pl */
-/* Unused: 24-30
+# define _CC_MNEMONIC_CNTRL 24
+/* Unused: 25-31
* If more bits are needed, one could add a second word for non-64bit
* QUAD_IS_INT systems, using some #ifdefs to distinguish between having a 2nd
* word or not. The IS_IN_SOME_FOLD bit is the most easily expendable, as it
# define isALPHANUMERIC_A(c) _generic_isCC_A(c, _CC_ALPHANUMERIC)
# define isBLANK_A(c) _generic_isCC_A(c, _CC_BLANK)
# define isCNTRL_A(c) _generic_isCC_A(c, _CC_CNTRL)
-# define isDIGIT_A(c) _generic_isCC(c, _CC_DIGIT)
+# define isDIGIT_A(c) _generic_isCC(c, _CC_DIGIT) /* No non-ASCII digits */
# define isGRAPH_A(c) _generic_isCC_A(c, _CC_GRAPH)
# define isLOWER_A(c) _generic_isCC_A(c, _CC_LOWER)
# define isPRINT_A(c) _generic_isCC_A(c, _CC_PRINT)
# define isSPACE_A(c) _generic_isCC_A(c, _CC_SPACE)
# define isUPPER_A(c) _generic_isCC_A(c, _CC_UPPER)
# define isWORDCHAR_A(c) _generic_isCC_A(c, _CC_WORDCHAR)
-# define isXDIGIT_A(c) _generic_isCC(c, _CC_XDIGIT)
+# define isXDIGIT_A(c) _generic_isCC(c, _CC_XDIGIT) /* No non-ASCII xdigits */
# define isIDFIRST_A(c) _generic_isCC_A(c, _CC_IDFIRST)
# define isALPHA_L1(c) _generic_isCC(c, _CC_ALPHA)
# define isALPHANUMERIC_L1(c) _generic_isCC(c, _CC_ALPHANUMERIC)
_generic_isCC(c, _CC_NON_FINAL_FOLD)
# define _IS_IN_SOME_FOLD_ONLY_FOR_USE_BY_REGCOMP_DOT_C(c) \
_generic_isCC(c, _CC_IS_IN_SOME_FOLD)
+# define _IS_MNEMONIC_CNTRL_ONLY_FOR_USE_BY_REGCOMP_DOT_C(c) \
+ _generic_isCC(c, _CC_MNEMONIC_CNTRL)
#else /* else we don't have perl.h */
/* If we don't have perl.h, we are compiling a utility program. Below we
_generic_toLOWER_LC(c, function, cast)))
/* Use the libc versions for these if available. */
-#if defined(HAS_ISASCII) && ! defined(USE_NEXT_CTYPE)
+#if defined(HAS_ISASCII)
# define isASCII_LC(c) (FITS_IN_8_BITS(c) && isascii( (U8) (c)))
#else
# define isASCII_LC(c) isASCII(c)
#endif
-#if defined(HAS_ISBLANK) && ! defined(USE_NEXT_CTYPE)
+#if defined(HAS_ISBLANK)
# define isBLANK_LC(c) _generic_LC(c, _CC_BLANK, isblank)
#else /* Unlike isASCII, varies if in a UTF-8 locale */
/* Fallback when libc has no isblank(): choose the Latin-1 or the plain
 * definition of "blank" depending on whether the current LC_CTYPE locale is
 * UTF-8.  The whole conditional is parenthesized so that the macro behaves as
 * a single operand when used inside a larger expression, e.g.
 * '! isBLANK_LC(c)' or 'isBLANK_LC(c) && foo'. */
# define isBLANK_LC(c) ((IN_UTF8_CTYPE_LOCALE) ? isBLANK_L1(c) : isBLANK(c))
#endif
-#ifdef USE_NEXT_CTYPE /* NeXT computers */
-
-# define _LC_CAST unsigned int /* Needed by _generic_LC. NeXT functions
- use this as their input type */
-
-# define isALPHA_LC(c) _generic_LC(c, _CC_ALPHA, NXIsAlpha)
-# define isALPHANUMERIC_LC(c) _generic_LC(c, _CC_ALPHANUMERIC, NXIsAlNum)
-# define isCNTRL_LC(c) _generic_LC(c, _CC_CNTRL, NXIsCntrl)
-# define isDIGIT_LC(c) _generic_LC(c, _CC_DIGIT, NXIsDigit)
-# define isGRAPH_LC(c) _generic_LC(c, _CC_GRAPH, NXIsGraph)
-# define isIDFIRST_LC(c) _generic_LC_underscore(c, _CC_IDFIRST, NXIsAlpha)
-# define isLOWER_LC(c) _generic_LC(c, _CC_LOWER, NXIsLower)
-# define isPRINT_LC(c) _generic_LC(c, _CC_PRINT, NXIsPrint)
-# define isPUNCT_LC(c) _generic_LC(c, _CC_PUNCT, NXIsPunct)
-# define isSPACE_LC(c) _generic_LC(c, _CC_SPACE, NXIsSpace)
-# define isUPPER_LC(c) _generic_LC(c, _CC_UPPER, NXIsUpper)
-# define isWORDCHAR_LC(c) _generic_LC_underscore(c, _CC_WORDCHAR, NXIsAlNum)
-# define isXDIGIT_LC(c) _generic_LC(c, _CC_XDIGIT, NXIsXdigit)
-
-# define toLOWER_LC(c) _generic_toLOWER_LC((c), NXToLower, unsigned int)
-# define toUPPER_LC(c) _generic_toUPPER_LC((c), NXToUpper, unsigned int)
-# define toFOLD_LC(c) _generic_toFOLD_LC((c), NXToLower, unsigned int)
-
-#else /* !USE_NEXT_CTYPE */
+#define _LC_CAST U8
-# define _LC_CAST U8
-
-# ifdef WIN32
+#ifdef WIN32
/* The Windows functions don't bother to follow the POSIX standard, which
* for example says that something can't both be a printable and a control.
* But Windows treats the \t control as a printable, and does such things
* Not all possible weirdnesses are checked for, just the ones that were
* detected on actual Microsoft code pages */
-# define isCNTRL_LC(c) _generic_LC(c, _CC_CNTRL, iscntrl)
-# define isSPACE_LC(c) _generic_LC(c, _CC_SPACE, isspace)
-
-# define isALPHA_LC(c) (_generic_LC(c, _CC_ALPHA, isalpha) && isALPHANUMERIC_LC(c))
-# define isALPHANUMERIC_LC(c) (_generic_LC(c, _CC_ALPHANUMERIC, isalnum) && ! isPUNCT_LC(c))
-# define isDIGIT_LC(c) (_generic_LC(c, _CC_DIGIT, isdigit) && isALPHANUMERIC_LC(c))
-# define isGRAPH_LC(c) (_generic_LC(c, _CC_GRAPH, isgraph) && isPRINT_LC(c))
-# define isIDFIRST_LC(c) (((c) == '_') || (_generic_LC(c, _CC_IDFIRST, isalpha) && ! isPUNCT_LC(c)))
-# define isLOWER_LC(c) (_generic_LC(c, _CC_LOWER, islower) && isALPHA_LC(c))
-# define isPRINT_LC(c) (_generic_LC(c, _CC_PRINT, isprint) && ! isCNTRL_LC(c))
-# define isPUNCT_LC(c) (_generic_LC(c, _CC_PUNCT, ispunct) && ! isCNTRL_LC(c))
-# define isUPPER_LC(c) (_generic_LC(c, _CC_UPPER, isupper) && isALPHA_LC(c))
-# define isWORDCHAR_LC(c) (((c) == '_') || isALPHANUMERIC_LC(c))
-# define isXDIGIT_LC(c) (_generic_LC(c, _CC_XDIGIT, isxdigit) && isALPHANUMERIC_LC(c))
-
-# define toLOWER_LC(c) _generic_toLOWER_LC((c), tolower, U8)
-# define toUPPER_LC(c) _generic_toUPPER_LC((c), toupper, U8)
-# define toFOLD_LC(c) _generic_toFOLD_LC((c), tolower, U8)
-
-# elif defined(CTYPE256) || (!defined(isascii) && !defined(HAS_ISASCII))
+# define isCNTRL_LC(c) _generic_LC(c, _CC_CNTRL, iscntrl)
+# define isSPACE_LC(c) _generic_LC(c, _CC_SPACE, isspace)
+
+# define isALPHA_LC(c) (_generic_LC(c, _CC_ALPHA, isalpha) && isALPHANUMERIC_LC(c))
+# define isALPHANUMERIC_LC(c) (_generic_LC(c, _CC_ALPHANUMERIC, isalnum) && ! isPUNCT_LC(c))
+# define isDIGIT_LC(c) (_generic_LC(c, _CC_DIGIT, isdigit) && isALPHANUMERIC_LC(c))
+# define isGRAPH_LC(c) (_generic_LC(c, _CC_GRAPH, isgraph) && isPRINT_LC(c))
+# define isIDFIRST_LC(c) (((c) == '_') || (_generic_LC(c, _CC_IDFIRST, isalpha) && ! isPUNCT_LC(c)))
+# define isLOWER_LC(c) (_generic_LC(c, _CC_LOWER, islower) && isALPHA_LC(c))
+# define isPRINT_LC(c) (_generic_LC(c, _CC_PRINT, isprint) && ! isCNTRL_LC(c))
+# define isPUNCT_LC(c) (_generic_LC(c, _CC_PUNCT, ispunct) && ! isCNTRL_LC(c))
+# define isUPPER_LC(c) (_generic_LC(c, _CC_UPPER, isupper) && isALPHA_LC(c))
+# define isWORDCHAR_LC(c) (((c) == '_') || isALPHANUMERIC_LC(c))
+# define isXDIGIT_LC(c) (_generic_LC(c, _CC_XDIGIT, isxdigit) && isALPHANUMERIC_LC(c))
+
+# define toLOWER_LC(c) _generic_toLOWER_LC((c), tolower, U8)
+# define toUPPER_LC(c) _generic_toUPPER_LC((c), toupper, U8)
+# define toFOLD_LC(c) _generic_toFOLD_LC((c), tolower, U8)
+
+#elif defined(CTYPE256) || (!defined(isascii) && !defined(HAS_ISASCII))
/* For most other platforms */
-# define isALPHA_LC(c) _generic_LC(c, _CC_ALPHA, isalpha)
-# define isALPHANUMERIC_LC(c) _generic_LC(c, _CC_ALPHANUMERIC, isalnum)
-# define isCNTRL_LC(c) _generic_LC(c, _CC_CNTRL, iscntrl)
-# define isDIGIT_LC(c) _generic_LC(c, _CC_DIGIT, isdigit)
-# define isGRAPH_LC(c) _generic_LC(c, _CC_GRAPH, isgraph)
-# define isIDFIRST_LC(c) _generic_LC_underscore(c, _CC_IDFIRST, isalpha)
-# define isLOWER_LC(c) _generic_LC(c, _CC_LOWER, islower)
-# define isPRINT_LC(c) _generic_LC(c, _CC_PRINT, isprint)
-# define isPUNCT_LC(c) _generic_LC(c, _CC_PUNCT, ispunct)
-# define isSPACE_LC(c) _generic_LC(c, _CC_SPACE, isspace)
-# define isUPPER_LC(c) _generic_LC(c, _CC_UPPER, isupper)
-# define isWORDCHAR_LC(c) _generic_LC_underscore(c, _CC_WORDCHAR, isalnum)
-# define isXDIGIT_LC(c) _generic_LC(c, _CC_XDIGIT, isxdigit)
-
-
-# define toLOWER_LC(c) _generic_toLOWER_LC((c), tolower, U8)
-# define toUPPER_LC(c) _generic_toUPPER_LC((c), toupper, U8)
-# define toFOLD_LC(c) _generic_toFOLD_LC((c), tolower, U8)
-
-# else /* The final fallback position */
-
-# define isALPHA_LC(c) (isascii(c) && isalpha(c))
-# define isALPHANUMERIC_LC(c) (isascii(c) && isalnum(c))
-# define isCNTRL_LC(c) (isascii(c) && iscntrl(c))
-# define isDIGIT_LC(c) (isascii(c) && isdigit(c))
-# define isGRAPH_LC(c) (isascii(c) && isgraph(c))
-# define isIDFIRST_LC(c) (isascii(c) && (isalpha(c) || (c) == '_'))
-# define isLOWER_LC(c) (isascii(c) && islower(c))
-# define isPRINT_LC(c) (isascii(c) && isprint(c))
-# define isPUNCT_LC(c) (isascii(c) && ispunct(c))
-# define isSPACE_LC(c) (isascii(c) && isspace(c))
-# define isUPPER_LC(c) (isascii(c) && isupper(c))
-# define isWORDCHAR_LC(c) (isascii(c) && (isalnum(c) || (c) == '_'))
-# define isXDIGIT_LC(c) (isascii(c) && isxdigit(c))
-
-# define toLOWER_LC(c) (isascii(c) ? tolower(c) : (c))
-# define toUPPER_LC(c) (isascii(c) ? toupper(c) : (c))
-# define toFOLD_LC(c) (isascii(c) ? tolower(c) : (c))
+# define isALPHA_LC(c) _generic_LC(c, _CC_ALPHA, isalpha)
+# define isALPHANUMERIC_LC(c) _generic_LC(c, _CC_ALPHANUMERIC, isalnum)
+# define isCNTRL_LC(c) _generic_LC(c, _CC_CNTRL, iscntrl)
+# define isDIGIT_LC(c) _generic_LC(c, _CC_DIGIT, isdigit)
+# define isGRAPH_LC(c) _generic_LC(c, _CC_GRAPH, isgraph)
+# define isIDFIRST_LC(c) _generic_LC_underscore(c, _CC_IDFIRST, isalpha)
+# define isLOWER_LC(c) _generic_LC(c, _CC_LOWER, islower)
+# define isPRINT_LC(c) _generic_LC(c, _CC_PRINT, isprint)
+# define isPUNCT_LC(c) _generic_LC(c, _CC_PUNCT, ispunct)
+# define isSPACE_LC(c) _generic_LC(c, _CC_SPACE, isspace)
+# define isUPPER_LC(c) _generic_LC(c, _CC_UPPER, isupper)
+# define isWORDCHAR_LC(c) _generic_LC_underscore(c, _CC_WORDCHAR, isalnum)
+# define isXDIGIT_LC(c) _generic_LC(c, _CC_XDIGIT, isxdigit)
+
+
+# define toLOWER_LC(c) _generic_toLOWER_LC((c), tolower, U8)
+# define toUPPER_LC(c) _generic_toUPPER_LC((c), toupper, U8)
+# define toFOLD_LC(c) _generic_toFOLD_LC((c), tolower, U8)
+
+#else /* The final fallback position */
+
+# define isALPHA_LC(c) (isascii(c) && isalpha(c))
+# define isALPHANUMERIC_LC(c) (isascii(c) && isalnum(c))
+# define isCNTRL_LC(c) (isascii(c) && iscntrl(c))
+# define isDIGIT_LC(c) (isascii(c) && isdigit(c))
+# define isGRAPH_LC(c) (isascii(c) && isgraph(c))
+# define isIDFIRST_LC(c) (isascii(c) && (isalpha(c) || (c) == '_'))
+# define isLOWER_LC(c) (isascii(c) && islower(c))
+# define isPRINT_LC(c) (isascii(c) && isprint(c))
+# define isPUNCT_LC(c) (isascii(c) && ispunct(c))
+# define isSPACE_LC(c) (isascii(c) && isspace(c))
+# define isUPPER_LC(c) (isascii(c) && isupper(c))
+# define isWORDCHAR_LC(c) (isascii(c) && (isalnum(c) || (c) == '_'))
+# define isXDIGIT_LC(c) (isascii(c) && isxdigit(c))
+
+# define toLOWER_LC(c) (isascii(c) ? tolower(c) : (c))
+# define toUPPER_LC(c) (isascii(c) ? toupper(c) : (c))
+# define toFOLD_LC(c) (isascii(c) ? tolower(c) : (c))
-# endif
-#endif /* USE_NEXT_CTYPE */
+#endif
#define isIDCONT(c) isWORDCHAR(c)
#define isIDCONT_A(c) isWORDCHAR_A(c)
* both ASCII and EBCDIC the last 3 bits of the octal digits range from 0-7. */
#define OCTAL_VALUE(c) (__ASSERT_(isOCTAL(c)) (7 & (c)))
+/* Efficiently returns a boolean as to if two native characters are equivalent
+ * case-insensitively. At least one of the characters must be one of [A-Za-z];
+ * the ALPHA in the name is to remind you of that. This is asserted() in
+ * DEBUGGING builds. Because [A-Za-z] are invariant under UTF-8, this macro
+ * works (on valid input) for both non- and UTF-8-encoded bytes.
+ *
+ * When one of the inputs is a compile-time constant and gets folded by the
+ * compiler, this reduces to an AND and a TEST. On both EBCDIC and ASCII
+ * machines, 'A' and 'a' differ by a single bit; the same with the upper and
+ * lower case of all other ASCII-range alphabetics. On ASCII platforms, they
+ * are 32 apart; on EBCDIC, they are 64. At compile time, this uses an
+ * exclusive 'or' to find that bit and then inverts it to form a mask, with
+ * just a single 0, in the bit position where the upper- and lowercase differ.
+ * */
+#define isALPHA_FOLD_EQ(c1, c2) \
+ (__ASSERT_(isALPHA_A(c1) || isALPHA_A(c2)) \
+ ((c1) & ~('A' ^ 'a')) == ((c2) & ~('A' ^ 'a')))
+#define isALPHA_FOLD_NE(c1, c2) (! isALPHA_FOLD_EQ((c1), (c2)))
+
/*
=head1 Memory Management