Returns a boolean indicating whether the C C<char> is a US-ASCII (Basic Latin)
digit.
+=for apidoc Am|bool|isOCTAL|char ch
+Returns a boolean indicating whether the C C<char> is a US-ASCII (Basic Latin)
+octal digit, [0-7].
+
=for apidoc Am|bool|isUPPER|char ch
Returns a boolean indicating whether the C C<char> is a US-ASCII (Basic Latin)
uppercase character.
Returns a boolean indicating whether the C C<char> is a US-ASCII (Basic Latin)
lowercase character.
+=head1 Character case changing
+
=for apidoc Am|char|toUPPER|char ch
Converts the specified character to uppercase. Characters outside the
US-ASCII (Basic Latin) range are viewed as not having any case.
*/
+/* FITS_IN_8_BITS(c) returns true if c occupies no more than 8 bits. It is
+ * designed to be hopefully bomb-proof, making sure that no bits of
+ * information are lost even on a 64-bit machine, while still letting the
+ * compiler optimize it out when possible: Configure makes sure that the
+ * machine has an 8-bit byte, so if c is stored in a byte, the sizeof()
+ * guarantees that this evaluates to a constant true at compile time. The use
+ * of the mask instead of '< 256' keeps gcc from complaining that it is always
+ * true when c is stored in a byte. Note that when sizeof(c) != 1 the mask
+ * test names c twice, so avoid arguments with side effects. */
+#ifdef HAS_QUAD
+# define FITS_IN_8_BITS(c) ((sizeof(c) == 1) || (((U64)(c) & 0xFF) == (U64)(c)))
+#else
+# define FITS_IN_8_BITS(c) ((sizeof(c) == 1) || (((U32)(c) & 0xFF) == (U32)(c)))
+#endif
+
#define isALNUM(c) (isALPHA(c) || isDIGIT(c) || (c) == '_')
#define isIDFIRST(c) (isALPHA(c) || (c) == '_')
#define isALPHA(c) (isUPPER(c) || isLOWER(c))
#define isPSXSPC(c) (isSPACE(c) || (c) == '\v')
#define isBLANK(c) ((c) == ' ' || (c) == '\t')
#define isDIGIT(c) ((c) >= '0' && (c) <= '9')
+#define isOCTAL(c) ((c) >= '0' && (c) <= '7')
#ifdef EBCDIC
/* In EBCDIC we do not do locales: therefore isupper() is fine. */
# define isUPPER(c) isupper(c)
# define isPUNCT(c) ispunct(c)
# define isXDIGIT(c) isxdigit(c)
# define toUPPER(c) toupper(c)
-# define toUPPER_LATIN1_MOD(c) UNI_TO_NATIVE(PL_mod_latin1_uc[(U8) NATIVE_TO_UNI(c)])
# define toLOWER(c) tolower(c)
-# define toLOWER_LATIN1(c) UNI_TO_NATIVE(PL_latin1_lc[(U8) NATIVE_TO_UNI(c)])
#else
# define isUPPER(c) ((c) >= 'A' && (c) <= 'Z')
# define isLOWER(c) ((c) >= 'a' && (c) <= 'z')
# define isPUNCT(c) (((c) >= 33 && (c) <= 47) || ((c) >= 58 && (c) <= 64) || ((c) >= 91 && (c) <= 96) || ((c) >= 123 && (c) <= 126))
# define isXDIGIT(c) (isDIGIT(c) || ((c) >= 'a' && (c) <= 'f') || ((c) >= 'A' && (c) <= 'F'))
-/* Use table lookup for speed */
-# define toLOWER_LATIN1(c) (PL_latin1_lc[(U8) c])
-/* Modified uc. Is correct uc except for three non-ascii chars which are
- * all mapped to one of them, and these need special handling */
-# define toUPPER_LATIN1_MOD(c) (PL_mod_latin1_uc[(U8) c])
+ /* Use table lookup (PL_latin1_lc) for speed; evaluates to
+ * UNICODE_REPLACEMENT when the input does not fit in 8 bits */
+# define toLOWER_LATIN1(c) (FITS_IN_8_BITS(c) \
+ ? UNI_TO_NATIVE(PL_latin1_lc[ \
+ NATIVE_TO_UNI( (U8) (c)) ]) \
+ : UNICODE_REPLACEMENT)
+ /* Modified uc, looked up in PL_mod_latin1_uc. Is the correct uc except for
+ * three non-ASCII chars, which are all mapped to one of them and need
+ * special handling; evaluates to UNICODE_REPLACEMENT when the input does
+ * not fit in 8 bits */
+# define toUPPER_LATIN1_MOD(c) (FITS_IN_8_BITS(c) \
+ ? UNI_TO_NATIVE(PL_mod_latin1_uc[ \
+ NATIVE_TO_UNI( (U8) (c)) ]) \
+ : UNICODE_REPLACEMENT)
/* ASCII casing. */
# define toUPPER(c) (isLOWER(c) ? (c) - ('a' - 'A') : (c))