*
*/
+/* IMPORTANT NOTE: Everything whose name begins with an underscore is for
+ * internal core Perl use only. */
+
#ifndef HANDY_H /* Guard against nested #inclusion */
#define HANDY_H
=head1 Handy Values
=for apidoc AmU||Nullch
-Null character pointer. (No longer available when C<PERL_CORE> is defined.)
+Null character pointer. (No longer available when C<PERL_CORE> is
+defined.)
=for apidoc AmU||Nullsv
-Null SV pointer. (No longer available when C<PERL_CORE> is defined.)
+Null SV pointer. (No longer available when C<PERL_CORE> is defined.)
=cut
*/
#define Ctl(ch) ((ch) & 037)
+/* This is a helper macro to avoid preprocessor issues, replaced by nothing
+ * unless under DEBUGGING, where it expands to an assert of its argument,
+ * followed by a comma (hence the comma operator). If we just used a straight
+ * assert(), we would get a comma with nothing before it when not DEBUGGING */
+#ifdef DEBUGGING
+# define __ASSERT_(statement) assert(statement),
+#else
+# define __ASSERT_(statement)
+#endif
+
/*
=head1 SV-Body Allocation
=for apidoc Am|bool|strnNE|char* s1|char* s2|STRLEN len
Test two strings to see if they are different. The C<len> parameter
-indicates the number of bytes to compare. Returns true or false. (A
+indicates the number of bytes to compare. Returns true or false. (A
wrapper for C<strncmp>).
=for apidoc Am|bool|strnEQ|char* s1|char* s2|STRLEN len
Test two strings to see if they are equal. The C<len> parameter indicates
-the number of bytes to compare. Returns true or false. (A wrapper for
+the number of bytes to compare. Returns true or false. (A wrapper for
C<strncmp>).
=cut
since ASCII is a subset of Latin-1. But the non-ASCII code points are treated
as if they are Latin-1 characters. For example, C<isWORDCHAR_L1()> will return
true when called with the code point 0xDF, which is a word character in both
-ASCII and EBCDIC (though it represent different characters in each).
+ASCII and EBCDIC (though it represents different characters in each).
Variant C<isFOO_uni> is like the C<isFOO_L1> variant, but accepts any UV code
point as input. If the code point is larger than 255, Unicode rules are used
classification of just the first (possibly multi-byte) character in the string
is tested.
-Variant C<isFOO_LC> is like the C<isFOO_A> and C<isFOO_L1> variants, but uses
-the C library function that gives the named classification instead of
-hard-coded rules. For example, C<isDIGIT_LC()> returns the result of calling
-C<isdigit()>. This means that the result is based on the current locale, which
-is what C<LC> in the name stands for. FALSE is always returned if the input
-won't fit into an octet.
+Variant C<isFOO_LC> is like the C<isFOO_A> and C<isFOO_L1> variants, but the
+result is based on the current locale, which is what C<LC> in the name stands
+for. If Perl can determine that the current locale is a UTF-8 locale, it uses
+the published Unicode rules; otherwise, it uses the C library function that
+gives the named classification. For example, C<isDIGIT_LC()> when not in a
+UTF-8 locale returns the result of calling C<isdigit()>. FALSE is always
+returned if the input won't fit into an octet.
Variant C<isFOO_LC_uvchr> is like C<isFOO_LC>, but is defined on any UV. It
returns the same as C<isFOO_LC> for input code points less than 256, and
/* We could be called without perl.h, in which case NATIVE_TO_ASCII() is
* likely not defined, and so we use the native function */
-# define isASCII(c) isascii(c)
+# define isASCII(c) cBOOL(isascii(c))
#else
# define isASCII(c) ((WIDEST_UTYPE)(c) < 128)
#endif
/* The 1U keeps Solaris from griping when shifting sets the uppermost bit */
# define _CC_mask(classnum) (1U << (classnum))
+
+ /* For internal core Perl use only: the base macro for defining macros like
+ * isALPHA */
# define _generic_isCC(c, classnum) cBOOL(FITS_IN_8_BITS(c) \
&& (PL_charclass[(U8) (c)] & _CC_mask(classnum)))
* ASCII. */
# define _CC_mask_A(classnum) (_CC_mask(classnum) | _CC_mask(_CC_ASCII))
- /* The _A version makes sure that both the desired bit and the ASCII bit
- * are present */
+ /* For internal core Perl use only: the base macro for defining macros like
+ * isALPHA_A. The foo_A version makes sure that both the desired bit and
+ * the ASCII bit are present */
# define _generic_isCC_A(c, classnum) (FITS_IN_8_BITS(c) \
&& ((PL_charclass[(U8) (c)] & _CC_mask_A(classnum)) \
== _CC_mask_A(classnum)))
* But by creating these definitions, other code doesn't have to be aware of
* this detail */
#define toFOLD(c) toLOWER(c)
-#define toFOLD_LC(c) toLOWER_LC(c)
#define toTITLE(c) toUPPER(c)
#define toLOWER_A(c) toLOWER(c)
#define toFOLD_A(c) toFOLD(c)
#define toTITLE_A(c) toTITLE(c)
-/* Use table lookup for speed; return error character for input
- * out-of-range */
+/* Use table lookup for speed; returns the input itself if out-of-range */
#define toLOWER_LATIN1(c) ((! FITS_IN_8_BITS(c)) \
? (c) \
: PL_latin1_lc[ (U8) (c) ])
#define toLOWER_L1(c) toLOWER_LATIN1(c) /* Synonym for consistency */
/* Modified uc. Is correct uc except for three non-ascii chars which are
- * all mapped to one of them, and these need special handling; error
- * character for input out-of-range */
+ * all mapped to one of them, and these need special handling; returns the
+ * input itself if it is out-of-range */
#define toUPPER_LATIN1_MOD(c) ((! FITS_IN_8_BITS(c)) \
? (c) \
: PL_mod_latin1_uc[ (U8) (c) ])
-#ifdef USE_NEXT_CTYPE
-
-# define isALPHANUMERIC_LC(c) NXIsAlNum((unsigned int)(c))
-# define isALPHA_LC(c) NXIsAlpha((unsigned int)(c))
-# define isASCII_LC(c) isASCII((unsigned int)(c))
-# define isBLANK_LC(c) isBLANK((unsigned int)(c))
-# define isCNTRL_LC(c) NXIsCntrl((unsigned int)(c))
-# define isDIGIT_LC(c) NXIsDigit((unsigned int)(c))
-# define isGRAPH_LC(c) NXIsGraph((unsigned int)(c))
-# define isIDFIRST_LC(c) (NXIsAlpha((unsigned int)(c)) || (char)(c) == '_')
-# define isLOWER_LC(c) NXIsLower((unsigned int)(c))
-# define isPRINT_LC(c) NXIsPrint((unsigned int)(c))
-# define isPUNCT_LC(c) NXIsPunct((unsigned int)(c))
-# define isSPACE_LC(c) NXIsSpace((unsigned int)(c))
-# define isUPPER_LC(c) NXIsUpper((unsigned int)(c))
-# define isWORDCHAR_LC(c) (NXIsAlNum((unsigned int)(c)) || (char)(c) == '_')
-# define isXDIGIT_LC(c) NXIsXDigit((unsigned int)(c))
-# define toLOWER_LC(c) NXToLower((unsigned int)(c))
-# define toUPPER_LC(c) NXToUpper((unsigned int)(c))
+#define IN_UTF8_CTYPE_LOCALE PL_in_utf8_CTYPE_locale
+
+/* Use foo_LC_uvchr() instead of these for beyond the Latin1 range */
+
+/* For internal core Perl use only: the base macro for defining macros like
+ * isALPHA_LC, which uses the current LC_CTYPE locale. 'c' is the code point
+ * (0-255) to check. In a UTF-8 locale, the result is the same as calling
+ * isFOO_L1(); the 'utf8_locale_classnum' parameter is something like
+ * _CC_UPPER, which gives the class number for doing this. For non-UTF-8
+ * locales, the code to actually do the test is passed in 'non_utf8'. If
+ * 'c' is above 255, 0 is returned. For accessing the full range of possible
+ * code points under locale rules, use the macros based on _generic_LC_uvchr
+ * instead of this. */
+#define _generic_LC_base(c, utf8_locale_classnum, non_utf8) \
+ (! FITS_IN_8_BITS(c) \
+ ? 0 \
+ : IN_UTF8_CTYPE_LOCALE \
+ ? cBOOL(PL_charclass[(U8) (c)] & _CC_mask(utf8_locale_classnum)) \
+ : cBOOL(non_utf8))
+
+/* For internal core Perl use only: a helper macro for defining macros like
+ * isALPHA_LC. 'c' is the code point (0-255) to check. The function name to
+ * actually do this test is passed in 'non_utf8_func', which is called on 'c',
+ * after casting 'c' to the type named by the macro _LC_CAST (whose definition
+ * must not be parenthesized). See _generic_LC_base for more info */
+#define _generic_LC(c, utf8_locale_classnum, non_utf8_func) \
+ _generic_LC_base(c,utf8_locale_classnum, \
+ non_utf8_func( (_LC_CAST) (c)))
+
+/* For internal core Perl use only: like _generic_LC, but also returns TRUE if
+ * 'c' is the platform's native underscore character */
+#define _generic_LC_underscore(c,utf8_locale_classnum,non_utf8_func) \
+ _generic_LC_base(c, utf8_locale_classnum, \
+ (non_utf8_func( (_LC_CAST) (c)) \
+ || (char)(c) == '_'))
+
+/* These next three are also for internal core Perl use only: case-change
+ * helper macros */
+#define _generic_toLOWER_LC(c, function, cast) (! FITS_IN_8_BITS(c) \
+ ? (c) \
+ : (IN_UTF8_CTYPE_LOCALE) \
+ ? PL_latin1_lc[ (U8) (c) ] \
+ : function((cast)(c)))
+
+/* Note that the result can be larger than a byte in a UTF-8 locale. It
+ * returns a single value, so can't adequately return the upper case of LATIN
+ * SMALL LETTER SHARP S in a UTF-8 locale (which should be a string of two
+ * values "SS"); instead it asserts against that under DEBUGGING, and
+ * otherwise returns its input */
+#define _generic_toUPPER_LC(c, function, cast) \
+ (! FITS_IN_8_BITS(c) \
+ ? (c) \
+ : ((! IN_UTF8_CTYPE_LOCALE) \
+ ? function((cast)(c)) \
+ : ((((U8)(c)) == MICRO_SIGN) \
+ ? GREEK_CAPITAL_LETTER_MU \
+ : ((((U8)(c)) == LATIN_SMALL_LETTER_Y_WITH_DIAERESIS) \
+ ? LATIN_CAPITAL_LETTER_Y_WITH_DIAERESIS \
+ : ((((U8)(c)) == LATIN_SMALL_LETTER_SHARP_S) \
+ ? (__ASSERT_(0) (c)) \
+ : PL_mod_latin1_uc[ (U8) (c) ])))))
+
+/* Note that the result can be larger than a byte in a UTF-8 locale. It
+ * returns a single value, so can't adequately return the fold case of LATIN
+ * SMALL LETTER SHARP S in a UTF-8 locale (which should be a string of two
+ * values "ss"); instead it asserts against that under DEBUGGING, and
+ * otherwise returns its input */
+#define _generic_toFOLD_LC(c, function, cast) \
+ ((UNLIKELY((c) == MICRO_SIGN) && IN_UTF8_CTYPE_LOCALE) \
+ ? GREEK_SMALL_LETTER_MU \
+ : (__ASSERT_(! IN_UTF8_CTYPE_LOCALE \
+ || (c) != LATIN_SMALL_LETTER_SHARP_S) \
+ _generic_toLOWER_LC(c, function, cast)))
+
+/* Use the libc versions for these if available. */
+#if defined(HAS_ISASCII) && ! defined(USE_NEXT_CTYPE)
+# define isASCII_LC(c) (FITS_IN_8_BITS(c) && isascii( (U8) (c)))
+#else
+# define isASCII_LC(c) isASCII(c)
+#endif
+
+#if defined(HAS_ISBLANK) && ! defined(USE_NEXT_CTYPE)
+# define isBLANK_LC(c) _generic_LC(c, _CC_BLANK, isblank)
+#else /* Unlike isASCII, varies if in a UTF-8 locale */
+ /* The expansion is fully parenthesized so that the ?: can't combine with
+ * operators surrounding the macro call, e.g. in '! isBLANK_LC(c)' */
+# define isBLANK_LC(c) ((IN_UTF8_CTYPE_LOCALE) ? isBLANK_L1(c) : isBLANK(c))
+#endif
+
+#ifdef USE_NEXT_CTYPE /* NeXT computers */
+
+# define _LC_CAST unsigned int /* Needed by _generic_LC. NeXT functions
+ use this as their input type */
+
+# define isALPHA_LC(c) _generic_LC(c, _CC_ALPHA, NXIsAlpha)
+# define isALPHANUMERIC_LC(c) _generic_LC(c, _CC_ALPHANUMERIC, NXIsAlNum)
+# define isCNTRL_LC(c) _generic_LC(c, _CC_CNTRL, NXIsCntrl)
+# define isDIGIT_LC(c) _generic_LC(c, _CC_DIGIT, NXIsDigit)
+# define isGRAPH_LC(c) _generic_LC(c, _CC_GRAPH, NXIsGraph)
+# define isIDFIRST_LC(c) _generic_LC_underscore(c, _CC_IDFIRST, NXIsAlpha)
+# define isLOWER_LC(c) _generic_LC(c, _CC_LOWER, NXIsLower)
+# define isPRINT_LC(c) _generic_LC(c, _CC_PRINT, NXIsPrint)
+# define isPUNCT_LC(c) _generic_LC(c, _CC_PUNCT, NXIsPunct)
+# define isSPACE_LC(c) _generic_LC(c, _CC_SPACE, NXIsSpace)
+# define isUPPER_LC(c) _generic_LC(c, _CC_UPPER, NXIsUpper)
+# define isWORDCHAR_LC(c) _generic_LC_underscore(c, _CC_WORDCHAR, NXIsAlNum)
+# define isXDIGIT_LC(c) _generic_LC(c, _CC_XDIGIT, NXIsXDigit)
+
+# define toLOWER_LC(c) _generic_toLOWER_LC((c), NXToLower, unsigned int)
+# define toUPPER_LC(c) _generic_toUPPER_LC((c), NXToUpper, unsigned int)
+# define toFOLD_LC(c) _generic_toFOLD_LC((c), NXToLower, unsigned int)
#else /* !USE_NEXT_CTYPE */
+# define _LC_CAST U8
+
# if defined(CTYPE256) || (!defined(isascii) && !defined(HAS_ISASCII))
+ /* For most other platforms */
-/* Use foo_LC_uvchr() instead of these for beyond the Latin1 range */
+# define isALPHA_LC(c) _generic_LC(c, _CC_ALPHA, isalpha)
+# define isALPHANUMERIC_LC(c) _generic_LC(c, _CC_ALPHANUMERIC, isalnum)
+# define isCNTRL_LC(c) _generic_LC(c, _CC_CNTRL, iscntrl)
+# define isDIGIT_LC(c) _generic_LC(c, _CC_DIGIT, isdigit)
+# define isGRAPH_LC(c) _generic_LC(c, _CC_GRAPH, isgraph)
+# define isIDFIRST_LC(c) _generic_LC_underscore(c, _CC_IDFIRST, isalpha)
+# define isLOWER_LC(c) _generic_LC(c, _CC_LOWER, islower)
+# define isPRINT_LC(c) _generic_LC(c, _CC_PRINT, isprint)
+# define isPUNCT_LC(c) _generic_LC(c, _CC_PUNCT, ispunct)
+# define isSPACE_LC(c) _generic_LC(c, _CC_SPACE, isspace)
+# define isUPPER_LC(c) _generic_LC(c, _CC_UPPER, isupper)
+# define isWORDCHAR_LC(c) _generic_LC_underscore(c, _CC_WORDCHAR, isalnum)
+# define isXDIGIT_LC(c) _generic_LC(c, _CC_XDIGIT, isxdigit)
-# define isALPHA_LC(c) (FITS_IN_8_BITS(c) && isalpha((unsigned char)(c)))
-# define isALPHANUMERIC_LC(c) (FITS_IN_8_BITS(c) \
- && isalnum((unsigned char)(c)))
-# ifdef HAS_ISASCII
-# define isASCII_LC(c) (FITS_IN_8_BITS(c) && isascii((unsigned char)(c)))
-# else
-# define isASCII_LC(c) (FITS_IN_8_BITS(c) && isASCII((unsigned char)(c)))
-# endif
-# ifdef HAS_ISBLANK
-# define isBLANK_LC(c) (FITS_IN_8_BITS(c) && isblank((unsigned char)(c)))
-# else
-# define isBLANK_LC(c) (FITS_IN_8_BITS(c) && isBLANK((unsigned char)(c)))
-# endif
-# define isCNTRL_LC(c) (FITS_IN_8_BITS(c) && iscntrl((unsigned char)(c)))
-# define isDIGIT_LC(c) (FITS_IN_8_BITS(c) && isdigit((unsigned char)(c)))
-# define isGRAPH_LC(c) (FITS_IN_8_BITS(c) && isgraph((unsigned char)(c)))
-# define isIDFIRST_LC(c) (FITS_IN_8_BITS(c) \
- && (isalpha((unsigned char)(c)) || (char)(c) == '_'))
-# define isLOWER_LC(c) (FITS_IN_8_BITS(c) && islower((unsigned char)(c)))
-# define isPRINT_LC(c) (FITS_IN_8_BITS(c) && isprint((unsigned char)(c)))
-# define isPUNCT_LC(c) (FITS_IN_8_BITS(c) && ispunct((unsigned char)(c)))
-# define isSPACE_LC(c) (FITS_IN_8_BITS(c) && isspace((unsigned char)(c)))
-# define isUPPER_LC(c) (FITS_IN_8_BITS(c) && isupper((unsigned char)(c)))
-# define isWORDCHAR_LC(c) (FITS_IN_8_BITS(c) \
- && (isalnum((unsigned char)(c)) || (char)(c) == '_'))
-# define isXDIGIT_LC(c) (FITS_IN_8_BITS(c) && isxdigit((unsigned char)(c)))
-# define toLOWER_LC(c) (FITS_IN_8_BITS(c) ? (UV)tolower((unsigned char)(c)) : (c))
-# define toUPPER_LC(c) (FITS_IN_8_BITS(c) ? (UV)toupper((unsigned char)(c)) : (c))
-# else
+# define toLOWER_LC(c) _generic_toLOWER_LC((c), tolower, U8)
+# define toUPPER_LC(c) _generic_toUPPER_LC((c), toupper, U8)
+# define toFOLD_LC(c) _generic_toFOLD_LC((c), tolower, U8)
+
+# else /* The final fallback position */
# define isALPHA_LC(c) (isascii(c) && isalpha(c))
# define isALPHANUMERIC_LC(c) (isascii(c) && isalnum(c))
-# define isASCII_LC(c) isascii(c)
-# ifdef HAS_ISBLANK
-# define isBLANK_LC(c) (isascii(c) && isblank(c))
-# else
-# define isBLANK_LC(c) isBLANK_A(c)
-# endif
# define isCNTRL_LC(c) (isascii(c) && iscntrl(c))
# define isDIGIT_LC(c) (isascii(c) && isdigit(c))
# define isGRAPH_LC(c) (isascii(c) && isgraph(c))
# define isUPPER_LC(c) (isascii(c) && isupper(c))
# define isWORDCHAR_LC(c) (isascii(c) && (isalnum(c) || (c) == '_'))
# define isXDIGIT_LC(c) (isascii(c) && isxdigit(c))
+
# define toLOWER_LC(c) (isascii(c) ? tolower(c) : (c))
# define toUPPER_LC(c) (isascii(c) ? toupper(c) : (c))
+# define toFOLD_LC(c) (isascii(c) ? tolower(c) : (c))
# endif
#endif /* USE_NEXT_CTYPE */
#define isIDCONT_LC(c) isWORDCHAR_LC(c)
#define isPSXSPC_LC(c) isSPACE_LC(c)
-/* For internal core Perl use only. If the input is Latin1, use the Latin1
- * macro; otherwise use the function 'above_latin1'. Won't compile if 'c' isn't unsigned, as
- * won't match above_latin1 prototype. The macros do bounds checking, so have
- * duplicate checks here, so could create versions of the macros that don't,
- * but experiments show that gcc optimizes them out anyway. */
+/* For internal core Perl use only: the base macros for defining macros like
+ * isALPHA_uni. 'c' is the code point to check. 'classnum' is the POSIX class
+ * number defined earlier in this file. _generic_uni() is used for POSIX
+ * classes where there is a macro or function 'above_latin1' that takes the
+ * single argument 'c' and returns the desired value. These exist for those
+ * classes which have simple definitions, avoiding the overhead of a hash
+ * lookup or inversion list binary search. _generic_swash_uni() can be used
+ * for classes where that overhead is faster than a direct lookup.
+ * _generic_uni() won't compile if 'c' isn't unsigned, as it won't match the
+ * 'above_latin1' prototype. The _generic_isCC() macro does its own bounds
+ * checking, so the checks here are duplicates; versions of the macros without
+ * them could be created, but experiments show that gcc optimizes them out
+ * anyway. */
/* Note that all ignore 'use bytes' */
#define _generic_uni(classnum, above_latin1, c) ((c) < 256 \
#define toTITLE_uni(c,s,l) to_uni_title(c,s,l)
#define toUPPER_uni(c,s,l) to_uni_upper(c,s,l)
+/* For internal core Perl use only: the base macros for defining macros like
+ * isALPHA_LC_uvchr. These are like isALPHA_LC, but the input can be any code
+ * point, not just 0-255. Like _generic_uni, there are two versions, one for
+ * simple class definitions; the other for more complex. These are like
+ * _generic_uni, so see it for more info. */
#define _generic_LC_uvchr(latin1, above_latin1, c) \
(c < 256 ? latin1(c) : above_latin1(c))
#define _generic_LC_swash_uvchr(latin1, classnum, c) \
#define isBLANK_LC_uni(c) isBLANK_LC_uvchr(UNI_TO_NATIVE(c))
-/* Everything whose name begins with an underscore is for internal core Perl
- * use only. */
-
-/* If the input is in the Latin1 range, use
- * the Latin1 macro 'classnum' on 'p' which is a pointer to a UTF-8 string.
- * Otherwise use the value given by the 'utf8' parameter. This relies on the
- * fact that ASCII characters have the same representation whether utf8 or not.
- * Note that it assumes that the utf8 has been validated, and ignores 'use
- * bytes' */
+/* For internal core Perl use only: the base macros for defining macros like
+ * isALPHA_utf8. These are like the earlier defined macros, but take an input
+ * UTF-8 encoded string 'p'. If the input is in the Latin1 range, use
+ * the Latin1 macro 'classnum' on 'p'. Otherwise use the value given by the
+ * 'utf8' parameter. This relies on the fact that ASCII characters have the
+ * same representation whether utf8 or not. Note that it assumes that the utf8
+ * has been validated, and ignores 'use bytes' */
#define _generic_utf8(classnum, p, utf8) (UTF8_IS_INVARIANT(*(p)) \
? _generic_isCC(*(p), classnum) \
: (UTF8_IS_DOWNGRADEABLE_START(*(p))) \
* can be a macro */
#define _generic_func_utf8(classnum, above_latin1, p) \
_generic_utf8(classnum, p, above_latin1(p))
-/* Like the above, but passes classnum to _isFOO_utf8(), instead of having a
+/* Like the above, but passes classnum to _isFOO_utf8(), instead of having an
* 'above_latin1' parameter */
#define _generic_swash_utf8(classnum, p) \
_generic_utf8(classnum, p, _is_utf8_FOO(classnum, p))
#define toTITLE_utf8(p,s,l) to_utf8_title(p,s,l)
#define toUPPER_utf8(p,s,l) to_utf8_upper(p,s,l)
-/* For internal core Perl use only. If the input is in the Latin1 range, use
- * the macro 'macro' on 'p' which is a pointer to a UTF-8 string. Otherwise
- * use the value given by the 'utf8' parameter. This relies on the fact that
- * ASCII characters have the same representation whether utf8 or not. Note
- * that it assumes that the utf8 has been validated, and ignores 'use bytes' */
+/* For internal core Perl use only: the base macros for defining macros like
+ * isALPHA_LC_utf8. These are like _generic_utf8, but if the first code point
+ * in 'p' is within the 0-255 range, it uses locale rules from the passed-in
+ * 'macro' parameter */
#define _generic_LC_utf8(macro, p, utf8) \
(UTF8_IS_INVARIANT(*(p)) \
? macro(*(p)) \
#define _generic_LC_swash_utf8(macro, classnum, p) \
_generic_LC_utf8(macro, p, _is_utf8_FOO(classnum, p))
#define _generic_LC_func_utf8(macro, above_latin1, p) \
- _generic_LC_utf8(macro, p, above_latin1(p))
+ _generic_LC_utf8(macro, p, above_latin1(p))
#define isALPHANUMERIC_LC_utf8(p) _generic_LC_swash_utf8(isALPHANUMERIC_LC, \
_CC_ALPHANUMERIC, p)
#define isALNUMC_utf8(p) isALPHANUMERIC_utf8(p)
#define isALNUMC_LC_utf8(p) isALPHANUMERIC_LC_utf8(p)
-/* This conversion works both ways, strangely enough. On EBCDIC platforms,
- * CTRL-@ is 0, CTRL-A is 1, etc, just like on ASCII, except that they don't
- * necessarily mean the same characters, e.g. CTRL-D is 4 on both systems, but
- * that is EOT on ASCII; ST on EBCDIC */
-# define toCTRL(c) (toUPPER(NATIVE_TO_LATIN1(c)) ^ 64)
+/* On EBCDIC platforms, CTRL-@ is 0, CTRL-A is 1, etc, just like on ASCII,
+ * except that they don't necessarily mean the same characters, e.g. CTRL-D is
+ * 4 on both systems, but that is EOT on ASCII; ST on EBCDIC.
+ * '?' is special-cased on EBCDIC to APC, which is the control there that is
+ * the outlier from the block that contains the other controls, just like
+ * toCTRL('?') on ASCII yields DEL, the control that is the outlier from the C0
+ * block. If it weren't special cased, it would yield a non-control.
+ * The conversion works both ways, so toCTRL('D') is 4, and toCTRL(4) is 'D',
+ * etc. */
+#ifndef EBCDIC
+# define toCTRL(c) (toUPPER(c) ^ 64)
+#else
+# define toCTRL(c) ((c) == '?' \
+ ? LATIN1_TO_NATIVE(0x9F) \
+ : (c) == LATIN1_TO_NATIVE(0x9F) \
+ ? '?' \
+ : (NATIVE_TO_LATIN1(toUPPER(c)) ^ 64))
+#endif
/* Line numbers are unsigned, 32 bits. */
typedef U32 line_t;
} \
return a;
-/* Converts a hex digit in a string to its numeric value, advancing the
- * pointer. The input must be known to be 0-9, A-F, or a-f. In both ASCII and
- * EBCDIC the last 4 bits of the digits are 0-9; and the last 4 bits of A-F and
- * a-f are 1-6, so adding 9 yields 10-15 */
-#define READ_XDIGIT(s) (0xf & (isDIGIT(*(s)) ? (*(s)++) : (*(s)++ + 9)))
+/* Converts a character known to represent a hexadecimal digit (0-9, A-F, or
+ * a-f) to its numeric value. READ_XDIGIT's argument is a string pointer,
+ * which is advanced. The input is validated only by an assert() in DEBUGGING
+ * builds. In both ASCII and EBCDIC the last 4 bits of the digits are 0-9; and
+ * the last 4 bits of A-F and a-f are 1-6, so adding 9 yields 10-15 */
+#define XDIGIT_VALUE(c) (__ASSERT_(isXDIGIT(c)) (0xf & (isDIGIT(c) \
+ ? (c) \
+ : ((c) + 9))))
+/* 's' is expanded several times below (though the pointer is advanced only
+ * once), so every use is parenthesized and 's' must have no side effects */
+#define READ_XDIGIT(s) (__ASSERT_(isXDIGIT(*(s))) (0xf & (isDIGIT(*(s)) \
+ ? (*(s)++) \
+ : (*(s)++ + 9))))
+
+/* Converts a character known to represent an octal digit (0-7) to its numeric
+ * value. The input is validated only by an assert() in DEBUGGING builds. In
+ * both ASCII and EBCDIC the last 3 bits of the octal digits range from 0-7. */
+#define OCTAL_VALUE(c) (__ASSERT_(isOCTAL(c)) (7 & (c)))
/*
=head1 Memory Management
C<type> is the type. Can do overlapping moves. See also C<Copy>.
=for apidoc Am|void *|MoveD|void* src|void* dest|int nitems|type
-Like C<Move> but returns dest. Useful for encouraging compilers to tail-call
+Like C<Move> but returns dest. Useful
+for encouraging compilers to tail-call
optimise.
=for apidoc Am|void|Copy|void* src|void* dest|int nitems|type
=for apidoc Am|void *|CopyD|void* src|void* dest|int nitems|type
-Like C<Copy> but returns dest. Useful for encouraging compilers to tail-call
+Like C<Copy> but returns dest. Useful
+for encouraging compilers to tail-call
optimise.
=for apidoc Am|void|Zero|void* dest|int nitems|type
=for apidoc Am|void *|ZeroD|void* dest|int nitems|type
-Like C<Zero> but returns dest. Useful for encouraging compilers to tail-call
+Like C<Zero> but returns dest. Useful
+for encouraging compilers to tail-call
optimise.
=for apidoc Am|void|StructCopy|type *src|type *dest|type