/*
=head1 Handy Values
-=for apidoc AmU||Nullch
+=for apidoc AmnU||Nullch
Null character pointer. (No longer available when C<PERL_CORE> is
defined.)
-=for apidoc AmU||Nullsv
+=for apidoc AmnU||Nullsv
Null SV pointer. (No longer available when C<PERL_CORE> is defined.)
=cut
# define HAS_BOOL 1
#endif
-/* cast-to-bool. A simple (bool) cast may not do the right thing: if bool is
- * defined as char for example, then the cast from int is
- * implementation-defined (bool)!!(cbool) in a ternary triggers a bug in xlc on
- * AIX */
+/*
+=for apidoc Am|bool|cBOOL|bool expr
+
+Cast-to-bool. A simple S<C<(bool) I<expr>>> cast may not do the right thing:
+if C<bool> is defined as C<char>, for example, then the cast from C<int> is
+implementation-defined.
+
+C<(bool)!!(cbool)> in a ternary triggers a bug in xlc on AIX
+
+=cut
+*/
#define cBOOL(cbool) ((cbool) ? (bool)1 : (bool)0)
/* Try to figure out __func__ or __FUNCTION__ equivalent, if any.
# define isPOWER_OF_2(n) ((n) && ((n) & ((n)-1)) == 0)
#endif
-/* This is a helper macro to avoid preprocessor issues, replaced by nothing
- * unless under DEBUGGING, where it expands to an assert of its argument,
- * followed by a comma (hence the comma operator). If we just used a straight
- * assert(), we would get a comma with nothing before it when not DEBUGGING.
- *
- * We also use empty definition under Coverity since the __ASSERT__
- * checks often check for things that Really Cannot Happen, and Coverity
- * detects that and gets all excited. */
+/*
+=for apidoc Am|void|__ASSERT_|bool expr
+
+This is a helper macro to avoid preprocessor issues, replaced by nothing
+unless under DEBUGGING, where it expands to an assert of its argument,
+followed by a comma (hence the comma operator). If we just used a straight
+assert(), we would get a comma with nothing before it when not DEBUGGING.
+
+=cut
+
+We also use empty definition under Coverity since the __ASSERT__
+checks often check for things that Really Cannot Happen, and Coverity
+detects that and gets all excited. */
#if defined(DEBUGGING) && !defined(__COVERITY__)
# define __ASSERT_(statement) assert(statement),
/*
=head1 SV Manipulation Functions
-=for apidoc Ama|SV*|newSVpvs|"literal string" s
+=for apidoc Ama|SV*|newSVpvs|"literal string"
Like C<newSVpvn>, but takes a literal string instead of a
string/length pair.
-=for apidoc Ama|SV*|newSVpvs_flags|"literal string" s|U32 flags
+=for apidoc Ama|SV*|newSVpvs_flags|"literal string"|U32 flags
Like C<newSVpvn_flags>, but takes a literal string instead of
a string/length pair.
-=for apidoc Ama|SV*|newSVpvs_share|"literal string" s
+=for apidoc Ama|SV*|newSVpvs_share|"literal string"
Like C<newSVpvn_share>, but takes a literal string instead of
a string/length pair and omits the hash parameter.
-=for apidoc Am|void|sv_catpvs_flags|SV* sv|"literal string" s|I32 flags
+=for apidoc Am|void|sv_catpvs_flags|SV* sv|"literal string"|I32 flags
Like C<sv_catpvn_flags>, but takes a literal string instead
of a string/length pair.
-=for apidoc Am|void|sv_catpvs_nomg|SV* sv|"literal string" s
+=for apidoc Am|void|sv_catpvs_nomg|SV* sv|"literal string"
Like C<sv_catpvn_nomg>, but takes a literal string instead of
a string/length pair.
-=for apidoc Am|void|sv_catpvs|SV* sv|"literal string" s
+=for apidoc Am|void|sv_catpvs|SV* sv|"literal string"
Like C<sv_catpvn>, but takes a literal string instead of a
string/length pair.
-=for apidoc Am|void|sv_catpvs_mg|SV* sv|"literal string" s
+=for apidoc Am|void|sv_catpvs_mg|SV* sv|"literal string"
Like C<sv_catpvn_mg>, but takes a literal string instead of a
string/length pair.
-=for apidoc Am|void|sv_setpvs|SV* sv|"literal string" s
+=for apidoc Am|void|sv_setpvs|SV* sv|"literal string"
Like C<sv_setpvn>, but takes a literal string instead of a
string/length pair.
-=for apidoc Am|void|sv_setpvs_mg|SV* sv|"literal string" s
+=for apidoc Am|void|sv_setpvs_mg|SV* sv|"literal string"
Like C<sv_setpvn_mg>, but takes a literal string instead of a
string/length pair.
-=for apidoc Am|SV *|sv_setref_pvs|"literal string" s
+=for apidoc Am|SV *|sv_setref_pvs|SV *const rv|const char *const classname|"literal string"
Like C<sv_setref_pvn>, but takes a literal string instead of
a string/length pair.
=head1 Memory Management
-=for apidoc Ama|char*|savepvs|"literal string" s
+=for apidoc Ama|char*|savepvs|"literal string"
Like C<savepvn>, but takes a literal string instead of a
string/length pair.
-=for apidoc Ama|char*|savesharedpvs|"literal string" s
+=for apidoc Ama|char*|savesharedpvs|"literal string"
A version of C<savepvs()> which allocates the duplicate string in memory
which is shared between threads.
=head1 GV Functions
-=for apidoc Am|HV*|gv_stashpvs|"literal string" name|I32 create
+=for apidoc Am|HV*|gv_stashpvs|"name"|I32 create
Like C<gv_stashpvn>, but takes a literal string instead of a
string/length pair.
=head1 Hash Manipulation Functions
-=for apidoc Am|SV**|hv_fetchs|HV* tb|"literal string" key|I32 lval
+=for apidoc Am|SV**|hv_fetchs|HV* tb|"key"|I32 lval
Like C<hv_fetch>, but takes a literal string instead of a
string/length pair.
-=for apidoc Am|SV**|hv_stores|HV* tb|"literal string" key|SV* val
+=for apidoc Am|SV**|hv_stores|HV* tb|"key"|SV* val
Like C<hv_store>, but takes a literal string instead of a
string/length pair
and omits the hash parameter.
=head1 Lexer interface
-=for apidoc Amx|void|lex_stuff_pvs|"literal string" pv|U32 flags
+=for apidoc Amx|void|lex_stuff_pvs|"pv"|U32 flags
Like L</lex_stuff_pvn>, but takes a literal string instead of
a string/length pair.
=cut
*/
-/* concatenating with "" ensures that only literal strings are accepted as
- * argument */
-#define STR_WITH_LEN(s) ("" s ""), (sizeof(s)-1)
+/*
+=head1 Handy Values
-/* note that STR_WITH_LEN() can't be used as argument to macros or functions
- * that under some configurations might be macros, which means that it requires
- * the full Perl_xxx(aTHX_ ...) form for any API calls where it's used.
- */
+=for apidoc Amu|pair|STR_WITH_LEN|"literal string"
+
+Returns two comma separated tokens of the input literal string, and its length.
+This is convenience macro which helps out in some API calls.
+Note that it can't be used as an argument to macros or functions that under
+some configurations might be macros, which means that it requires the full
+Perl_xxx(aTHX_ ...) form for any API calls where it's used.
+
+=cut
+*/
+
+
+#define STR_WITH_LEN(s) ("" s ""), (sizeof(s)-1)
/* STR_WITH_LEN() shortcuts */
#define newSVpvs(str) Perl_newSVpvn(aTHX_ STR_WITH_LEN(str))
are equal. The C<len> parameter indicates the number of bytes to compare.
Returns zero if equal, or non-zero if non-equal.
+=for apidoc Am|bool|memEQs|char* s1|STRLEN l1|"s2"
+Like L</memEQ>, but the second string is a literal enclosed in double quotes,
+C<l1> gives the number of bytes in C<s1>.
+Returns zero if equal, or non-zero if non-equal.
+
=for apidoc Am|bool|memNE|char* s1|char* s2|STRLEN len
Test two buffers (which may contain embedded C<NUL> characters, to see if they
are not equal. The C<len> parameter indicates the number of bytes to compare.
Returns zero if non-equal, or non-zero if equal.
+=for apidoc Am|bool|memNEs|char* s1|STRLEN l1|"s2"
+Like L</memNE>, but the second string is a literal enclosed in double quotes,
+C<l1> gives the number of bytes in C<s1>.
+Returns zero if non-equal, or zero if non-equal.
+
=cut
New macros should use the following conventions for their names (which are
ones valid for it.) None are affected by C<use bytes>, and only the ones
with C<LC> in the name are affected by the current locale.
-The base function, e.g., C<isALPHA()>, takes an octet (either a C<char> or a
-C<U8>) as input and returns a boolean as to whether or not the character
-represented by that octet is (or on non-ASCII platforms, corresponds to) an
+The base function, e.g., C<isALPHA()>, takes any signed or unsigned value,
+treating it as a code point, and returns a boolean as to whether or not the
+character represented by it is (or on non-ASCII platforms, corresponds to) an
ASCII character in the named class based on platform, Unicode, and Perl rules.
If the input is a number that doesn't fit in an octet, FALSE is returned.
as if they are Latin-1 characters. For example, C<isWORDCHAR_L1()> will return
true when called with the code point 0xDF, which is a word character in both
ASCII and EBCDIC (though it represents different characters in each).
+If the input is a number that doesn't fit in an octet, FALSE is returned.
+(Perl's documentation uses a colloquial definition of Latin-1, to include all
+code points below 256.)
-Variant C<isI<FOO>_uvchr> is like the C<isI<FOO>_L1> variant, but accepts any UV code
-point as input. If the code point is larger than 255, Unicode rules are used
-to determine if it is in the character class. For example,
+Variant C<isI<FOO>_uvchr> is exactly like the C<isI<FOO>_L1> variant, for
+inputs below 256, but if the code point is larger than 255, Unicode rules are
+used to determine if it is in the character class. For example,
C<isWORDCHAR_uvchr(0x100)> returns TRUE, since 0x100 is LATIN CAPITAL LETTER A
WITH MACRON in Unicode, and is a word character.
Variant C<isI<FOO>_utf8_safe> is like C<isI<FOO>_uvchr>, but is used for UTF-8
-encoded strings. Each call classifies one character, even if the string
-contains many. This variant takes two parameters. The first, C<p>, is a
+encoded strings. Each call classifies the first character of the string. This
+variant takes two parameters. The first, C<p>, is a
pointer to the first byte of the character to be classified. (Recall that it
may take more than one byte to represent a character in UTF-8 strings.) The
second parameter, C<e>, points to anywhere in the string beyond the first
Variant C<isI<FOO>_utf8> is like C<isI<FOO>_utf8_safe>, but takes just a single
parameter, C<p>, which has the same meaning as the corresponding parameter does
in C<isI<FOO>_utf8_safe>. The function therefore can't check if it is reading
-beyond the end of the string. Starting in Perl v5.30, it will take a second
+beyond the end of the string. Starting in Perl v5.32, it will take a second
parameter, becoming a synonym for C<isI<FOO>_utf8_safe>. At that time every
program that uses it will have to be changed to successfully compile. In the
meantime, the first runtime call to C<isI<FOO>_utf8> from each call point in the
program will raise a deprecation warning, enabled by default. You can convert
your program now to use C<isI<FOO>_utf8_safe>, and avoid the warnings, and get an
-extra measure of protection, or you can wait until v5.30, when you'll be forced
+extra measure of protection, or you can wait until v5.32, when you'll be forced
to add the C<e> parameter.
-Variant C<isI<FOO>_LC> is like the C<isI<FOO>_A> and C<isI<FOO>_L1> variants, but the
-result is based on the current locale, which is what C<LC> in the name stands
-for. If Perl can determine that the current locale is a UTF-8 locale, it uses
-the published Unicode rules; otherwise, it uses the C library function that
-gives the named classification. For example, C<isDIGIT_LC()> when not in a
-UTF-8 locale returns the result of calling C<isdigit()>. FALSE is always
+Variant C<isI<FOO>_LC> is like the C<isI<FOO>_A> and C<isI<FOO>_L1> variants,
+but the result is based on the current locale, which is what C<LC> in the name
+stands for. If Perl can determine that the current locale is a UTF-8 locale,
+it uses the published Unicode rules; otherwise, it uses the C library function
+that gives the named classification. For example, C<isDIGIT_LC()> when not in
+a UTF-8 locale returns the result of calling C<isdigit()>. FALSE is always
returned if the input won't fit into an octet. On some platforms where the C
library function is known to be defective, Perl changes its result to follow
the POSIX standard's rules.
-Variant C<isI<FOO>_LC_uvchr> is like C<isI<FOO>_LC>, but is defined on any UV. It
-returns the same as C<isI<FOO>_LC> for input code points less than 256, and
-returns the hard-coded, not-affected-by-locale, Unicode results for larger ones.
+Variant C<isI<FOO>_LC_uvchr> acts exactly like C<isI<FOO>_LC> for inputs less
+than 256, but for larger ones it returns the Unicode classification of the code
+point.
Variant C<isI<FOO>_LC_utf8_safe> is like C<isI<FOO>_LC_uvchr>, but is used for UTF-8
-encoded strings. Each call classifies one character, even if the string
-contains many. This variant takes two parameters. The first, C<p>, is a
-pointer to the first byte of the character to be classified. (Recall that it
-may take more than one byte to represent a character in UTF-8 strings.) The
-second parameter, C<e>, points to anywhere in the string beyond the first
-character, up to one byte past the end of the entire string. The suffix
-C<_safe> in the function's name indicates that it will not attempt to read
-beyond S<C<e - 1>>, provided that the constraint S<C<s E<lt> e>> is true (this
-is asserted for in C<-DDEBUGGING> builds). If the UTF-8 for the input
-character is malformed in some way, the program may croak, or the function may
-return FALSE, at the discretion of the implementation, and subject to change in
-future releases.
+encoded strings. Each call classifies the first character of the string. This
+variant takes two parameters. The first, C<p>, is a pointer to the first byte
+of the character to be classified. (Recall that it may take more than one byte
+to represent a character in UTF-8 strings.) The second parameter, C<e>,
+points to anywhere in the string beyond the first character, up to one byte
+past the end of the entire string. The suffix C<_safe> in the function's name
+indicates that it will not attempt to read beyond S<C<e - 1>>, provided that
+the constraint S<C<s E<lt> e>> is true (this is asserted for in C<-DDEBUGGING>
+builds). If the UTF-8 for the input character is malformed in some way, the
+program may croak, or the function may return FALSE, at the discretion of the
+implementation, and subject to change in future releases.
Variant C<isI<FOO>_LC_utf8> is like C<isI<FOO>_LC_utf8_safe>, but takes just a single
parameter, C<p>, which has the same meaning as the corresponding parameter does
in C<isI<FOO>_LC_utf8_safe>. The function therefore can't check if it is reading
-beyond the end of the string. Starting in Perl v5.30, it will take a second
+beyond the end of the string. Starting in Perl v5.32, it will take a second
parameter, becoming a synonym for C<isI<FOO>_LC_utf8_safe>. At that time every
program that uses it will have to be changed to successfully compile. In the
meantime, the first runtime call to C<isI<FOO>_LC_utf8> from each call point in
the program will raise a deprecation warning, enabled by default. You can
convert your program now to use C<isI<FOO>_LC_utf8_safe>, and avoid the warnings,
-and get an extra measure of protection, or you can wait until v5.30, when
+and get an extra measure of protection, or you can wait until v5.32, when
you'll be forced to add the C<e> parameter.
-=for apidoc Am|bool|isALPHA|char ch
-Returns a boolean indicating whether the specified character is an
-alphabetic character, analogous to C<m/[[:alpha:]]/>.
+=for apidoc Am|bool|isALPHA|int ch
+Returns a boolean indicating whether the specified input is one of C<[A-Za-z]>,
+analogous to C<m/[[:alpha:]]/>.
See the L<top of this section|/Character classification> for an explanation of
variants
C<isALPHA_A>, C<isALPHA_L1>, C<isALPHA_uvchr>, C<isALPHA_utf8_safe>,
C<isALPHA_LC>, C<isALPHA_LC_uvchr>, and C<isALPHA_LC_utf8_safe>.
-=for apidoc Am|bool|isALPHANUMERIC|char ch
-Returns a boolean indicating whether the specified character is a either an
-alphabetic character or decimal digit, analogous to C<m/[[:alnum:]]/>.
+=cut
+
+Here and below, we add the protoypes of these macros for downstream programs
+that would be interested in them, such as Devel::PPPort
+
+=for apidoc Amh|bool|isALPHA_A|int ch
+=for apidoc Amh|bool|isALPHA_L1|int ch
+=for apidoc Amh|bool|isALPHA_uvchr|int ch
+=for apidoc Amh|bool|isALPHA_utf8_safe|U8 * s|U8 * end
+=for apidoc Amh|bool|isALPHA_utf8|U8 * s
+=for apidoc Amh|bool|isALPHA_LC|int ch
+=for apidoc Amh|bool|isALPHA_LC_uvchr|int ch
+=for apidoc Amh|bool|isALPHA_LC_utf8_safe|U8 * s| U8 *end
+
+=for apidoc Am|bool|isALPHANUMERIC|int ch
+Returns a boolean indicating whether the specified character is one of
+C<[A-Za-z0-9]>, analogous to C<m/[[:alnum:]]/>.
See the L<top of this section|/Character classification> for an explanation of
variants
C<isALPHANUMERIC_A>, C<isALPHANUMERIC_L1>, C<isALPHANUMERIC_uvchr>,
C<isALPHANUMERIC_utf8_safe>, C<isALPHANUMERIC_LC>, C<isALPHANUMERIC_LC_uvchr>,
and C<isALPHANUMERIC_LC_utf8_safe>.
-=for apidoc Am|bool|isASCII|char ch
+A (discouraged from use) synonym is C<isALNUMC> (where the C<C> suffix means
+this corresponds to the C language alphanumeric definition). Also
+there are the variants
+C<isALNUMC_A>, C<isALNUMC_L1>
+C<isALNUMC_LC>, and C<isALNUMC_LC_uvchr>.
+
+=for apidoc Amh|bool|isALPHANUMERIC_A|int ch
+=for apidoc Amh|bool|isALPHANUMERIC_L1|int ch
+=for apidoc Amh|bool|isALPHANUMERIC_uvchr|int ch
+=for apidoc Amh|bool|isALPHANUMERIC_utf8_safe|U8 * s|U8 * end
+=for apidoc Amh|bool|isALPHANUMERIC_utf8|U8 * s
+=for apidoc Amh|bool|isALPHANUMERIC_LC|int ch
+=for apidoc Amh|bool|isALPHANUMERIC_LC_uvchr|int ch
+=for apidoc Amh|bool|isALPHANUMERIC_LC_utf8_safe|U8 * s| U8 *end
+=for apidoc Amh|bool|isALNUMC|int ch
+=for apidoc Amh|bool|isALNUMC_A|int ch
+=for apidoc Amh|bool|isALNUMC_L1|int ch
+=for apidoc Amh|bool|isALNUMC_LC|int ch
+=for apidoc Amh|bool|isALNUMC_LC_uvchr|int ch
+
+=for apidoc Am|bool|isASCII|int ch
Returns a boolean indicating whether the specified character is one of the 128
characters in the ASCII character set, analogous to C<m/[[:ascii:]]/>.
On non-ASCII platforms, it returns TRUE iff this
library routine C<isascii()>. In these cases, the variants whose names contain
C<LC> are the same as the corresponding ones without.
+=for apidoc Amh|bool|isASCII_A|int ch
+=for apidoc Amh|bool|isASCII_L1|int ch
+=for apidoc Amh|bool|isASCII_uvchr|int ch
+=for apidoc Amh|bool|isASCII_utf8_safe|U8 * s|U8 * end
+=for apidoc Amh|bool|isASCII_utf8|U8 * s
+=for apidoc Amh|bool|isASCII_LC|int ch
+=for apidoc Amh|bool|isASCII_LC_uvchr|int ch
+=for apidoc Amh|bool|isASCII_LC_utf8_safe|U8 * s| U8 *end
+
Also note, that because all ASCII characters are UTF-8 invariant (meaning they
have the exact same representation (always a single byte) whether encoded in
UTF-8 or not), C<isASCII> will give the correct results when called with any
C<isblank()>. In these cases, the variants whose names contain C<LC> are
the same as the corresponding ones without.
+=for apidoc Amh|bool|isBLANK_A|int ch
+=for apidoc Amh|bool|isBLANK_L1|int ch
+=for apidoc Amh|bool|isBLANK_uvchr|int ch
+=for apidoc Amh|bool|isBLANK_utf8_safe|U8 * s|U8 * end
+=for apidoc Amh|bool|isBLANK_utf8|U8 * s
+=for apidoc Amh|bool|isBLANK_LC|int ch
+=for apidoc Amh|bool|isBLANK_LC_uvchr|int ch
+=for apidoc Amh|bool|isBLANK_LC_utf8_safe|U8 * s| U8 *end
+
=for apidoc Am|bool|isCNTRL|char ch
Returns a boolean indicating whether the specified character is a
control character, analogous to C<m/[[:cntrl:]]/>.
C<isCNTRL_LC>, C<isCNTRL_LC_uvchr>, and C<isCNTRL_LC_utf8_safe> On EBCDIC
platforms, you almost always want to use the C<isCNTRL_L1> variant.
+=for apidoc Amh|bool|isCNTRL_A|int ch
+=for apidoc Amh|bool|isCNTRL_L1|int ch
+=for apidoc Amh|bool|isCNTRL_uvchr|int ch
+=for apidoc Amh|bool|isCNTRL_utf8_safe|U8 * s|U8 * end
+=for apidoc Amh|bool|isCNTRL_utf8|U8 * s
+=for apidoc Amh|bool|isCNTRL_LC|int ch
+=for apidoc Amh|bool|isCNTRL_LC_uvchr|int ch
+=for apidoc Amh|bool|isCNTRL_LC_utf8_safe|U8 * s| U8 *end
+
=for apidoc Am|bool|isDIGIT|char ch
Returns a boolean indicating whether the specified character is a
digit, analogous to C<m/[[:digit:]]/>.
C<isDIGIT_uvchr>, C<isDIGIT_utf8_safe>, C<isDIGIT_LC>, C<isDIGIT_LC_uvchr>, and
C<isDIGIT_LC_utf8_safe>.
+=for apidoc Amh|bool|isDIGIT_A|int ch
+=for apidoc Amh|bool|isDIGIT_L1|int ch
+=for apidoc Amh|bool|isDIGIT_uvchr|int ch
+=for apidoc Amh|bool|isDIGIT_utf8_safe|U8 * s|U8 * end
+=for apidoc Amh|bool|isDIGIT_utf8|U8 * s
+=for apidoc Amh|bool|isDIGIT_LC|int ch
+=for apidoc Amh|bool|isDIGIT_LC_uvchr|int ch
+=for apidoc Amh|bool|isDIGIT_LC_utf8_safe|U8 * s| U8 *end
+
=for apidoc Am|bool|isGRAPH|char ch
Returns a boolean indicating whether the specified character is a
graphic character, analogous to C<m/[[:graph:]]/>.
variants C<isGRAPH_A>, C<isGRAPH_L1>, C<isGRAPH_uvchr>, C<isGRAPH_utf8_safe>,
C<isGRAPH_LC>, C<isGRAPH_LC_uvchr>, and C<isGRAPH_LC_utf8_safe>.
+=for apidoc Amh|bool|isGRAPH_A|int ch
+=for apidoc Amh|bool|isGRAPH_L1|int ch
+=for apidoc Amh|bool|isGRAPH_uvchr|int ch
+=for apidoc Amh|bool|isGRAPH_utf8_safe|U8 * s|U8 * end
+=for apidoc Amh|bool|isGRAPH_utf8|U8 * s
+=for apidoc Amh|bool|isGRAPH_LC|int ch
+=for apidoc Amh|bool|isGRAPH_LC_uvchr|int ch
+=for apidoc Amh|bool|isGRAPH_LC_utf8_safe|U8 * s| U8 *end
+
=for apidoc Am|bool|isLOWER|char ch
Returns a boolean indicating whether the specified character is a
lowercase character, analogous to C<m/[[:lower:]]/>.
C<isLOWER_A>, C<isLOWER_L1>, C<isLOWER_uvchr>, C<isLOWER_utf8_safe>,
C<isLOWER_LC>, C<isLOWER_LC_uvchr>, and C<isLOWER_LC_utf8_safe>.
+=for apidoc Amh|bool|isLOWER_A|int ch
+=for apidoc Amh|bool|isLOWER_L1|int ch
+=for apidoc Amh|bool|isLOWER_uvchr|int ch
+=for apidoc Amh|bool|isLOWER_utf8_safe|U8 * s|U8 * end
+=for apidoc Amh|bool|isLOWER_utf8|U8 * s
+=for apidoc Amh|bool|isLOWER_LC|int ch
+=for apidoc Amh|bool|isLOWER_LC_uvchr|int ch
+=for apidoc Amh|bool|isLOWER_LC_utf8_safe|U8 * s| U8 *end
+
=for apidoc Am|bool|isOCTAL|char ch
Returns a boolean indicating whether the specified character is an
octal digit, [0-7].
The only two variants are C<isOCTAL_A> and C<isOCTAL_L1>; each is identical to
C<isOCTAL>.
+=for apidoc Amh|bool|isOCTAL_A|int ch
+=for apidoc Amh|bool|isOCTAL_L1|int ch
+
=for apidoc Am|bool|isPUNCT|char ch
Returns a boolean indicating whether the specified character is a
punctuation character, analogous to C<m/[[:punct:]]/>.
variants C<isPUNCT_A>, C<isPUNCT_L1>, C<isPUNCT_uvchr>, C<isPUNCT_utf8_safe>,
C<isPUNCT_LC>, C<isPUNCT_LC_uvchr>, and C<isPUNCT_LC_utf8_safe>.
+=for apidoc Amh|bool|isPUNCT_A|int ch
+=for apidoc Amh|bool|isPUNCT_L1|int ch
+=for apidoc Amh|bool|isPUNCT_uvchr|int ch
+=for apidoc Amh|bool|isPUNCT_utf8_safe|U8 * s|U8 * end
+=for apidoc Amh|bool|isPUNCT_utf8|U8 * s
+=for apidoc Amh|bool|isPUNCT_LC|int ch
+=for apidoc Amh|bool|isPUNCT_LC_uvchr|int ch
+=for apidoc Amh|bool|isPUNCT_LC_utf8_safe|U8 * s| U8 *end
+
=for apidoc Am|bool|isSPACE|char ch
Returns a boolean indicating whether the specified character is a
whitespace character. This is analogous
C<isSPACE_A>, C<isSPACE_L1>, C<isSPACE_uvchr>, C<isSPACE_utf8_safe>,
C<isSPACE_LC>, C<isSPACE_LC_uvchr>, and C<isSPACE_LC_utf8_safe>.
+=for apidoc Amh|bool|isSPACE_A|int ch
+=for apidoc Amh|bool|isSPACE_L1|int ch
+=for apidoc Amh|bool|isSPACE_uvchr|int ch
+=for apidoc Amh|bool|isSPACE_utf8_safe|U8 * s|U8 * end
+=for apidoc Amh|bool|isSPACE_utf8|U8 * s
+=for apidoc Amh|bool|isSPACE_LC|int ch
+=for apidoc Amh|bool|isSPACE_LC_uvchr|int ch
+=for apidoc Amh|bool|isSPACE_LC_utf8_safe|U8 * s| U8 *end
+
=for apidoc Am|bool|isPSXSPC|char ch
(short for Posix Space)
Starting in 5.18, this is identical in all its forms to the
variants C<isPSXSPC_A>, C<isPSXSPC_L1>, C<isPSXSPC_uvchr>, C<isPSXSPC_utf8_safe>,
C<isPSXSPC_LC>, C<isPSXSPC_LC_uvchr>, and C<isPSXSPC_LC_utf8_safe>.
+=for apidoc Amh|bool|isPSXSPC_A|int ch
+=for apidoc Amh|bool|isPSXSPC_L1|int ch
+=for apidoc Amh|bool|isPSXSPC_uvchr|int ch
+=for apidoc Amh|bool|isPSXSPC_utf8_safe|U8 * s|U8 * end
+=for apidoc Amh|bool|isPSXSPC_utf8|U8 * s
+=for apidoc Amh|bool|isPSXSPC_LC|int ch
+=for apidoc Amh|bool|isPSXSPC_LC_uvchr|int ch
+=for apidoc Amh|bool|isPSXSPC_LC_utf8_safe|U8 * s| U8 *end
+
=for apidoc Am|bool|isUPPER|char ch
Returns a boolean indicating whether the specified character is an
uppercase character, analogous to C<m/[[:upper:]]/>.
variants C<isUPPER_A>, C<isUPPER_L1>, C<isUPPER_uvchr>, C<isUPPER_utf8_safe>,
C<isUPPER_LC>, C<isUPPER_LC_uvchr>, and C<isUPPER_LC_utf8_safe>.
+=for apidoc Amh|bool|isUPPER_A|int ch
+=for apidoc Amh|bool|isUPPER_L1|int ch
+=for apidoc Amh|bool|isUPPER_uvchr|int ch
+=for apidoc Amh|bool|isUPPER_utf8_safe|U8 * s|U8 * end
+=for apidoc Amh|bool|isUPPER_utf8|U8 * s
+=for apidoc Amh|bool|isUPPER_LC|int ch
+=for apidoc Amh|bool|isUPPER_LC_uvchr|int ch
+=for apidoc Amh|bool|isUPPER_LC_utf8_safe|U8 * s| U8 *end
+
=for apidoc Am|bool|isPRINT|char ch
Returns a boolean indicating whether the specified character is a
printable character, analogous to C<m/[[:print:]]/>.
C<isPRINT_A>, C<isPRINT_L1>, C<isPRINT_uvchr>, C<isPRINT_utf8_safe>,
C<isPRINT_LC>, C<isPRINT_LC_uvchr>, and C<isPRINT_LC_utf8_safe>.
+=for apidoc Amh|bool|isPRINT_A|int ch
+=for apidoc Amh|bool|isPRINT_L1|int ch
+=for apidoc Amh|bool|isPRINT_uvchr|int ch
+=for apidoc Amh|bool|isPRINT_utf8_safe|U8 * s|U8 * end
+=for apidoc Amh|bool|isPRINT_utf8|U8 * s
+=for apidoc Amh|bool|isPRINT_LC|int ch
+=for apidoc Amh|bool|isPRINT_LC_uvchr|int ch
+=for apidoc Amh|bool|isPRINT_LC_utf8_safe|U8 * s| U8 *end
+
=for apidoc Am|bool|isWORDCHAR|char ch
Returns a boolean indicating whether the specified character is a character
that is a word character, analogous to what C<m/\w/> and C<m/[[:word:]]/> match
C<isWORDCHAR_LC_utf8_safe> are also as described there, but additionally
include the platform's native underscore.
+=for apidoc Amh|bool|isWORDCHAR_A|int ch
+=for apidoc Amh|bool|isWORDCHAR_L1|int ch
+=for apidoc Amh|bool|isWORDCHAR_uvchr|int ch
+=for apidoc Amh|bool|isWORDCHAR_utf8_safe|U8 * s|U8 * end
+=for apidoc Amh|bool|isWORDCHAR_utf8|U8 * s
+=for apidoc Amh|bool|isWORDCHAR_LC|int ch
+=for apidoc Amh|bool|isWORDCHAR_LC_uvchr|int ch
+=for apidoc Amh|bool|isWORDCHAR_LC_utf8_safe|U8 * s| U8 *end
+=for apidoc Amh|bool|isALNUM|int ch
+=for apidoc Amh|bool|isALNUM_A|int ch
+=for apidoc Amh|bool|isALNUM_LC|int ch
+=for apidoc Amh|bool|isALNUM_LC_uvchr|int ch
+
=for apidoc Am|bool|isXDIGIT|char ch
Returns a boolean indicating whether the specified character is a hexadecimal
digit. In the ASCII range these are C<[0-9A-Fa-f]>. Variants C<isXDIGIT_A()>
C<isXDIGIT_uvchr>, C<isXDIGIT_utf8_safe>, C<isXDIGIT_LC>, C<isXDIGIT_LC_uvchr>,
and C<isXDIGIT_LC_utf8_safe>.
+=for apidoc Amh|bool|isXDIGIT_A|int ch
+=for apidoc Amh|bool|isXDIGIT_L1|int ch
+=for apidoc Amh|bool|isXDIGIT_uvchr|int ch
+=for apidoc Amh|bool|isXDIGIT_utf8_safe|U8 * s|U8 * end
+=for apidoc Amh|bool|isXDIGIT_utf8|U8 * s
+=for apidoc Amh|bool|isXDIGIT_LC|int ch
+=for apidoc Amh|bool|isXDIGIT_LC_uvchr|int ch
+=for apidoc Amh|bool|isXDIGIT_LC_utf8_safe|U8 * s| U8 *end
+
=for apidoc Am|bool|isIDFIRST|char ch
Returns a boolean indicating whether the specified character can be the first
character of an identifier. This is very close to, but not quite the same as
C<isIDFIRST_A>, C<isIDFIRST_L1>, C<isIDFIRST_uvchr>, C<isIDFIRST_utf8_safe>,
C<isIDFIRST_LC>, C<isIDFIRST_LC_uvchr>, and C<isIDFIRST_LC_utf8_safe>.
+=for apidoc Amh|bool|isIDFIRST_A|int ch
+=for apidoc Amh|bool|isIDFIRST_L1|int ch
+=for apidoc Amh|bool|isIDFIRST_uvchr|int ch
+=for apidoc Amh|bool|isIDFIRST_utf8_safe|U8 * s|U8 * end
+=for apidoc Amh|bool|isIDFIRST_utf8|U8 * s
+=for apidoc Amh|bool|isIDFIRST_LC|int ch
+=for apidoc Amh|bool|isIDFIRST_LC_uvchr|int ch
+=for apidoc Amh|bool|isIDFIRST_LC_utf8_safe|U8 * s| U8 *end
+
=for apidoc Am|bool|isIDCONT|char ch
Returns a boolean indicating whether the specified character can be the
second or succeeding character of an identifier. This is very close to, but
C<isIDCONT_utf8_safe>, C<isIDCONT_LC>, C<isIDCONT_LC_uvchr>, and
C<isIDCONT_LC_utf8_safe>.
+=for apidoc Amh|bool|isIDCONT_A|int ch
+=for apidoc Amh|bool|isIDCONT_L1|int ch
+=for apidoc Amh|bool|isIDCONT_uvchr|int ch
+=for apidoc Amh|bool|isIDCONT_utf8_safe|U8 * s|U8 * end
+=for apidoc Amh|bool|isIDCONT_utf8|U8 * s
+=for apidoc Amh|bool|isIDCONT_LC|int ch
+=for apidoc Amh|bool|isIDCONT_LC_uvchr|int ch
+=for apidoc Amh|bool|isIDCONT_LC_utf8_safe|U8 * s| U8 *end
+
=head1 Miscellaneous Functions
=for apidoc Am|U8|READ_XDIGIT|char str*
no other function that is crippled by not being able to give the correct
results for the full range of possible inputs has been implemented here.
-=for apidoc Am|U8|toUPPER|U8 ch
+=for apidoc Am|U8|toUPPER|int ch
Converts the specified character to uppercase. If the input is anything but an
ASCII lowercase character, that input character itself is returned. Variant
C<toUPPER_A> is equivalent.
=for apidoc Am|UV|toUPPER_utf8|U8* p|U8* s|STRLEN* lenp
This is like C<L</toUPPER_utf8_safe>>, but doesn't have the C<e>
parameter The function therefore can't check if it is reading
-beyond the end of the string. Starting in Perl v5.30, it will take the C<e>
+beyond the end of the string. Starting in Perl v5.32, it will take the C<e>
parameter, becoming a synonym for C<toUPPER_utf8_safe>. At that time every
program that uses it will have to be changed to successfully compile. In the
meantime, the first runtime call to C<toUPPER_utf8> from each call point in the
program will raise a deprecation warning, enabled by default. You can convert
your program now to use C<toUPPER_utf8_safe>, and avoid the warnings, and get an
-extra measure of protection, or you can wait until v5.30, when you'll be forced
+extra measure of protection, or you can wait until v5.32, when you'll be forced
to add the C<e> parameter.
=for apidoc Am|U8|toFOLD|U8 ch
=for apidoc Am|UV|toFOLD_utf8|U8* p|U8* s|STRLEN* lenp
This is like C<L</toFOLD_utf8_safe>>, but doesn't have the C<e>
parameter The function therefore can't check if it is reading
-beyond the end of the string. Starting in Perl v5.30, it will take the C<e>
+beyond the end of the string. Starting in Perl v5.32, it will take the C<e>
parameter, becoming a synonym for C<toFOLD_utf8_safe>. At that time every
program that uses it will have to be changed to successfully compile. In the
meantime, the first runtime call to C<toFOLD_utf8> from each call point in the
program will raise a deprecation warning, enabled by default. You can convert
your program now to use C<toFOLD_utf8_safe>, and avoid the warnings, and get an
-extra measure of protection, or you can wait until v5.30, when you'll be forced
+extra measure of protection, or you can wait until v5.32, when you'll be forced
to add the C<e> parameter.
=for apidoc Am|U8|toLOWER|U8 ch
=for apidoc Am|UV|toLOWER_utf8|U8* p|U8* s|STRLEN* lenp
This is like C<L</toLOWER_utf8_safe>>, but doesn't have the C<e>
parameter The function therefore can't check if it is reading
-beyond the end of the string. Starting in Perl v5.30, it will take the C<e>
+beyond the end of the string. Starting in Perl v5.32, it will take the C<e>
parameter, becoming a synonym for C<toLOWER_utf8_safe>. At that time every
program that uses it will have to be changed to successfully compile. In the
meantime, the first runtime call to C<toLOWER_utf8> from each call point in the
program will raise a deprecation warning, enabled by default. You can convert
your program now to use C<toLOWER_utf8_safe>, and avoid the warnings, and get an
-extra measure of protection, or you can wait until v5.30, when you'll be forced
+extra measure of protection, or you can wait until v5.32, when you'll be forced
to add the C<e> parameter.
=for apidoc Am|U8|toTITLE|U8 ch
=for apidoc Am|UV|toTITLE_utf8|U8* p|U8* s|STRLEN* lenp
This is like C<L</toLOWER_utf8_safe>>, but doesn't have the C<e>
parameter The function therefore can't check if it is reading
-beyond the end of the string. Starting in Perl v5.30, it will take the C<e>
+beyond the end of the string. Starting in Perl v5.32, it will take the C<e>
parameter, becoming a synonym for C<toTITLE_utf8_safe>. At that time every
program that uses it will have to be changed to successfully compile. In the
meantime, the first runtime call to C<toTITLE_utf8> from each call point in the
program will raise a deprecation warning, enabled by default. You can convert
your program now to use C<toTITLE_utf8_safe>, and avoid the warnings, and get an
-extra measure of protection, or you can wait until v5.30, when you'll be forced
+extra measure of protection, or you can wait until v5.32, when you'll be forced
to add the C<e> parameter.
=cut
*/
-/* Specify the widest unsigned type on the platform. */
+/*
+ void below because that's the best fit, and works for Devel::PPPort
+=for apidoc AmnU|void|WIDEST_UTYPE
+
+Yields the widest unsigned integer type on the platform, currently either
+C<U32> or C<64>. This can be used in declarations such as
+
+ WIDEST_UTYPE my_uv;
+
+or casts
+
+ my_uv = (WIDEST_UTYPE) val;
+
+=cut
+
+*/
#ifdef QUADKIND
# define WIDEST_UTYPE U64
#else
#define FITS_IN_8_BITS(c) (1)
#endif
-/* Returns true if c is in the range l..u
- * Written with the cast so it only needs one conditional test
- */
-#define inRANGE(c, l, u) (__ASSERT_((u) >= (l)) \
- ((WIDEST_UTYPE) (((c) - (l)) | 0) <= ((WIDEST_UTYPE) ((u) - (l)))))
+/* Returns true if l <= c <= l + n, where 'l' and 'n' are non-negative
+ * Written this way so that after optimization, only one conditional test is
+ * needed. */
+#define withinCOUNT(c, l, n) (__ASSERT_((l) >= 0) __ASSERT_((n) >= (0)) \
+ (((WIDEST_UTYPE) (((c) | 0) - ((l) | 0))) <= (((WIDEST_UTYPE) ((n) | 0)))))
+
+/* Returns true if c is in the range l..u, where 'l' is non-negative
+ * Written this way so that after optimization, only one conditional test is
+ * needed. */
+#define inRANGE(c, l, u) (__ASSERT_((l) >= 0) __ASSERT_((u) >= (l)) \
+ ( (sizeof(c) == sizeof(U8)) ? withinCOUNT(((U8) (c)), (l), ((u) - (l))) \
+ : (sizeof(c) == sizeof(U16)) ? withinCOUNT(((U16) (c)), (l), ((u) - (l))) \
+ : (sizeof(c) == sizeof(U32)) ? withinCOUNT(((U32) (c)), (l), ((u) - (l))) \
+ : (__ASSERT_(sizeof(c) == sizeof(WIDEST_UTYPE)) \
+ withinCOUNT(( (c)), (l), ((u) - (l))))))
#ifdef EBCDIC
# ifndef _ALL_SOURCE
#define isOCTAL_L1(c) isOCTAL_A(c)
#define isXDIGIT_L1(c) isXDIGIT_A(c)
#define isALNUM(c) isWORDCHAR(c)
+#define isALNUM_A(c) isALNUM(c)
#define isALNUMU(c) isWORDCHAR_L1(c)
#define isALNUM_LC(c) isWORDCHAR_LC(c)
#define isALNUM_uni(c) isWORDCHAR_uni(c)
#define StructCopy(s,d,t) (*((t*)(d)) = *((t*)(s)))
-/* C_ARRAY_LENGTH is the number of elements in the C array (so you
- * want your zero-based indices to be less than but not equal to).
- *
- * C_ARRAY_END is one past the last: half-open/half-closed range,
- * not last-inclusive range. */
+/*
+=head1 Handy Values
+
+=for apidoc Am|STRLEN|C_ARRAY_LENGTH|void *a
+
+Returns the number of elements in the input C array (so you want your
+zero-based indices to be less than but not equal to).
+
+=for apidoc Am|void *|C_ARRAY_END|void *a
+
+Returns a pointer to one element past the final element of the input C array.
+
+=cut
+
+C_ARRAY_END is one past the last: half-open/half-closed range, not
+last-inclusive range.
+*/
#define C_ARRAY_LENGTH(a) (sizeof(a)/sizeof((a)[0]))
#define C_ARRAY_END(a) ((a) + C_ARRAY_LENGTH(a))