perl5.git.perl.org Git - perl5.git/blame_incremental

Commit	Line	Data
	1	/* handy.h
	2	*
	3	* Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1999, 2000,
	4	* 2001, 2002, 2004, 2005, 2006, 2007, 2008, 2012 by Larry Wall and others
	5	*
	6	* You may distribute under the terms of either the GNU General Public
	7	* License or the Artistic License, as specified in the README file.
	8	*
	9	*/
	10
	11	/* IMPORTANT NOTE: Everything whose name begins with an underscore is for
	12	* internal core Perl use only. */
	13
	14	#ifndef PERL_HANDY_H_ /* Guard against nested #inclusion */
	15	#define PERL_HANDY_H_
	16
	17	#ifndef PERL_CORE
	18	# define Null(type) ((type)NULL)
	19
	20	/*
	21	=head1 Handy Values
	22
	23	=for apidoc AmnU\|\|Nullch
	24	Null character pointer. (No longer available when C<PERL_CORE> is
	25	defined.)
	26
	27	=for apidoc AmnU\|\|Nullsv
	28	Null SV pointer. (No longer available when C<PERL_CORE> is defined.)
	29
	30	=cut
	31	*/
	32
	33	# define Nullch Null(char*)
	34	# define Nullfp Null(PerlIO*)
	35	# define Nullsv Null(SV*)
	36	#endif
	37
	38	#ifdef TRUE
	39	#undef TRUE
	40	#endif
	41	#ifdef FALSE
	42	#undef FALSE
	43	#endif
	44	#define TRUE (1)
	45	#define FALSE (0)
	46
	47	/* The MUTABLE_*() macros cast pointers to the types shown, in such a way
	48	* (compiler permitting) that casting away const-ness will give a warning;
	49	* e.g.:
	50	*
	51	* const SV *sv = ...;
	52	* AV *av1 = (AV*)sv; <== BAD: the const has been silently cast away
	53	* AV *av2 = MUTABLE_AV(sv); <== GOOD: it may warn
	54	*/
	55
	56	#if defined(__GNUC__) && !defined(PERL_GCC_BRACE_GROUPS_FORBIDDEN)
	57	# define MUTABLE_PTR(p) ({ void *_p = (p); _p; })
	58	#else
	59	# define MUTABLE_PTR(p) ((void *) (p))
	60	#endif
	61
	62	#define MUTABLE_AV(p) ((AV *)MUTABLE_PTR(p))
	63	#define MUTABLE_CV(p) ((CV *)MUTABLE_PTR(p))
	64	#define MUTABLE_GV(p) ((GV *)MUTABLE_PTR(p))
	65	#define MUTABLE_HV(p) ((HV *)MUTABLE_PTR(p))
	66	#define MUTABLE_IO(p) ((IO *)MUTABLE_PTR(p))
	67	#define MUTABLE_SV(p) ((SV *)MUTABLE_PTR(p))
	68
	69	#if defined(I_STDBOOL) && !defined(PERL_BOOL_AS_CHAR)
	70	# include <stdbool.h>
	71	# ifndef HAS_BOOL
	72	# define HAS_BOOL 1
	73	# endif
	74	#endif
	75
	76	/* bool is built-in for g++-2.6.3 and later, which might be used
	77	for extensions. <_G_config.h> defines _G_HAVE_BOOL, but we can't
	78	be sure _G_config.h will be included before this file. _G_config.h
	79	also defines _G_HAVE_BOOL for both gcc and g++, but only g++
	80	actually has bool. Hence, _G_HAVE_BOOL is pretty useless for us.
	81	g++ can be identified by __GNUG__.
	82	Andy Dougherty February 2000
	83	*/
	84	#ifdef __GNUG__ /* GNU g++ has bool built-in */
	85	# ifndef PERL_BOOL_AS_CHAR
	86	# ifndef HAS_BOOL
	87	# define HAS_BOOL 1
	88	# endif
	89	# endif
	90	#endif
	91
	92	#ifndef HAS_BOOL
	93	# ifdef bool
	94	# undef bool
	95	# endif
	96	# define bool char
	97	# define HAS_BOOL 1
	98	#endif
	99
	100	/*
	101	=for apidoc Am\|bool\|cBOOL\|bool expr
	102
	103	Cast-to-bool. A simple S<C<(bool) I<expr>>> cast may not do the right thing:
	104	if C<bool> is defined as C<char>, for example, then the cast from C<int> is
	105	implementation-defined.
	106
	107	C<(bool)!!(cbool)> in a ternary triggers a bug in xlc on AIX
	108
	109	=cut
	110	*/
	111	#define cBOOL(cbool) ((cbool) ? (bool)1 : (bool)0)
	112
	113	/* Try to figure out __func__ or __FUNCTION__ equivalent, if any.
	114	* XXX Should really be a Configure probe, with HAS__FUNCTION__
	115	* and FUNCTION__ as results.
	116	* XXX Similarly, a Configure probe for __FILE__ and __LINE__ is needed. */
	117	#if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L) || (defined(__SUNPRO_C)) /* C99 or close enough. */
	118	# define FUNCTION__ __func__
	119	#elif (defined(USING_MSVC6)) || /* MSVC6 has neither __func__ nor __FUNCTION__ and no good workarounds, either. */ \
	120	(defined(__DECC_VER)) /* Tru64 or VMS, and strict C89 being used, but not modern enough cc (in Tru64, -c99 not known, only -std1). */
	121	# define FUNCTION__ ""
	122	#else
	123	# define FUNCTION__ __FUNCTION__ /* Common extension. */
	124	#endif
	125
	126	/* XXX A note on the perl source internal type system. The
	127	original intent was that I32 be *exactly* 32 bits.
	128
	129	Currently, we only guarantee that I32 is *at least* 32 bits.
	130	Specifically, if int is 64 bits, then so is I32. (This is the case
	131	for the Cray.) This has the advantage of meshing nicely with
	132	standard library calls (where we pass an I32 and the library is
	133	expecting an int), but the disadvantage that an I32 is not 32 bits.
	134	Andy Dougherty August 1996
	135
	136	There is no guarantee that there is *any* integral type with
	137	exactly 32 bits. It is perfectly legal for a system to have
	138	sizeof(short) == sizeof(int) == sizeof(long) == 8.
	139
	140	Similarly, there is no guarantee that I16 and U16 have exactly 16
	141	bits.
	142
	143	For dealing with issues that may arise from various 32/64-bit
	144	systems, we will ask Configure to check out
	145
	146	SHORTSIZE == sizeof(short)
	147	INTSIZE == sizeof(int)
	148	LONGSIZE == sizeof(long)
	149	LONGLONGSIZE == sizeof(long long) (if HAS_LONG_LONG)
	150	PTRSIZE == sizeof(void *)
	151	DOUBLESIZE == sizeof(double)
	152	LONG_DOUBLESIZE == sizeof(long double) (if HAS_LONG_DOUBLE).
	153
	154	*/
	155
	156	#ifdef I_INTTYPES /* e.g. Linux has int64_t without <inttypes.h> */
	157	# include <inttypes.h>
	158	# ifdef INT32_MIN_BROKEN
	159	# undef INT32_MIN
	160	# define INT32_MIN (-2147483647-1)
	161	# endif
	162	# ifdef INT64_MIN_BROKEN
	163	# undef INT64_MIN
	164	# define INT64_MIN (-9223372036854775807LL-1)
	165	# endif
	166	#endif
	167
	168	typedef I8TYPE I8;
	169	typedef U8TYPE U8;
	170	typedef I16TYPE I16;
	171	typedef U16TYPE U16;
	172	typedef I32TYPE I32;
	173	typedef U32TYPE U32;
	174
	175	#ifdef QUADKIND
	176	typedef I64TYPE I64;
	177	typedef U64TYPE U64;
	178	#endif
	179
	180	#if defined(UINT8_MAX) && defined(INT16_MAX) && defined(INT32_MAX)
	181
	182	/* I8_MAX and I8_MIN constants are not defined, as I8 is an ambiguous type.
	183	Please search CHAR_MAX in perl.h for further details. */
	184	#define U8_MAX UINT8_MAX
	185	#define U8_MIN UINT8_MIN
	186
	187	#define I16_MAX INT16_MAX
	188	#define I16_MIN INT16_MIN
	189	#define U16_MAX UINT16_MAX
	190	#define U16_MIN UINT16_MIN
	191
	192	#define I32_MAX INT32_MAX
	193	#define I32_MIN INT32_MIN
	194	#ifndef UINT32_MAX_BROKEN /* e.g. HP-UX with gcc messes this up */
	195	# define U32_MAX UINT32_MAX
	196	#else
	197	# define U32_MAX 4294967295U
	198	#endif
	199	#define U32_MIN UINT32_MIN
	200
	201	#else
	202
	203	/* I8_MAX and I8_MIN constants are not defined, as I8 is an ambiguous type.
	204	Please search CHAR_MAX in perl.h for further details. */
	205	#define U8_MAX PERL_UCHAR_MAX
	206	#define U8_MIN PERL_UCHAR_MIN
	207
	208	#define I16_MAX PERL_SHORT_MAX
	209	#define I16_MIN PERL_SHORT_MIN
	210	#define U16_MAX PERL_USHORT_MAX
	211	#define U16_MIN PERL_USHORT_MIN
	212
	213	#if LONGSIZE > 4
	214	# define I32_MAX PERL_INT_MAX
	215	# define I32_MIN PERL_INT_MIN
	216	# define U32_MAX PERL_UINT_MAX
	217	# define U32_MIN PERL_UINT_MIN
	218	#else
	219	# define I32_MAX PERL_LONG_MAX
	220	# define I32_MIN PERL_LONG_MIN
	221	# define U32_MAX PERL_ULONG_MAX
	222	# define U32_MIN PERL_ULONG_MIN
	223	#endif
	224
	225	#endif
	226
	227	/* These C99 typedefs are useful sometimes for, say, loop variables whose
	228	* maximum values are small, but for which speed trumps size. If we have a C99
	229	* compiler, use that. Otherwise, a plain 'int' should be good enough.
	230	*
	231	* Restrict these to core for now until we are more certain this is a good
	232	* idea. */
	233	#if defined(PERL_CORE) || defined(PERL_EXT)
	234	# ifdef I_STDINT
	235	typedef int_fast8_t PERL_INT_FAST8_T;
	236	typedef uint_fast8_t PERL_UINT_FAST8_T;
	237	typedef int_fast16_t PERL_INT_FAST16_T;
	238	typedef uint_fast16_t PERL_UINT_FAST16_T;
	239	# else
	240	typedef int PERL_INT_FAST8_T;
	241	typedef unsigned int PERL_UINT_FAST8_T;
	242	typedef int PERL_INT_FAST16_T;
	243	typedef unsigned int PERL_UINT_FAST16_T;
	244	# endif
	245	#endif
	246
	247	/* log(2) (i.e., log base 10 of 2) is pretty close to 0.30103, just in case
	248	* anyone is grepping for it */
	249	#define BIT_DIGITS(N) (((N)*146)/485 + 1) /* log10(2) =~ 146/485 */
	250	#define TYPE_DIGITS(T) BIT_DIGITS(sizeof(T) * 8)
	251	#define TYPE_CHARS(T) (TYPE_DIGITS(T) + 2) /* sign, NUL */
	252
	253	/* Unused by core; should be deprecated */
	254	#define Ctl(ch) ((ch) & 037)
	255
	256	#if defined(PERL_CORE) || defined(PERL_EXT)
	257	# ifndef MIN
	258	# define MIN(a,b) ((a) < (b) ? (a) : (b))
	259	# endif
	260	# ifndef MAX
	261	# define MAX(a,b) ((a) > (b) ? (a) : (b))
	262	# endif
	263	#endif
	264
	265	/* Returns a boolean as to whether the input unsigned number is a power of 2
	266	* (2**0, 2**1, etc). In other words if it has just a single bit set.
	267	* If not, subtracting 1 would leave the uppermost bit set, so the & would
	268	* yield non-zero */
	269	#if defined(PERL_CORE) || defined(PERL_EXT)
	270	# define isPOWER_OF_2(n) ((n) && ((n) & ((n)-1)) == 0)
	271	#endif
	272
	273	/*
	274	=for apidoc Am\|void\|__ASSERT_\|bool expr
	275
	276	This is a helper macro to avoid preprocessor issues, replaced by nothing
	277	unless under DEBUGGING, where it expands to an assert of its argument,
	278	followed by a comma (hence the comma operator). If we just used a straight
	279	assert(), we would get a comma with nothing before it when not DEBUGGING.
	280
	281	=cut
	282
	283	We also use an empty definition under Coverity since the __ASSERT_
	284	checks often check for things that Really Cannot Happen, and Coverity
	285	detects that and gets all excited. */
	286
	287	#if defined(DEBUGGING) && !defined(__COVERITY__)
	288	# define __ASSERT_(statement) assert(statement),
	289	#else
	290	# define __ASSERT_(statement)
	291	#endif
	292
	293	/*
	294	=head1 SV Manipulation Functions
	295
	296	=for apidoc Ama\|SV*\|newSVpvs\|"literal string"
	297	Like C<newSVpvn>, but takes a literal string instead of a
	298	string/length pair.
	299
	300	=for apidoc Ama\|SV*\|newSVpvs_flags\|"literal string"\|U32 flags
	301	Like C<newSVpvn_flags>, but takes a literal string instead of
	302	a string/length pair.
	303
	304	=for apidoc Ama\|SV*\|newSVpvs_share\|"literal string"
	305	Like C<newSVpvn_share>, but takes a literal string instead of
	306	a string/length pair and omits the hash parameter.
	307
	308	=for apidoc Am\|void\|sv_catpvs_flags\|SV* sv\|"literal string"\|I32 flags
	309	Like C<sv_catpvn_flags>, but takes a literal string instead
	310	of a string/length pair.
	311
	312	=for apidoc Am\|void\|sv_catpvs_nomg\|SV* sv\|"literal string"
	313	Like C<sv_catpvn_nomg>, but takes a literal string instead of
	314	a string/length pair.
	315
	316	=for apidoc Am\|void\|sv_catpvs\|SV* sv\|"literal string"
	317	Like C<sv_catpvn>, but takes a literal string instead of a
	318	string/length pair.
	319
	320	=for apidoc Am\|void\|sv_catpvs_mg\|SV* sv\|"literal string"
	321	Like C<sv_catpvn_mg>, but takes a literal string instead of a
	322	string/length pair.
	323
	324	=for apidoc Am\|void\|sv_setpvs\|SV* sv\|"literal string"
	325	Like C<sv_setpvn>, but takes a literal string instead of a
	326	string/length pair.
	327
	328	=for apidoc Am\|void\|sv_setpvs_mg\|SV* sv\|"literal string"
	329	Like C<sv_setpvn_mg>, but takes a literal string instead of a
	330	string/length pair.
	331
	332	=for apidoc Am|SV *|sv_setref_pvs|SV *const rv|const char *const classname|"literal string"
	333	Like C<sv_setref_pvn>, but takes a literal string instead of
	334	a string/length pair.
	335
	336	=head1 Memory Management
	337
	338	=for apidoc Ama\|char*\|savepvs\|"literal string"
	339	Like C<savepvn>, but takes a literal string instead of a
	340	string/length pair.
	341
	342	=for apidoc Ama\|char*\|savesharedpvs\|"literal string"
	343	A version of C<savepvs()> which allocates the duplicate string in memory
	344	which is shared between threads.
	345
	346	=head1 GV Functions
	347
	348	=for apidoc Am\|HV*\|gv_stashpvs\|"name"\|I32 create
	349	Like C<gv_stashpvn>, but takes a literal string instead of a
	350	string/length pair.
	351
	352	=head1 Hash Manipulation Functions
	353
	354	=for apidoc Am|SV**|hv_fetchs|HV* tb|"key"|I32 lval
	355	Like C<hv_fetch>, but takes a literal string instead of a
	356	string/length pair.
	357
	358	=for apidoc Am|SV**|hv_stores|HV* tb|"key"|SV* val
	359	Like C<hv_store>, but takes a literal string instead of a
	360	string/length pair
	361	and omits the hash parameter.
	362
	363	=head1 Lexer interface
	364
	365	=for apidoc Amx\|void\|lex_stuff_pvs\|"pv"\|U32 flags
	366
	367	Like L</lex_stuff_pvn>, but takes a literal string instead of
	368	a string/length pair.
	369
	370	=cut
	371	*/
	372
	373	/*
	374	=head1 Handy Values
	375
	376	=for apidoc Amu\|pair\|STR_WITH_LEN\|"literal string"
	377
	378	Returns two comma separated tokens of the input literal string, and its length.
	379	This is a convenience macro which helps out in some API calls.
	380	Note that it can't be used as an argument to macros or functions that under
	381	some configurations might be macros, which means that it requires the full
	382	Perl_xxx(aTHX_ ...) form for any API calls where it's used.
	383
	384	=cut
	385	*/
	386
	387
	388	#define STR_WITH_LEN(s) ("" s ""), (sizeof(s)-1)
	389
	390	/* STR_WITH_LEN() shortcuts */
	391	#define newSVpvs(str) Perl_newSVpvn(aTHX_ STR_WITH_LEN(str))
	392	#define newSVpvs_flags(str,flags) \
	393	Perl_newSVpvn_flags(aTHX_ STR_WITH_LEN(str), flags)
	394	#define newSVpvs_share(str) Perl_newSVpvn_share(aTHX_ STR_WITH_LEN(str), 0)
	395	#define sv_catpvs_flags(sv, str, flags) \
	396	Perl_sv_catpvn_flags(aTHX_ sv, STR_WITH_LEN(str), flags)
	397	#define sv_catpvs_nomg(sv, str) \
	398	Perl_sv_catpvn_flags(aTHX_ sv, STR_WITH_LEN(str), 0)
	399	#define sv_catpvs(sv, str) \
	400	Perl_sv_catpvn_flags(aTHX_ sv, STR_WITH_LEN(str), SV_GMAGIC)
	401	#define sv_catpvs_mg(sv, str) \
	402	Perl_sv_catpvn_flags(aTHX_ sv, STR_WITH_LEN(str), SV_GMAGIC|SV_SMAGIC)
	403	#define sv_setpvs(sv, str) Perl_sv_setpvn(aTHX_ sv, STR_WITH_LEN(str))
	404	#define sv_setpvs_mg(sv, str) Perl_sv_setpvn_mg(aTHX_ sv, STR_WITH_LEN(str))
	405	#define sv_setref_pvs(rv, classname, str) \
	406	Perl_sv_setref_pvn(aTHX_ rv, classname, STR_WITH_LEN(str))
	407	#define savepvs(str) Perl_savepvn(aTHX_ STR_WITH_LEN(str))
	408	#define savesharedpvs(str) Perl_savesharedpvn(aTHX_ STR_WITH_LEN(str))
	409	#define gv_stashpvs(str, create) \
	410	Perl_gv_stashpvn(aTHX_ STR_WITH_LEN(str), create)
	411	#define gv_fetchpvs(namebeg, add, sv_type) \
	412	Perl_gv_fetchpvn_flags(aTHX_ STR_WITH_LEN(namebeg), add, sv_type)
	413	#define gv_fetchpvn(namebeg, len, add, sv_type) \
	414	Perl_gv_fetchpvn_flags(aTHX_ namebeg, len, add, sv_type)
	415	#define sv_catxmlpvs(dsv, str, utf8) \
	416	Perl_sv_catxmlpvn(aTHX_ dsv, STR_WITH_LEN(str), utf8)
	417
	418
	419	#define lex_stuff_pvs(pv,flags) Perl_lex_stuff_pvn(aTHX_ STR_WITH_LEN(pv), flags)
	420
	421	#define get_cvs(str, flags) \
	422	Perl_get_cvn_flags(aTHX_ STR_WITH_LEN(str), (flags))
	423
	424	/*
	425	=head1 Miscellaneous Functions
	426
	427	=for apidoc Am\|bool\|strNE\|char* s1\|char* s2
	428	Test two C<NUL>-terminated strings to see if they are different. Returns true
	429	or false.
	430
	431	=for apidoc Am\|bool\|strEQ\|char* s1\|char* s2
	432	Test two C<NUL>-terminated strings to see if they are equal. Returns true or
	433	false.
	434
	435	=for apidoc Am\|bool\|strLT\|char* s1\|char* s2
	436	Test two C<NUL>-terminated strings to see if the first, C<s1>, is less than the
	437	second, C<s2>. Returns true or false.
	438
	439	=for apidoc Am\|bool\|strLE\|char* s1\|char* s2
	440	Test two C<NUL>-terminated strings to see if the first, C<s1>, is less than or
	441	equal to the second, C<s2>. Returns true or false.
	442
	443	=for apidoc Am\|bool\|strGT\|char* s1\|char* s2
	444	Test two C<NUL>-terminated strings to see if the first, C<s1>, is greater than
	445	the second, C<s2>. Returns true or false.
	446
	447	=for apidoc Am\|bool\|strGE\|char* s1\|char* s2
	448	Test two C<NUL>-terminated strings to see if the first, C<s1>, is greater than
	449	or equal to the second, C<s2>. Returns true or false.
	450
	451	=for apidoc Am\|bool\|strnNE\|char* s1\|char* s2\|STRLEN len
	452	Test two C<NUL>-terminated strings to see if they are different. The C<len>
	453	parameter indicates the number of bytes to compare. Returns true or false. (A
	454	wrapper for C<strncmp>).
	455
	456	=for apidoc Am\|bool\|strnEQ\|char* s1\|char* s2\|STRLEN len
	457	Test two C<NUL>-terminated strings to see if they are equal. The C<len>
	458	parameter indicates the number of bytes to compare. Returns true or false. (A
	459	wrapper for C<strncmp>).
	460
	461	=for apidoc Am\|bool\|memEQ\|char* s1\|char* s2\|STRLEN len
	462	Test two buffers (which may contain embedded C<NUL> characters) to see if they
	463	are equal. The C<len> parameter indicates the number of bytes to compare.
	464	Returns true if the buffers are equal, false otherwise.
	465
	466	=for apidoc Am\|bool\|memEQs\|char* s1\|STRLEN l1\|"s2"
	467	Like L</memEQ>, but the second string is a literal enclosed in double quotes,
	468	C<l1> gives the number of bytes in C<s1>.
	469	Returns true if the buffers are equal, false otherwise.
	470
	471	=for apidoc Am\|bool\|memNE\|char* s1\|char* s2\|STRLEN len
	472	Test two buffers (which may contain embedded C<NUL> characters) to see if they
	473	are not equal. The C<len> parameter indicates the number of bytes to compare.
	474	Returns true if the buffers differ, false if they are equal.
	475
	476	=for apidoc Am\|bool\|memNEs\|char* s1\|STRLEN l1\|"s2"
	477	Like L</memNE>, but the second string is a literal enclosed in double quotes,
	478	C<l1> gives the number of bytes in C<s1>.
	479	Returns true if the buffers differ, false if they are equal.
	480
	481	=cut
	482
	483	New macros should use the following conventions for their names (which are
	484	based on the underlying C library functions):
	485
	486	(mem \| str n? ) (EQ \| NE \| LT \| GT \| GE \| (( BEGIN \| END ) P? )) l? s?
	487
	488	Each has two main parameters, string-like operands that are compared
	489	against each other, as specified by the macro name. Some macros may
	490	additionally have one or potentially even two length parameters. If a length
	491	parameter applies to both string parameters, it will be positioned third;
	492	otherwise any length parameter immediately follows the string parameter it
	493	applies to.
	494
	495	If the prefix to the name is 'str', the string parameter is a pointer to a C
	496	language string. Such a string does not contain embedded NUL bytes; its
	497	length may be unknown, but can be calculated by C<strlen()>, since it is
	498	terminated by a NUL, which isn't included in its length.
	499
	500	The optional 'n' following 'str' means that there is a third parameter,

1

/* handy.h

2

*

3

4

* 2001, 2002, 2004, 2005, 2006, 2007, 2008, 2012 by Larry Wall and others

5

*

6

* You may distribute under the terms of either the GNU General Public

7

* License or the Artistic License, as specified in the README file.

*

*/

/* IMPORTANT NOTE: Everything whose name begins with an underscore is for

12

* internal core Perl use only. */

13

14

#ifndef PERL_HANDY_H_ /* Guard against nested #inclusion */

15

#define PERL_HANDY_H_

16

17

#ifndef PERL_CORE

18

# define Null(type) ((type)NULL)

/*

=head1 Handy Values

=for apidoc AmnU||Nullch

24

Null character pointer. (No longer available when C<PERL_CORE> is

25

defined.)

26

27

=for apidoc AmnU||Nullsv

28

Null SV pointer. (No longer available when C<PERL_CORE> is defined.)

=cut

*/

# define Nullch Null(char*)

34

# define Nullfp Null(PerlIO*)

35

# define Nullsv Null(SV*)

#endif

#ifdef TRUE

#undef TRUE

#endif

#ifdef FALSE

#undef FALSE

#endif

#define TRUE (1)

#define FALSE (0)

/* The MUTABLE_*() macros cast pointers to the types shown, in such a way

48

* (compiler permitting) that casting away const-ness will give a warning;

49

* e.g.:

50

*

51

* const SV *sv = ...;

52

* AV *av1 = (AV*)sv; <== BAD: the const has been silently cast away

53

* AV *av2 = MUTABLE_AV(sv); <== GOOD: it may warn

54

*/

55

56

#if defined(__GNUC__) && !defined(PERL_GCC_BRACE_GROUPS_FORBIDDEN)

57

# define MUTABLE_PTR(p) ({ void *_p = (p); _p; })

58

#else

59

# define MUTABLE_PTR(p) ((void *) (p))

60

#endif

61

62

#define MUTABLE_AV(p) ((AV *)MUTABLE_PTR(p))

63

#define MUTABLE_CV(p) ((CV *)MUTABLE_PTR(p))

64

#define MUTABLE_GV(p) ((GV *)MUTABLE_PTR(p))

65

#define MUTABLE_HV(p) ((HV *)MUTABLE_PTR(p))

66

#define MUTABLE_IO(p) ((IO *)MUTABLE_PTR(p))

67

#define MUTABLE_SV(p) ((SV *)MUTABLE_PTR(p))

68

69

#if defined(I_STDBOOL) && !defined(PERL_BOOL_AS_CHAR)

70

# include <stdbool.h>

# ifndef HAS_BOOL

# define HAS_BOOL 1

# endif

#endif

/* bool is built-in for g++-2.6.3 and later, which might be used

77

for extensions. <_G_config.h> defines _G_HAVE_BOOL, but we can't

78

be sure _G_config.h will be included before this file. _G_config.h

79

also defines _G_HAVE_BOOL for both gcc and g++, but only g++

80

actually has bool. Hence, _G_HAVE_BOOL is pretty useless for us.

81

g++ can be identified by __GNUG__.

82

Andy Dougherty February 2000

83

*/

84

#ifdef __GNUG__ /* GNU g++ has bool built-in */

85

# ifndef PERL_BOOL_AS_CHAR

# ifndef HAS_BOOL

# define HAS_BOOL 1

# endif

# endif

#endif

#ifndef HAS_BOOL

# ifdef bool

# undef bool

# endif

# define bool char

# define HAS_BOOL 1

#endif

/*

=for apidoc Am|bool|cBOOL|bool expr

102

103

Cast-to-bool. A simple S<C<(bool) I<expr>>> cast may not do the right thing:

104

if C<bool> is defined as C<char>, for example, then the cast from C<int> is

105

implementation-defined.

106

107

C<(bool)!!(cbool)> in a ternary triggers a bug in xlc on AIX

=cut

*/

/* Yields exactly (bool)1 when cbool is non-zero and (bool)0 otherwise. */
#define cBOOL(cbool) ((cbool) ? (bool)1 : (bool)0)

112

113

/* Try to figure out __func__ or __FUNCTION__ equivalent, if any.

114

* XXX Should really be a Configure probe, with HAS__FUNCTION__

115

* and FUNCTION__ as results.

116

* XXX Similarly, a Configure probe for __FILE__ and __LINE__ is needed. */

117

#if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L) || (defined(__SUNPRO_C)) /* C99 or close enough. */

118

# define FUNCTION__ __func__

119

#elif (defined(USING_MSVC6)) || /* MSVC6 has neither __func__ nor __FUNCTION__ and no good workarounds, either. */ \

120

(defined(__DECC_VER)) /* Tru64 or VMS, and strict C89 being used, but not modern enough cc (in Tru64, -c99 not known, only -std1). */

121

# define FUNCTION__ ""

122

#else

123

# define FUNCTION__ __FUNCTION__ /* Common extension. */

124

#endif

125

126

/* XXX A note on the perl source internal type system. The

127

original intent was that I32 be *exactly* 32 bits.

128

129

Currently, we only guarantee that I32 is *at least* 32 bits.

130

Specifically, if int is 64 bits, then so is I32. (This is the case

131

for the Cray.) This has the advantage of meshing nicely with

132

standard library calls (where we pass an I32 and the library is

133

expecting an int), but the disadvantage that an I32 is not 32 bits.

134

Andy Dougherty August 1996

135

136

There is no guarantee that there is *any* integral type with

137

exactly 32 bits. It is perfectly legal for a system to have

138

sizeof(short) == sizeof(int) == sizeof(long) == 8.

139

140

Similarly, there is no guarantee that I16 and U16 have exactly 16

141

bits.

142

143

For dealing with issues that may arise from various 32/64-bit

144

systems, we will ask Configure to check out

145

146

SHORTSIZE == sizeof(short)

147

INTSIZE == sizeof(int)

148

LONGSIZE == sizeof(long)

149

LONGLONGSIZE == sizeof(long long) (if HAS_LONG_LONG)

150

PTRSIZE == sizeof(void *)

151

DOUBLESIZE == sizeof(double)

152

LONG_DOUBLESIZE == sizeof(long double) (if HAS_LONG_DOUBLE).

*/

#ifdef I_INTTYPES /* e.g. Linux has int64_t without <inttypes.h> */

157

# include <inttypes.h>

158

# ifdef INT32_MIN_BROKEN

159

# undef INT32_MIN

160

# define INT32_MIN (-2147483647-1)

161

# endif

162

# ifdef INT64_MIN_BROKEN

163

# undef INT64_MIN

164

# define INT64_MIN (-9223372036854775807LL-1)

# endif

#endif

typedef I8TYPE I8;

typedef U8TYPE U8;

typedef I16TYPE I16;

typedef U16TYPE U16;

typedef I32TYPE I32;

typedef U32TYPE U32;

#ifdef QUADKIND

typedef I64TYPE I64;

typedef U64TYPE U64;

#endif

#if defined(UINT8_MAX) && defined(INT16_MAX) && defined(INT32_MAX)

181

182

/* I8_MAX and I8_MIN constants are not defined, as I8 is an ambiguous type.

183

Please search CHAR_MAX in perl.h for further details. */

184

#define U8_MAX UINT8_MAX

185

#define U8_MIN UINT8_MIN

186

187

#define I16_MAX INT16_MAX

188

#define I16_MIN INT16_MIN

189

#define U16_MAX UINT16_MAX

190

#define U16_MIN UINT16_MIN

191

192

#define I32_MAX INT32_MAX

193

#define I32_MIN INT32_MIN

194

#ifndef UINT32_MAX_BROKEN /* e.g. HP-UX with gcc messes this up */

195

# define U32_MAX UINT32_MAX

196

#else

197

# define U32_MAX 4294967295U

198

#endif

199

#define U32_MIN UINT32_MIN

#else

/* I8_MAX and I8_MIN constants are not defined, as I8 is an ambiguous type.

204

Please search CHAR_MAX in perl.h for further details. */

205

#define U8_MAX PERL_UCHAR_MAX

206

#define U8_MIN PERL_UCHAR_MIN

207

208

#define I16_MAX PERL_SHORT_MAX

209

#define I16_MIN PERL_SHORT_MIN

210

#define U16_MAX PERL_USHORT_MAX

211

#define U16_MIN PERL_USHORT_MIN

212

213

#if LONGSIZE > 4

214

# define I32_MAX PERL_INT_MAX

215

# define I32_MIN PERL_INT_MIN

216

# define U32_MAX PERL_UINT_MAX

217

# define U32_MIN PERL_UINT_MIN

218

#else

219

# define I32_MAX PERL_LONG_MAX

220

# define I32_MIN PERL_LONG_MIN

221

# define U32_MAX PERL_ULONG_MAX

222

# define U32_MIN PERL_ULONG_MIN

#endif

#endif

/* These C99 typedefs are useful sometimes for, say, loop variables whose

228

* maximum values are small, but for which speed trumps size. If we have a C99

229

* compiler, use that. Otherwise, a plain 'int' should be good enough.

230

*

231

* Restrict these to core for now until we are more certain this is a good

232

* idea. */

233

#if defined(PERL_CORE) || defined(PERL_EXT)

234

# ifdef I_STDINT

235

typedef int_fast8_t PERL_INT_FAST8_T;

236

typedef uint_fast8_t PERL_UINT_FAST8_T;

237

typedef int_fast16_t PERL_INT_FAST16_T;

238

typedef uint_fast16_t PERL_UINT_FAST16_T;

239

# else

240

typedef int PERL_INT_FAST8_T;

241

typedef unsigned int PERL_UINT_FAST8_T;

242

typedef int PERL_INT_FAST16_T;

243

typedef unsigned int PERL_UINT_FAST16_T;

# endif

#endif

/* log(2) (i.e., log base 10 of 2) is pretty close to 0.30103, just in case

248

* anyone is grepping for it */

249

#define BIT_DIGITS(N) (1 + ((N) * 146) / 485) /* 146/485 approximates log10(2) */

250

#define TYPE_DIGITS(T) BIT_DIGITS(8 * sizeof(T)) /* digit count from the type's size, taking 8 bits per byte */

251

#define TYPE_CHARS(T) (2 + TYPE_DIGITS(T)) /* the extra 2 are for a sign and the NUL */

252

253

/* Unused by core; should be deprecated */

254

#define Ctl(ch) ((ch) & 0x1F) /* keep only the low five bits of ch */

255

256

#if defined(PERL_CORE) || defined(PERL_EXT)

257

# ifndef MIN

258

# define MIN(a,b) ((a) < (b) ? (a) : (b))

259

# endif

260

# ifndef MAX

261

# define MAX(a,b) ((a) > (b) ? (a) : (b))

# endif

#endif

/* Returns a boolean as to whether the input unsigned number is a power of 2

266

* (2**0, 2**1, etc). In other words if it has just a single bit set.

267

* If not, subtracting 1 would leave the uppermost bit set, so the & would

268

* yield non-zero */

269

#if defined(PERL_CORE) || defined(PERL_EXT)

270

# define isPOWER_OF_2(n) ((n) != 0 && ((n) & ((n) - 1)) == 0) /* non-zero with exactly one bit set */

#endif

/*

=for apidoc Am|void|__ASSERT_|bool expr

275

276

This is a helper macro to avoid preprocessor issues, replaced by nothing

277

unless under DEBUGGING, where it expands to an assert of its argument,

278

followed by a comma (hence the comma operator). If we just used a straight

279

assert(), we would get a comma with nothing before it when not DEBUGGING.

=cut

We also use empty definition under Coverity since the __ASSERT__

284

checks often check for things that Really Cannot Happen, and Coverity

285

detects that and gets all excited. */

286

287

#if defined(DEBUGGING) && !defined(__COVERITY__)

288

# define __ASSERT_(statement) assert(statement),

289

#else

290

# define __ASSERT_(statement)

#endif

/*

=head1 SV Manipulation Functions

295

296

=for apidoc Ama|SV*|newSVpvs|"literal string"

297

Like C<newSVpvn>, but takes a literal string instead of a

298

string/length pair.

299

300

=for apidoc Ama|SV*|newSVpvs_flags|"literal string"|U32 flags

301

Like C<newSVpvn_flags>, but takes a literal string instead of

302

a string/length pair.

303

304

=for apidoc Ama|SV*|newSVpvs_share|"literal string"

305

Like C<newSVpvn_share>, but takes a literal string instead of

306

a string/length pair and omits the hash parameter.

307

308

309

Like C<sv_catpvn_flags>, but takes a literal string instead

310

of a string/length pair.

311

312

=for apidoc Am|void|sv_catpvs_nomg|SV* sv|"literal string"

313

Like C<sv_catpvn_nomg>, but takes a literal string instead of

314

a string/length pair.

315

316

=for apidoc Am|void|sv_catpvs|SV* sv|"literal string"

317

Like C<sv_catpvn>, but takes a literal string instead of a

318

string/length pair.

319

320

=for apidoc Am|void|sv_catpvs_mg|SV* sv|"literal string"

321

Like C<sv_catpvn_mg>, but takes a literal string instead of a

322

string/length pair.

323

324

=for apidoc Am|void|sv_setpvs|SV* sv|"literal string"

325

Like C<sv_setpvn>, but takes a literal string instead of a

326

string/length pair.

327

328

=for apidoc Am|void|sv_setpvs_mg|SV* sv|"literal string"

329

Like C<sv_setpvn_mg>, but takes a literal string instead of a

string/length pair.

Like C<sv_setref_pvn>, but takes a literal string instead of

334

a string/length pair.

335

336

=head1 Memory Management

337

338

=for apidoc Ama|char*|savepvs|"literal string"

339

Like C<savepvn>, but takes a literal string instead of a

340

string/length pair.

341

342

=for apidoc Ama|char*|savesharedpvs|"literal string"

343

A version of C<savepvs()> which allocates the duplicate string in memory

344

which is shared between threads.

=head1 GV Functions

=for apidoc Am|HV*|gv_stashpvs|"name"|I32 create

349

Like C<gv_stashpvn>, but takes a literal string instead of a

350

string/length pair.

351

352

=head1 Hash Manipulation Functions

353

354

355

Like C<hv_fetch>, but takes a literal string instead of a

string/length pair.

Like C<hv_store>, but takes a literal string instead of a

360

string/length pair

361

and omits the hash parameter.

362

363

=head1 Lexer interface

364

365

=for apidoc Amx|void|lex_stuff_pvs|"pv"|U32 flags

366

367

Like L</lex_stuff_pvn>, but takes a literal string instead of

368

a string/length pair.

=cut

*/

/*

=head1 Handy Values

=for apidoc Amu|pair|STR_WITH_LEN|"literal string"

377

378

Returns two comma separated tokens of the input literal string, and its length.

379

This is a convenience macro which helps out in some API calls.

380

Note that it can't be used as an argument to macros or functions that under

381

some configurations might be macros, which means that it requires the full

382

Perl_xxx(aTHX_ ...) form for any API calls where it's used.

=cut

*/

/* Expands to two comma-separated tokens: the literal (pasted between empty
 * strings, so a non-literal argument fails to compile) and its length
 * without the trailing NUL. */
#define STR_WITH_LEN(s) ("" s ""), (sizeof(s)-1)

389

390

/* STR_WITH_LEN() shortcuts */

391

#define newSVpvs(str) Perl_newSVpvn(aTHX_ STR_WITH_LEN(str))

392

#define newSVpvs_flags(str,flags) \

393

Perl_newSVpvn_flags(aTHX_ STR_WITH_LEN(str), flags)

394

#define newSVpvs_share(str) Perl_newSVpvn_share(aTHX_ STR_WITH_LEN(str), 0)

395

/* Literal-string wrappers around Perl_sv_catpvn_flags().  The four variants
 * differ only in the flags argument they pass: caller-supplied, 0,
 * SV_GMAGIC, or SV_GMAGIC|SV_SMAGIC. */
#define sv_catpvs_flags(sv, str, flags) \
    Perl_sv_catpvn_flags(aTHX_ sv, STR_WITH_LEN(str), flags)
#define sv_catpvs_nomg(sv, str) \
    Perl_sv_catpvn_flags(aTHX_ sv, STR_WITH_LEN(str), 0)
#define sv_catpvs(sv, str) \
    Perl_sv_catpvn_flags(aTHX_ sv, STR_WITH_LEN(str), SV_GMAGIC)
#define sv_catpvs_mg(sv, str) \
    Perl_sv_catpvn_flags(aTHX_ sv, STR_WITH_LEN(str), SV_GMAGIC|SV_SMAGIC)

/* Literal-string wrappers around Perl_sv_setpvn() and Perl_sv_setpvn_mg() */
#define sv_setpvs(sv, str) Perl_sv_setpvn(aTHX_ sv, STR_WITH_LEN(str))
#define sv_setpvs_mg(sv, str) Perl_sv_setpvn_mg(aTHX_ sv, STR_WITH_LEN(str))

/* Literal-string wrapper around Perl_sv_setref_pvn() */
#define sv_setref_pvs(rv, classname, str) \
    Perl_sv_setref_pvn(aTHX_ rv, classname, STR_WITH_LEN(str))

/* Literal-string wrappers around Perl_savepvn() and Perl_savesharedpvn() */
#define savepvs(str) Perl_savepvn(aTHX_ STR_WITH_LEN(str))
#define savesharedpvs(str) Perl_savesharedpvn(aTHX_ STR_WITH_LEN(str))

/* Literal-string wrapper around Perl_gv_stashpvn() */
#define gv_stashpvs(str, create) \
    Perl_gv_stashpvn(aTHX_ STR_WITH_LEN(str), create)

/* Literal-string wrapper around Perl_gv_fetchpvn_flags() */
#define gv_fetchpvs(namebeg, add, sv_type) \
    Perl_gv_fetchpvn_flags(aTHX_ STR_WITH_LEN(namebeg), add, sv_type)

/* NOT a literal-string shortcut: the caller supplies the pointer and the
 * length explicitly; this merely forwards to Perl_gv_fetchpvn_flags(). */
#define gv_fetchpvn(namebeg, len, add, sv_type) \
    Perl_gv_fetchpvn_flags(aTHX_ namebeg, len, add, sv_type)

/* Literal-string wrapper around Perl_sv_catxmlpvn() */
#define sv_catxmlpvs(dsv, str, utf8) \
    Perl_sv_catxmlpvn(aTHX_ dsv, STR_WITH_LEN(str), utf8)

/* Literal-string wrapper around Perl_lex_stuff_pvn() */
#define lex_stuff_pvs(pv,flags) Perl_lex_stuff_pvn(aTHX_ STR_WITH_LEN(pv), flags)

/* Literal-string wrapper around Perl_get_cvn_flags() */
#define get_cvs(str, flags) \
    Perl_get_cvn_flags(aTHX_ STR_WITH_LEN(str), (flags))

/*

425

=head1 Miscellaneous Functions

426

427

=for apidoc Am|bool|strNE|char* s1|char* s2

428

Test two C<NUL>-terminated strings to see if they are different. Returns true

429

or false.

430

431

=for apidoc Am|bool|strEQ|char* s1|char* s2

432

Test two C<NUL>-terminated strings to see if they are equal. Returns true or

433

false.

434

435

=for apidoc Am|bool|strLT|char* s1|char* s2

436

Test two C<NUL>-terminated strings to see if the first, C<s1>, is less than the

437

second, C<s2>. Returns true or false.

438

439

=for apidoc Am|bool|strLE|char* s1|char* s2

440

Test two C<NUL>-terminated strings to see if the first, C<s1>, is less than or

441

equal to the second, C<s2>. Returns true or false.

442

443

=for apidoc Am|bool|strGT|char* s1|char* s2

444

Test two C<NUL>-terminated strings to see if the first, C<s1>, is greater than

445

the second, C<s2>. Returns true or false.

446

447

=for apidoc Am|bool|strGE|char* s1|char* s2

448

Test two C<NUL>-terminated strings to see if the first, C<s1>, is greater than

449

or equal to the second, C<s2>. Returns true or false.

450

451

452

Test two C<NUL>-terminated strings to see if they are different. The C<len>

453

parameter indicates the number of bytes to compare. Returns true or false. (A

454

wrapper for C<strncmp>).

455

456

457

Test two C<NUL>-terminated strings to see if they are equal. The C<len>

458

parameter indicates the number of bytes to compare. Returns true or false. (A

459

wrapper for C<strncmp>).

460

461

462

Test two buffers (which may contain embedded C<NUL> characters), to see if they

463

are equal. The C<len> parameter indicates the number of bytes to compare.

464

Returns true (non-zero) if equal, or false (zero) if non-equal.

465

466

467

Like L</memEQ>, but the second string is a literal enclosed in double quotes,

468

C<l1> gives the number of bytes in C<s1>.

469

Returns true (non-zero) if equal, or false (zero) if non-equal.

470

471

472

Test two buffers (which may contain embedded C<NUL> characters), to see if they

473

are not equal. The C<len> parameter indicates the number of bytes to compare.

474

Returns true (non-zero) if non-equal, or false (zero) if equal.

475

476

477

Like L</memNE>, but the second string is a literal enclosed in double quotes,

478

C<l1> gives the number of bytes in C<s1>.

479

Returns true (non-zero) if non-equal, or false (zero) if equal.

=cut

New macros should use the following conventions for their names (which are

484

based on the underlying C library functions):

485

486

(mem | str n? ) (EQ | NE | LT | LE | GT | GE | (( BEGIN | END ) P? )) l? s?

487

488

Each has two main parameters, string-like operands that are compared

489

against each other, as specified by the macro name. Some macros may

490

additionally have one or potentially even two length parameters. If a length

491

parameter applies to both string parameters, it will be positioned third;

492

otherwise any length parameter immediately follows the string parameter it

493

applies to.

494

495

If the prefix to the name is 'str', the string parameter is a pointer to a C

496

language string. Such a string does not contain embedded NUL bytes; its

497

length may be unknown, but can be calculated by C<strlen()>, since it is

498

terminated by a NUL, which isn't included in its length.

499

500

The optional 'n' following 'str' means that there is a third parameter,

501

giving the maximum number of bytes to look at in each string. Even if both

502

strings are longer than the length parameter, those extra bytes will be

503

unexamined.

504

505

The 's' suffix means that the 2nd byte string parameter is a literal C

506

double-quoted string. Its length will automatically be calculated by the

507

macro, so no length parameter will ever be needed for it.

508

509

If the prefix is 'mem', the string parameters don't have to be C strings;

510

they may contain embedded NUL bytes, do not necessarily have a terminating

511

NUL, and their lengths can be known only through other means, which in

512

practice are additional parameter(s) passed to the function. All 'mem'

513

functions have at least one length parameter. Barring any 'l' or 's' suffix,

514

there is a single length parameter, in position 3, which applies to both

515

string parameters. The 's' suffix means, as described above, that the 2nd

516

string is a literal double-quoted C string (hence its length is calculated by

517

the macro, and the length parameter to the function applies just to the first

518

string parameter, and hence is positioned just after it). An 'l' suffix

519

means that the 2nd string parameter has its own length parameter, and the

520

signature will look like memFOOl(s1, l1, s2, l2).

521

522

BEGIN (and END) are for testing if the 2nd string is an initial (or final)

523

substring of the 1st string. 'P' if present indicates that the substring

524

must be a "proper" one in the mathematical sense that the first one must be

525

strictly larger than the 2nd.

*/

/* Whole-string comparisons.  Each wraps strcmp() and yields a C boolean
 * (1 or 0); both operands must be NUL-terminated strings. */
#define strNE(s1,s2) (strcmp(s1,s2) != 0)
#define strEQ(s1,s2) (strcmp(s1,s2) == 0)
#define strLT(s1,s2) (strcmp(s1,s2) < 0)
#define strLE(s1,s2) (strcmp(s1,s2) <= 0)
#define strGT(s1,s2) (strcmp(s1,s2) > 0)
#define strGE(s1,s2) (strcmp(s1,s2) >= 0)

/* Bounded string comparisons: wrap strncmp(), so at most 'l' bytes of each
 * operand are examined.  Each yields a C boolean (1 or 0). */
#define strnNE(s1,s2,l) (strncmp(s1,s2,l) != 0)
#define strnEQ(s1,s2,l) (strncmp(s1,s2,l) == 0)

/* Buffer (in)equality over exactly 'l' bytes, via memcmp().  Unlike memcmp()
 * itself these yield a C boolean: memEQ is 1 when the buffers match, memNE is
 * 1 when they differ.  The casts to (const void *) let any pointer type be
 * passed. */
#define memEQ(s1,s2,l) (memcmp(((const void *) (s1)), ((const void *) (s2)), l) == 0)
#define memNE(s1,s2,l) (! memEQ(s1,s2,l))

/* memEQ and memNE where second comparand is a string constant.
 * 'l' is the length of s1 only; the literal's length comes from sizeof.  The
 * lengths are compared first, so the bytes are only examined when s1 is
 * exactly as long as the literal.  The "" s2 "" pasting rejects a non-literal
 * s2 at compile time. */
#define memEQs(s1, l, s2) \
        (((sizeof(s2)-1) == (l)) && memEQ((s1), ("" s2 ""), (sizeof(s2)-1)))
#define memNEs(s1, l, s2) (! memEQs(s1, l, s2))

/* Keep these private until we decide it was a good idea */

549

#if defined(PERL_CORE) || defined(PERL_EXT) || defined(PERL_EXT_POSIX)

550

551

/* True (1) when the NUL-terminated string s1 starts with the string literal
 * s2.  Only strlen(s2) bytes are compared, and strncmp() stops at s1's NUL,
 * so an s1 shorter than s2 simply fails to match. */
#define strBEGINs(s1,s2) (strncmp(s1,"" s2 "", sizeof(s2)-1) == 0)

/* True (1) when the 'l'-byte buffer s1 starts with the string literal s2.
 * The length test comes first so that memEQ() never reads past the end of a
 * buffer that is shorter than the literal.  Both sides are cast to the signed
 * Ptrdiff_t before comparing. */
#define memBEGINs(s1, l, s2) \
            (   (Ptrdiff_t) (l) >= (Ptrdiff_t) sizeof(s2) - 1 \
             && memEQ(s1, "" s2 "", sizeof(s2)-1))

/* As memBEGINs(), but s2 must be a *proper* prefix: s1 has to be strictly
 * longer than the literal. */
#define memBEGINPs(s1, l, s2) \
            (   (Ptrdiff_t) (l) > (Ptrdiff_t) sizeof(s2) - 1 \
             && memEQ(s1, "" s2 "", sizeof(s2)-1))

/* True (1) when the 'l'-byte buffer s1 ends with the string literal s2.
 * The length test comes first, so the pointer arithmetic that locates the
 * tail of s1 is only evaluated when s1 is at least as long as the literal.
 * s1 is parenthesized so that an expression argument binds correctly. */
#define memENDs(s1, l, s2) \
            (   (Ptrdiff_t) (l) >= (Ptrdiff_t) sizeof(s2) - 1 \
             && memEQ((s1) + (l) - (sizeof(s2) - 1), "" s2 "", sizeof(s2)-1))

/* As memENDs(), but s2 must be a *proper* suffix: s1 has to be strictly
 * longer than the literal.  The bound is sizeof(s2) - 1 (the literal's length
 * without its NUL), matching memBEGINPs(); comparing against sizeof(s2)
 * would wrongly reject a buffer exactly one byte longer than the literal. */
#define memENDPs(s1, l, s2) \
            (   (Ptrdiff_t) (l) > (Ptrdiff_t) sizeof(s2) - 1 \
             && memEQ((s1) + (l) - (sizeof(s2) - 1), "" s2 "", sizeof(s2)-1))

#endif /* End of making macros private */

566

567

/* Ordering comparisons over exactly 'l' bytes, via memcmp().  Each yields a
 * C boolean (1 or 0) according to the sign of memcmp()'s result. */
#define memLT(s1,s2,l) (memcmp(s1,s2,l) < 0)
#define memLE(s1,s2,l) (memcmp(s1,s2,l) <= 0)
#define memGT(s1,s2,l) (memcmp(s1,s2,l) > 0)
#define memGE(s1,s2,l) (memcmp(s1,s2,l) >= 0)

/*

* Character classes.

*

* Unfortunately, the introduction of locales means that we

576

* can't trust isupper(), etc. to tell the truth. And when

577

* it comes to /\w+/ with tainting enabled, we *must* be able

578

* to trust our character classes.

579

*

580

* Therefore, the default tests in the text of Perl will be

581

* independent of locale. Any code that wants to depend on

582

* the current locale will use the tests that begin with "lc".

583

*/

584

585

/* Define CTYPE256 whenever setlocale() is available, unless something has
 * already defined it.  XXX Is there a better test for this? */
#if defined(HAS_SETLOCALE) && !defined(CTYPE256)
#  define CTYPE256
#endif

/*

=head1 Character classification

594

This section is about functions (really macros) that classify characters

595

into types, such as punctuation versus alphabetic, etc. Most of these are

596

analogous to regular expression character classes. (See

597

L<perlrecharclass/POSIX Character Classes>.) There are several variants for

598

each class. (Not all macros have all variants; each item below lists the

599

ones valid for it.) None are affected by C<use bytes>, and only the ones

600

with C<LC> in the name are affected by the current locale.

601

602

The base function, e.g., C<isALPHA()>, takes any signed or unsigned value,

603

treating it as a code point, and returns a boolean as to whether or not the

604

character represented by it is (or on non-ASCII platforms, corresponds to) an

605

ASCII character in the named class based on platform, Unicode, and Perl rules.

606

If the input is a number that doesn't fit in an octet, FALSE is returned.

607

608

Variant C<isI<FOO>_A> (e.g., C<isALPHA_A()>) is identical to the base function

609

with no suffix C<"_A">. This variant is used to emphasize by its name that

610

only ASCII-range characters can return TRUE.

611

612

Variant C<isI<FOO>_L1> imposes the Latin-1 (or EBCDIC equivalent) character set

613

onto the platform. That is, the code points that are ASCII are unaffected,

614

since ASCII is a subset of Latin-1. But the non-ASCII code points are treated

615

as if they are Latin-1 characters. For example, C<isWORDCHAR_L1()> will return

616

true when called with the code point 0xDF, which is a word character in both

617

ASCII and EBCDIC (though it represents different characters in each).

618

If the input is a number that doesn't fit in an octet, FALSE is returned.

619

(Perl's documentation uses a colloquial definition of Latin-1, to include all

620

code points below 256.)

621

622

Variant C<isI<FOO>_uvchr> is exactly like the C<isI<FOO>_L1> variant, for

623

inputs below 256, but if the code point is larger than 255, Unicode rules are

624

used to determine if it is in the character class. For example,

625

C<isWORDCHAR_uvchr(0x100)> returns TRUE, since 0x100 is LATIN CAPITAL LETTER A

626

WITH MACRON in Unicode, and is a word character.

627

628

Variants C<isI<FOO>_utf8> and C<isI<FOO>_utf8_safe> are like C<isI<FOO>_uvchr>,

629

but are used for UTF-8 encoded strings. The two forms are different names for

630

the same thing. Each call to one of these classifies the first character of

631

the string starting at C<s>. The second parameter, C<e>, points to anywhere in

632

the string beyond the first character, up to one byte past the end of the

633

entire string. Although both variants are identical, the suffix C<_safe> in

634

one name emphasizes that it will not attempt to read beyond S<C<e - 1>>,

635

provided that the constraint S<C<s E<lt> e>> is true (this is asserted for in

636

C<-DDEBUGGING> builds). If the UTF-8 for the input character is malformed in

637

some way, the program may croak, or the function may return FALSE, at the

638

discretion of the implementation, and subject to change in future releases.

639

640

Variant C<isI<FOO>_LC> is like the C<isI<FOO>_A> and C<isI<FOO>_L1> variants,

641

but the result is based on the current locale, which is what C<LC> in the name

642

stands for. If Perl can determine that the current locale is a UTF-8 locale,

643

it uses the published Unicode rules; otherwise, it uses the C library function

644

that gives the named classification. For example, C<isDIGIT_LC()> when not in

645

a UTF-8 locale returns the result of calling C<isdigit()>. FALSE is always

646

returned if the input won't fit into an octet. On some platforms where the C

647

library function is known to be defective, Perl changes its result to follow

648

the POSIX standard's rules.

649

650

Variant C<isI<FOO>_LC_uvchr> acts exactly like C<isI<FOO>_LC> for inputs less

651

than 256, but for larger ones it returns the Unicode classification of the code

652

point.

653

654

Variants C<isI<FOO>_LC_utf8> and C<isI<FOO>_LC_utf8_safe> are like

655

C<isI<FOO>_LC_uvchr>, but are used for UTF-8 encoded strings. The two forms

656

are different names for the same thing. Each call to one of these classifies

657

the first character of the string starting at C<s>. The second parameter,

658

C<e>, points to anywhere in the string beyond the first character, up to one

659

byte past the end of the entire string. Although both variants are identical,

660

the suffix C<_safe> in one name emphasizes that it will not attempt to read

661

beyond S<C<e - 1>>, provided that the constraint S<C<s E<lt> e>> is true (this

662

is asserted for in C<-DDEBUGGING> builds). If the UTF-8 for the input

663

character is malformed in some way, the program may croak, or the function may

664

return FALSE, at the discretion of the implementation, and subject to change in

665

future releases.

666

667

=for apidoc Am|bool|isALPHA|int ch

668

Returns a boolean indicating whether the specified input is one of C<[A-Za-z]>,

669

analogous to C<m/[[:alpha:]]/>.

670

See the L<top of this section|/Character classification> for an explanation of

671

variants

672

C<isALPHA_A>, C<isALPHA_L1>, C<isALPHA_uvchr>, C<isALPHA_utf8>,

673

C<isALPHA_utf8_safe>, C<isALPHA_LC>, C<isALPHA_LC_uvchr>, C<isALPHA_LC_utf8>,

674

and C<isALPHA_LC_utf8_safe>.

=cut

Here and below, we add the prototypes of these macros for downstream programs

679

that would be interested in them, such as Devel::PPPort

680

681

=for apidoc Amh|bool|isALPHA_A|int ch

682

=for apidoc Amh|bool|isALPHA_L1|int ch

683

=for apidoc Amh|bool|isALPHA_uvchr|int ch

684

=for apidoc Amh|bool|isALPHA_utf8_safe|U8 * s|U8 * end

685

=for apidoc Amh|bool|isALPHA_utf8|U8 * s

686

=for apidoc Amh|bool|isALPHA_LC|int ch

687

=for apidoc Amh|bool|isALPHA_LC_uvchr|int ch

688

=for apidoc Amh|bool|isALPHA_LC_utf8_safe|U8 * s| U8 *end

689

690

=for apidoc Am|bool|isALPHANUMERIC|int ch

691

Returns a boolean indicating whether the specified character is one of

692

C<[A-Za-z0-9]>, analogous to C<m/[[:alnum:]]/>.

693

See the L<top of this section|/Character classification> for an explanation of

694

variants

695

C<isALPHANUMERIC_A>, C<isALPHANUMERIC_L1>, C<isALPHANUMERIC_uvchr>,

696

C<isALPHANUMERIC_utf8>, C<isALPHANUMERIC_utf8_safe>, C<isALPHANUMERIC_LC>,

697

C<isALPHANUMERIC_LC_uvchr>, C<isALPHANUMERIC_LC_utf8>, and

698

C<isALPHANUMERIC_LC_utf8_safe>.

699

700

A (discouraged from use) synonym is C<isALNUMC> (where the C<C> suffix means

701

this corresponds to the C language alphanumeric definition). Also

702

there are the variants

703

C<isALNUMC_A>, C<isALNUMC_L1>,

704

C<isALNUMC_LC>, and C<isALNUMC_LC_uvchr>.

705

706

=for apidoc Amh|bool|isALPHANUMERIC_A|int ch

707

=for apidoc Amh|bool|isALPHANUMERIC_L1|int ch

708

=for apidoc Amh|bool|isALPHANUMERIC_uvchr|int ch

709

=for apidoc Amh|bool|isALPHANUMERIC_utf8_safe|U8 * s|U8 * end

710

=for apidoc Amh|bool|isALPHANUMERIC_utf8|U8 * s

711

=for apidoc Amh|bool|isALPHANUMERIC_LC|int ch

712

=for apidoc Amh|bool|isALPHANUMERIC_LC_uvchr|int ch

713

=for apidoc Amh|bool|isALPHANUMERIC_LC_utf8_safe|U8 * s| U8 *end

714

=for apidoc Amh|bool|isALNUMC|int ch

715

=for apidoc Amh|bool|isALNUMC_A|int ch

716

=for apidoc Amh|bool|isALNUMC_L1|int ch

717

=for apidoc Amh|bool|isALNUMC_LC|int ch

718

=for apidoc Amh|bool|isALNUMC_LC_uvchr|int ch

719

720

=for apidoc Am|bool|isASCII|int ch

721

Returns a boolean indicating whether the specified character is one of the 128

722

characters in the ASCII character set, analogous to C<m/[[:ascii:]]/>.

723

On non-ASCII platforms, it returns TRUE iff this

724

character corresponds to an ASCII character. Variants C<isASCII_A()> and

725

C<isASCII_L1()> are identical to C<isASCII()>.

726

See the L<top of this section|/Character classification> for an explanation of

727

variants

728

C<isASCII_uvchr>, C<isASCII_utf8>, C<isASCII_utf8_safe>, C<isASCII_LC>,

729

C<isASCII_LC_uvchr>, C<isASCII_LC_utf8>, and C<isASCII_LC_utf8_safe>.

730

Note, however, that some platforms do not have the C library routine

731

C<isascii()>. In these cases, the variants whose names contain C<LC> are the

732

same as the corresponding ones without.

733

734

=for apidoc Amh|bool|isASCII_A|int ch

735

=for apidoc Amh|bool|isASCII_L1|int ch

736

=for apidoc Amh|bool|isASCII_uvchr|int ch

737

=for apidoc Amh|bool|isASCII_utf8_safe|U8 * s|U8 * end

738

=for apidoc Amh|bool|isASCII_utf8|U8 * s

739

=for apidoc Amh|bool|isASCII_LC|int ch

740

=for apidoc Amh|bool|isASCII_LC_uvchr|int ch

741

=for apidoc Amh|bool|isASCII_LC_utf8_safe|U8 * s| U8 *end

742

743

Also note, that because all ASCII characters are UTF-8 invariant (meaning they

744

have the exact same representation (always a single byte) whether encoded in

745

UTF-8 or not), C<isASCII> will give the correct results when called with any

746

byte in any string encoded or not in UTF-8. And similarly C<isASCII_utf8> and

747

C<isASCII_utf8_safe> will work properly on any string encoded or not in UTF-8.

748

749

=for apidoc Am|bool|isBLANK|char ch

750

Returns a boolean indicating whether the specified character is a

751

character considered to be a blank, analogous to C<m/[[:blank:]]/>.

752

See the L<top of this section|/Character classification> for an explanation of

753

variants

754

C<isBLANK_A>, C<isBLANK_L1>, C<isBLANK_uvchr>, C<isBLANK_utf8>,

755

C<isBLANK_utf8_safe>, C<isBLANK_LC>, C<isBLANK_LC_uvchr>, C<isBLANK_LC_utf8>,

756

and C<isBLANK_LC_utf8_safe>. Note,

757

however, that some platforms do not have the C library routine

758

C<isblank()>. In these cases, the variants whose names contain C<LC> are

759

the same as the corresponding ones without.

760

761

=for apidoc Amh|bool|isBLANK_A|int ch

762

=for apidoc Amh|bool|isBLANK_L1|int ch

763

=for apidoc Amh|bool|isBLANK_uvchr|int ch

764

=for apidoc Amh|bool|isBLANK_utf8_safe|U8 * s|U8 * end

765

=for apidoc Amh|bool|isBLANK_utf8|U8 * s

766

=for apidoc Amh|bool|isBLANK_LC|int ch

767

=for apidoc Amh|bool|isBLANK_LC_uvchr|int ch

768

=for apidoc Amh|bool|isBLANK_LC_utf8_safe|U8 * s| U8 *end

769

770

=for apidoc Am|bool|isCNTRL|char ch

771

Returns a boolean indicating whether the specified character is a

772

control character, analogous to C<m/[[:cntrl:]]/>.

773

See the L<top of this section|/Character classification> for an explanation of

774

variants

775

C<isCNTRL_A>, C<isCNTRL_L1>, C<isCNTRL_uvchr>, C<isCNTRL_utf8>,

776

C<isCNTRL_utf8_safe>, C<isCNTRL_LC>, C<isCNTRL_LC_uvchr>, C<isCNTRL_LC_utf8>,

777

and C<isCNTRL_LC_utf8_safe>. On EBCDIC

778

platforms, you almost always want to use the C<isCNTRL_L1> variant.

779

780

=for apidoc Amh|bool|isCNTRL_A|int ch

781

=for apidoc Amh|bool|isCNTRL_L1|int ch

782

=for apidoc Amh|bool|isCNTRL_uvchr|int ch

783

=for apidoc Amh|bool|isCNTRL_utf8_safe|U8 * s|U8 * end

784

=for apidoc Amh|bool|isCNTRL_utf8|U8 * s

785

=for apidoc Amh|bool|isCNTRL_LC|int ch

786

=for apidoc Amh|bool|isCNTRL_LC_uvchr|int ch

787

=for apidoc Amh|bool|isCNTRL_LC_utf8_safe|U8 * s| U8 *end

788

789

=for apidoc Am|bool|isDIGIT|char ch

790

Returns a boolean indicating whether the specified character is a

791

digit, analogous to C<m/[[:digit:]]/>.

792

Variants C<isDIGIT_A> and C<isDIGIT_L1> are identical to C<isDIGIT>.

793

See the L<top of this section|/Character classification> for an explanation of

794

variants

795

C<isDIGIT_uvchr>, C<isDIGIT_utf8>, C<isDIGIT_utf8_safe>, C<isDIGIT_LC>,

796

C<isDIGIT_LC_uvchr>, C<isDIGIT_LC_utf8>, and C<isDIGIT_LC_utf8_safe>.

797

798

=for apidoc Amh|bool|isDIGIT_A|int ch

799

=for apidoc Amh|bool|isDIGIT_L1|int ch

800

=for apidoc Amh|bool|isDIGIT_uvchr|int ch

801

=for apidoc Amh|bool|isDIGIT_utf8_safe|U8 * s|U8 * end

802

=for apidoc Amh|bool|isDIGIT_utf8|U8 * s

803

=for apidoc Amh|bool|isDIGIT_LC|int ch

804

=for apidoc Amh|bool|isDIGIT_LC_uvchr|int ch

805

=for apidoc Amh|bool|isDIGIT_LC_utf8_safe|U8 * s| U8 *end

806

807

=for apidoc Am|bool|isGRAPH|char ch

808

Returns a boolean indicating whether the specified character is a

809

graphic character, analogous to C<m/[[:graph:]]/>.

810

See the L<top of this section|/Character classification> for an explanation of

811

variants C<isGRAPH_A>, C<isGRAPH_L1>, C<isGRAPH_uvchr>, C<isGRAPH_utf8>,

812

C<isGRAPH_utf8_safe>, C<isGRAPH_LC>, C<isGRAPH_LC_uvchr>,

813

C<isGRAPH_LC_utf8>, and C<isGRAPH_LC_utf8_safe>.

814

815

=for apidoc Amh|bool|isGRAPH_A|int ch

816

=for apidoc Amh|bool|isGRAPH_L1|int ch

817

=for apidoc Amh|bool|isGRAPH_uvchr|int ch

818

=for apidoc Amh|bool|isGRAPH_utf8_safe|U8 * s|U8 * end

819

=for apidoc Amh|bool|isGRAPH_utf8|U8 * s

820

=for apidoc Amh|bool|isGRAPH_LC|int ch

821

=for apidoc Amh|bool|isGRAPH_LC_uvchr|int ch

822

=for apidoc Amh|bool|isGRAPH_LC_utf8_safe|U8 * s| U8 *end

823

824

=for apidoc Am|bool|isLOWER|char ch

825

Returns a boolean indicating whether the specified character is a

826

lowercase character, analogous to C<m/[[:lower:]]/>.

827

See the L<top of this section|/Character classification> for an explanation of

828

variants

829

C<isLOWER_A>, C<isLOWER_L1>, C<isLOWER_uvchr>, C<isLOWER_utf8>,

830

C<isLOWER_utf8_safe>, C<isLOWER_LC>, C<isLOWER_LC_uvchr>, C<isLOWER_LC_utf8>,

831

and C<isLOWER_LC_utf8_safe>.

832

833

=for apidoc Amh|bool|isLOWER_A|int ch

834

=for apidoc Amh|bool|isLOWER_L1|int ch

835

=for apidoc Amh|bool|isLOWER_uvchr|int ch

836

=for apidoc Amh|bool|isLOWER_utf8_safe|U8 * s|U8 * end

837

=for apidoc Amh|bool|isLOWER_utf8|U8 * s

838

=for apidoc Amh|bool|isLOWER_LC|int ch

839

=for apidoc Amh|bool|isLOWER_LC_uvchr|int ch

840

=for apidoc Amh|bool|isLOWER_LC_utf8_safe|U8 * s| U8 *end

841

842

=for apidoc Am|bool|isOCTAL|char ch

843

Returns a boolean indicating whether the specified character is an

844

octal digit, [0-7].

845

The only two variants are C<isOCTAL_A> and C<isOCTAL_L1>; each is identical to

846

C<isOCTAL>.

847

848

=for apidoc Amh|bool|isOCTAL_A|int ch

849

=for apidoc Amh|bool|isOCTAL_L1|int ch

850

851

=for apidoc Am|bool|isPUNCT|char ch

852

Returns a boolean indicating whether the specified character is a

853

punctuation character, analogous to C<m/[[:punct:]]/>.

854

Note that the definition of what is punctuation isn't as

855

straightforward as one might desire. See L<perlrecharclass/POSIX Character

856

Classes> for details.

857

See the L<top of this section|/Character classification> for an explanation of

858

variants C<isPUNCT_A>, C<isPUNCT_L1>, C<isPUNCT_uvchr>, C<isPUNCT_utf8>,

859

C<isPUNCT_utf8_safe>, C<isPUNCT_LC>, C<isPUNCT_LC_uvchr>, C<isPUNCT_LC_utf8>,

860

and C<isPUNCT_LC_utf8_safe>.

861

862

=for apidoc Amh|bool|isPUNCT_A|int ch

863

=for apidoc Amh|bool|isPUNCT_L1|int ch

864

=for apidoc Amh|bool|isPUNCT_uvchr|int ch

865

=for apidoc Amh|bool|isPUNCT_utf8_safe|U8 * s|U8 * end

866

=for apidoc Amh|bool|isPUNCT_utf8|U8 * s

867

=for apidoc Amh|bool|isPUNCT_LC|int ch

868

=for apidoc Amh|bool|isPUNCT_LC_uvchr|int ch

869

=for apidoc Amh|bool|isPUNCT_LC_utf8_safe|U8 * s| U8 *end

870

871

=for apidoc Am|bool|isSPACE|char ch

872

Returns a boolean indicating whether the specified character is a

873

whitespace character. This is analogous

874

to what C<m/\s/> matches in a regular expression. Starting in Perl 5.18

875

this also matches what C<m/[[:space:]]/> does. Prior to 5.18, only the

876

locale forms of this macro (the ones with C<LC> in their names) matched

877

precisely what C<m/[[:space:]]/> does. In those releases, the only difference,

878

in the non-locale variants, was that C<isSPACE()> did not match a vertical tab.

879

(See L</isPSXSPC> for a macro that matches a vertical tab in all releases.)

880

See the L<top of this section|/Character classification> for an explanation of

881

variants

882

C<isSPACE_A>, C<isSPACE_L1>, C<isSPACE_uvchr>, C<isSPACE_utf8>,

883

C<isSPACE_utf8_safe>, C<isSPACE_LC>, C<isSPACE_LC_uvchr>, C<isSPACE_LC_utf8>,

884

and C<isSPACE_LC_utf8_safe>.

885

886

=for apidoc Amh|bool|isSPACE_A|int ch

887

=for apidoc Amh|bool|isSPACE_L1|int ch

888

=for apidoc Amh|bool|isSPACE_uvchr|int ch

889

=for apidoc Amh|bool|isSPACE_utf8_safe|U8 * s|U8 * end

890

=for apidoc Amh|bool|isSPACE_utf8|U8 * s

891

=for apidoc Amh|bool|isSPACE_LC|int ch

892

=for apidoc Amh|bool|isSPACE_LC_uvchr|int ch

893

=for apidoc Amh|bool|isSPACE_LC_utf8_safe|U8 * s| U8 *end

894

895

=for apidoc Am|bool|isPSXSPC|char ch

896

(short for Posix Space)

897

Starting in 5.18, this is identical in all its forms to the

898

corresponding C<isSPACE()> macros.

899

The locale forms of this macro are identical to their corresponding

900

C<isSPACE()> forms in all Perl releases. In releases prior to 5.18, the

901

non-locale forms differ from their C<isSPACE()> forms only in that the

902

C<isSPACE()> forms don't match a Vertical Tab, and the C<isPSXSPC()> forms do.

903

Otherwise they are identical. Thus this macro is analogous to what

904

C<m/[[:space:]]/> matches in a regular expression.

905

See the L<top of this section|/Character classification> for an explanation of

906

variants C<isPSXSPC_A>, C<isPSXSPC_L1>, C<isPSXSPC_uvchr>, C<isPSXSPC_utf8>,

907

C<isPSXSPC_utf8_safe>, C<isPSXSPC_LC>, C<isPSXSPC_LC_uvchr>,

908

C<isPSXSPC_LC_utf8>, and C<isPSXSPC_LC_utf8_safe>.

909

910

=for apidoc Amh|bool|isPSXSPC_A|int ch

911

=for apidoc Amh|bool|isPSXSPC_L1|int ch

912

=for apidoc Amh|bool|isPSXSPC_uvchr|int ch

913

=for apidoc Amh|bool|isPSXSPC_utf8_safe|U8 * s|U8 * end

914

=for apidoc Amh|bool|isPSXSPC_utf8|U8 * s

915

=for apidoc Amh|bool|isPSXSPC_LC|int ch

916

=for apidoc Amh|bool|isPSXSPC_LC_uvchr|int ch

917

=for apidoc Amh|bool|isPSXSPC_LC_utf8_safe|U8 * s| U8 *end

918

919

=for apidoc Am|bool|isUPPER|char ch

920

Returns a boolean indicating whether the specified character is an

921

uppercase character, analogous to C<m/[[:upper:]]/>.

922

See the L<top of this section|/Character classification> for an explanation of

923

variants C<isUPPER_A>, C<isUPPER_L1>, C<isUPPER_uvchr>, C<isUPPER_utf8>,

924

C<isUPPER_utf8_safe>, C<isUPPER_LC>, C<isUPPER_LC_uvchr>, C<isUPPER_LC_utf8>,

925

and C<isUPPER_LC_utf8_safe>.

926

927

=for apidoc Amh|bool|isUPPER_A|int ch

928

=for apidoc Amh|bool|isUPPER_L1|int ch

929

=for apidoc Amh|bool|isUPPER_uvchr|int ch

930

=for apidoc Amh|bool|isUPPER_utf8_safe|U8 * s|U8 * end

931

=for apidoc Amh|bool|isUPPER_utf8|U8 * s

932

=for apidoc Amh|bool|isUPPER_LC|int ch

933

=for apidoc Amh|bool|isUPPER_LC_uvchr|int ch

934

=for apidoc Amh|bool|isUPPER_LC_utf8_safe|U8 * s| U8 *end

935

936

=for apidoc Am|bool|isPRINT|char ch

937

Returns a boolean indicating whether the specified character is a

938

printable character, analogous to C<m/[[:print:]]/>.

939

See the L<top of this section|/Character classification> for an explanation of

940

variants

941

C<isPRINT_A>, C<isPRINT_L1>, C<isPRINT_uvchr>, C<isPRINT_utf8>,

942

C<isPRINT_utf8_safe>, C<isPRINT_LC>, C<isPRINT_LC_uvchr>, C<isPRINT_LC_utf8>,

943

and C<isPRINT_LC_utf8_safe>.

944

945

=for apidoc Amh|bool|isPRINT_A|int ch

946

=for apidoc Amh|bool|isPRINT_L1|int ch

947

=for apidoc Amh|bool|isPRINT_uvchr|int ch

948

=for apidoc Amh|bool|isPRINT_utf8_safe|U8 * s|U8 * end

949

=for apidoc Amh|bool|isPRINT_utf8|U8 * s

950

=for apidoc Amh|bool|isPRINT_LC|int ch

951

=for apidoc Amh|bool|isPRINT_LC_uvchr|int ch

952

=for apidoc Amh|bool|isPRINT_LC_utf8_safe|U8 * s| U8 *end

953

954

=for apidoc Am|bool|isWORDCHAR|char ch

955

Returns a boolean indicating whether the specified character is a character

956

that is a word character, analogous to what C<m/\w/> and C<m/[[:word:]]/> match

957

in a regular expression. A word character is an alphabetic character, a

958

decimal digit, a connecting punctuation character (such as an underscore), or

959

a "mark" character that attaches to one of those (like some sort of accent).

960

C<isALNUM()> is a synonym provided for backward compatibility, even though a

961

word character includes more than the standard C language meaning of

962

alphanumeric.

963

See the L<top of this section|/Character classification> for an explanation of

964

variants C<isWORDCHAR_A>, C<isWORDCHAR_L1>, C<isWORDCHAR_uvchr>,

965

C<isWORDCHAR_utf8>, and C<isWORDCHAR_utf8_safe>. C<isWORDCHAR_LC>,

966

C<isWORDCHAR_LC_uvchr>, C<isWORDCHAR_LC_utf8>, and C<isWORDCHAR_LC_utf8_safe>

967

are also as described there, but additionally include the platform's native

968

underscore.

969

970

=for apidoc Amh|bool|isWORDCHAR_A|int ch

971

=for apidoc Amh|bool|isWORDCHAR_L1|int ch

972

=for apidoc Amh|bool|isWORDCHAR_uvchr|int ch

973

=for apidoc Amh|bool|isWORDCHAR_utf8_safe|U8 * s|U8 * end

974

=for apidoc Amh|bool|isWORDCHAR_utf8|U8 * s

975

=for apidoc Amh|bool|isWORDCHAR_LC|int ch

976

=for apidoc Amh|bool|isWORDCHAR_LC_uvchr|int ch

977

=for apidoc Amh|bool|isWORDCHAR_LC_utf8_safe|U8 * s| U8 *end

978

=for apidoc Amh|bool|isALNUM|int ch

979

=for apidoc Amh|bool|isALNUM_A|int ch

980

=for apidoc Amh|bool|isALNUM_LC|int ch

981

=for apidoc Amh|bool|isALNUM_LC_uvchr|int ch

982

983

=for apidoc Am|bool|isXDIGIT|char ch

984

Returns a boolean indicating whether the specified character is a hexadecimal

985

digit. In the ASCII range these are C<[0-9A-Fa-f]>. Variants C<isXDIGIT_A()>

986

and C<isXDIGIT_L1()> are identical to C<isXDIGIT()>.

987

See the L<top of this section|/Character classification> for an explanation of

988

variants

989

C<isXDIGIT_uvchr>, C<isXDIGIT_utf8>, C<isXDIGIT_utf8_safe>, C<isXDIGIT_LC>,

990

C<isXDIGIT_LC_uvchr>, C<isXDIGIT_LC_utf8>, and C<isXDIGIT_LC_utf8_safe>.

991

992

=for apidoc Amh|bool|isXDIGIT_A|int ch

993

=for apidoc Amh|bool|isXDIGIT_L1|int ch

994

=for apidoc Amh|bool|isXDIGIT_uvchr|int ch

995

=for apidoc Amh|bool|isXDIGIT_utf8_safe|U8 * s|U8 * end

996

=for apidoc Amh|bool|isXDIGIT_utf8|U8 * s

997

=for apidoc Amh|bool|isXDIGIT_LC|int ch

998

=for apidoc Amh|bool|isXDIGIT_LC_uvchr|int ch

999

=for apidoc Amh|bool|isXDIGIT_LC_utf8_safe|U8 * s| U8 *end

1000

1001

=for apidoc Am|bool|isIDFIRST|char ch

1002

Returns a boolean indicating whether the specified character can be the first

1003

character of an identifier. This is very close to, but not quite the same as

1004

the official Unicode property C<XID_Start>. The difference is that this

1005

returns true only if the input character also matches L</isWORDCHAR>.

1006

See the L<top of this section|/Character classification> for an explanation of

1007

variants

1008

C<isIDFIRST_A>, C<isIDFIRST_L1>, C<isIDFIRST_uvchr>, C<isIDFIRST_utf8>,

1009

C<isIDFIRST_utf8_safe>, C<isIDFIRST_LC>, C<isIDFIRST_LC_uvchr>,

1010

C<isIDFIRST_LC_utf8>, and C<isIDFIRST_LC_utf8_safe>.

1011

1012

=for apidoc Amh|bool|isIDFIRST_A|int ch

1013

=for apidoc Amh|bool|isIDFIRST_L1|int ch

1014

=for apidoc Amh|bool|isIDFIRST_uvchr|int ch

1015

=for apidoc Amh|bool|isIDFIRST_utf8_safe|U8 * s|U8 * end

1016

=for apidoc Amh|bool|isIDFIRST_utf8|U8 * s

1017

=for apidoc Amh|bool|isIDFIRST_LC|int ch

1018

=for apidoc Amh|bool|isIDFIRST_LC_uvchr|int ch

1019

=for apidoc Amh|bool|isIDFIRST_LC_utf8_safe|U8 * s| U8 *end

1020

1021

=for apidoc Am|bool|isIDCONT|char ch

1022

Returns a boolean indicating whether the specified character can be the

1023

second or succeeding character of an identifier. This is very close to, but

1024

not quite the same as the official Unicode property C<XID_Continue>. The

1025

difference is that this returns true only if the input character also matches

1026

L</isWORDCHAR>. See the L<top of this section|/Character classification> for

1027

an explanation of variants C<isIDCONT_A>, C<isIDCONT_L1>, C<isIDCONT_uvchr>,

1028

C<isIDCONT_utf8>, C<isIDCONT_utf8_safe>, C<isIDCONT_LC>, C<isIDCONT_LC_uvchr>,

1029

C<isIDCONT_LC_utf8>, and C<isIDCONT_LC_utf8_safe>.

1030

1031

=for apidoc Amh|bool|isIDCONT_A|int ch

1032

=for apidoc Amh|bool|isIDCONT_L1|int ch

1033

=for apidoc Amh|bool|isIDCONT_uvchr|int ch

1034

=for apidoc Amh|bool|isIDCONT_utf8_safe|U8 * s|U8 * end

1035

=for apidoc Amh|bool|isIDCONT_utf8|U8 * s

1036

=for apidoc Amh|bool|isIDCONT_LC|int ch

1037

=for apidoc Amh|bool|isIDCONT_LC_uvchr|int ch

1038

=for apidoc Amh|bool|isIDCONT_LC_utf8_safe|U8 * s| U8 *end

1039

1040

=head1 Miscellaneous Functions

1041

1042

=for apidoc Am|U8|READ_XDIGIT|char* str

1043

Returns the value of an ASCII-range hex digit and advances the string pointer.

1044

Behaviour is only well defined when isXDIGIT(*str) is true.

1045

1046

=head1 Character case changing

1047

Perl uses "full" Unicode case mappings. This means that converting a single

1048

character to another case may result in a sequence of more than one character.

1049

For example, the uppercase of C<E<223>> (LATIN SMALL LETTER SHARP S) is the two

1050

character sequence C<SS>. This presents some complications. The lowercase of

1051

all characters in the range 0..255 is a single character, and thus

1052

C<L</toLOWER_L1>> is furnished. But, C<toUPPER_L1> can't exist, as it couldn't

1053

return a valid result for all legal inputs. Instead C<L</toUPPER_uvchr>> has

1054

an API that does allow every possible legal result to be returned. Likewise

1055

no other function that is crippled by not being able to give the correct

1056

results for the full range of possible inputs has been implemented here.

1057

1058

=for apidoc Am|U8|toUPPER|int ch

1059

Converts the specified character to uppercase. If the input is anything but an

1060

ASCII lowercase character, that input character itself is returned. Variant

1061

C<toUPPER_A> is equivalent.

1062

1063

1064

Converts the code point C<cp> to its uppercase version, and

1065

stores that in UTF-8 in C<s>, and its length in bytes in C<lenp>. The code

1066

point is interpreted as native if less than 256; otherwise as Unicode. Note

1067

that the buffer pointed to by C<s> needs to be at least C<UTF8_MAXBYTES_CASE+1>

1068

bytes since the uppercase version may be longer than the original character.

1069

1070

The first code point of the uppercased version is returned

1071

(but note, as explained at L<the top of this section|/Character case

1072

changing>, that there may be more.)

1073

1074

=for apidoc Am|UV|toUPPER_utf8|U8* p|U8* e|U8* s|STRLEN* lenp

1075

Converts the first UTF-8 encoded character in the sequence starting at C<p> and

1076

extending no further than S<C<e - 1>> to its uppercase version, and

1077

stores that in UTF-8 in C<s>, and its length in bytes in C<lenp>. Note

1078

that the buffer pointed to by C<s> needs to be at least C<UTF8_MAXBYTES_CASE+1>

1079

bytes since the uppercase version may be longer than the original character.

1080

1081

The first code point of the uppercased version is returned

1082

(but note, as explained at L<the top of this section|/Character case

1083

changing>, that there may be more).

1084

1085

It will not attempt to read beyond S<C<e - 1>>, provided that the constraint

1086

S<C<p E<lt> e>> is true (this is asserted for in C<-DDEBUGGING> builds). If

1087

the UTF-8 for the input character is malformed in some way, the program may

1088

croak, or the function may return the REPLACEMENT CHARACTER, at the discretion

1089

of the implementation, and subject to change in future releases.

1090

1091

=for apidoc Am|UV|toUPPER_utf8_safe|U8* p|U8* e|U8* s|STRLEN* lenp

1092

Same as L</toUPPER_utf8>.

1093

1094

=for apidoc Am|U8|toFOLD|U8 ch

1095

Converts the specified character to foldcase. If the input is anything but an

1096

ASCII uppercase character, that input character itself is returned. Variant

1097

C<toFOLD_A> is equivalent. (There is no equivalent C<toFOLD_L1> for the full

1098

Latin1 range, as the full generality of L</toFOLD_uvchr> is needed there.)

1099

1100

1101

Converts the code point C<cp> to its foldcase version, and

1102

stores that in UTF-8 in C<s>, and its length in bytes in C<lenp>. The code

1103

point is interpreted as native if less than 256; otherwise as Unicode. Note

1104

that the buffer pointed to by C<s> needs to be at least C<UTF8_MAXBYTES_CASE+1>

1105

bytes since the foldcase version may be longer than the original character.

1106

1107

The first code point of the foldcased version is returned

1108

(but note, as explained at L<the top of this section|/Character case

1109

changing>, that there may be more).

1110

1111

=for apidoc Am|UV|toFOLD_utf8|U8* p|U8* e|U8* s|STRLEN* lenp

1112

Converts the first UTF-8 encoded character in the sequence starting at C<p> and

1113

extending no further than S<C<e - 1>> to its foldcase version, and

1114

stores that in UTF-8 in C<s>, and its length in bytes in C<lenp>. Note

1115

that the buffer pointed to by C<s> needs to be at least C<UTF8_MAXBYTES_CASE+1>

1116

bytes since the foldcase version may be longer than the original character.

1117

1118

The first code point of the foldcased version is returned

1119

(but note, as explained at L<the top of this section|/Character case

1120

changing>, that there may be more).

1121

1122

It will not attempt

1123

to read beyond S<C<e - 1>>, provided that the constraint S<C<p E<lt> e>> is

1124

true (this is asserted for in C<-DDEBUGGING> builds). If the UTF-8 for the

1125

input character is malformed in some way, the program may croak, or the

1126

function may return the REPLACEMENT CHARACTER, at the discretion of the

1127

implementation, and subject to change in future releases.

1128

1129

=for apidoc Am|UV|toFOLD_utf8_safe|U8* p|U8* e|U8* s|STRLEN* lenp

1130

Same as L</toFOLD_utf8>.

1131

1132

=for apidoc Am|U8|toLOWER|U8 ch

1133

Converts the specified character to lowercase. If the input is anything but an

1134

ASCII uppercase character, that input character itself is returned. Variant

1135

C<toLOWER_A> is equivalent.

1136

1137

=for apidoc Am|U8|toLOWER_L1|U8 ch

1138

Converts the specified Latin1 character to lowercase. The results are

1139

undefined if the input doesn't fit in a byte.

1140

1141

=for apidoc Am|U8|toLOWER_LC|U8 ch

1142

Converts the specified character to lowercase using the current locale's rules,

1143

if possible; otherwise returns the input character itself.

1144

1145

1146

Converts the code point C<cp> to its lowercase version, and

1147

stores that in UTF-8 in C<s>, and its length in bytes in C<lenp>. The code

1148

point is interpreted as native if less than 256; otherwise as Unicode. Note

1149

that the buffer pointed to by C<s> needs to be at least C<UTF8_MAXBYTES_CASE+1>

1150

bytes since the lowercase version may be longer than the original character.

1151

1152

The first code point of the lowercased version is returned

1153

(but note, as explained at L<the top of this section|/Character case

1154

changing>, that there may be more).

1155

1156

=for apidoc Am|UV|toLOWER_utf8|U8* p|U8* e|U8* s|STRLEN* lenp

1157

Converts the first UTF-8 encoded character in the sequence starting at C<p> and

1158

extending no further than S<C<e - 1>> to its lowercase version, and

1159

stores that in UTF-8 in C<s>, and its length in bytes in C<lenp>. Note

1160

that the buffer pointed to by C<s> needs to be at least C<UTF8_MAXBYTES_CASE+1>

1161

bytes since the lowercase version may be longer than the original character.

1162

1163

The first code point of the lowercased version is returned

1164

(but note, as explained at L<the top of this section|/Character case

1165

changing>, that there may be more).

1166

It will not attempt to read beyond S<C<e - 1>>, provided that the constraint

1167

S<C<p E<lt> e>> is true (this is asserted for in C<-DDEBUGGING> builds). If

1168

the UTF-8 for the input character is malformed in some way, the program may

1169

croak, or the function may return the REPLACEMENT CHARACTER, at the discretion

1170

of the implementation, and subject to change in future releases.

1171

1172

=for apidoc Am|UV|toLOWER_utf8_safe|U8* p|U8* e|U8* s|STRLEN* lenp

1173

Same as L</toLOWER_utf8>.

1174

1175

=for apidoc Am|U8|toTITLE|U8 ch

1176

Converts the specified character to titlecase. If the input is anything but an

1177

ASCII lowercase character, that input character itself is returned. Variant

1178

C<toTITLE_A> is equivalent. (There is no C<toTITLE_L1> for the full Latin1

1179

range, as the full generality of L</toTITLE_uvchr> is needed there. Titlecase is

1180

not a concept used in locale handling, so there is no functionality for that.)

1181

1182

1183

Converts the code point C<cp> to its titlecase version, and

1184

stores that in UTF-8 in C<s>, and its length in bytes in C<lenp>. The code

1185

point is interpreted as native if less than 256; otherwise as Unicode. Note

1186

that the buffer pointed to by C<s> needs to be at least C<UTF8_MAXBYTES_CASE+1>

1187

bytes since the titlecase version may be longer than the original character.

1188

1189

The first code point of the titlecased version is returned

1190

(but note, as explained at L<the top of this section|/Character case

1191

changing>, that there may be more).

1192

1193

=for apidoc Am|UV|toTITLE_utf8|U8* p|U8* e|U8* s|STRLEN* lenp

1194

Converts the first UTF-8 encoded character in the sequence starting at C<p> and

1195

extending no further than S<C<e - 1>> to its titlecase version, and

1196

stores that in UTF-8 in C<s>, and its length in bytes in C<lenp>. Note

1197

that the buffer pointed to by C<s> needs to be at least C<UTF8_MAXBYTES_CASE+1>

1198

bytes since the titlecase version may be longer than the original character.

1199

1200

The first code point of the titlecased version is returned

1201

(but note, as explained at L<the top of this section|/Character case

1202

changing>, that there may be more).

1203

1204

It will not attempt

1205

to read beyond S<C<e - 1>>, provided that the constraint S<C<p E<lt> e>> is

1206

true (this is asserted for in C<-DDEBUGGING> builds). If the UTF-8 for the

1207

input character is malformed in some way, the program may croak, or the

1208

function may return the REPLACEMENT CHARACTER, at the discretion of the

1209

implementation, and subject to change in future releases.

1210

1211

=for apidoc Am|UV|toTITLE_utf8_safe|U8* p|U8* e|U8* s|STRLEN* lenp

1212

Same as L</toTITLE_utf8>.

=cut

XXX Still undocumented isVERTWS_uvchr and _utf8; it's unclear what their names

1217

really should be. Also toUPPER_LC and toFOLD_LC, which are subject to change,

1218

and aren't general purpose as they don't work on U+DF, and assert against that.

1219

1220

Note that these macros are repeated in Devel::PPPort, so should also be

1221

patched there. The file as of this writing is cpan/Devel-PPPort/parts/inc/misc

*/

/*

void below because that's the best fit, and works for Devel::PPPort

1227

=for apidoc AmnU|void|WIDEST_UTYPE

1228

1229

Yields the widest unsigned integer type on the platform, currently either

1230

C<U32> or C<U64>. This can be used in declarations such as

WIDEST_UTYPE my_uv;

or casts

my_uv = (WIDEST_UTYPE) val;

=cut

*/

#ifdef QUADKIND

# define WIDEST_UTYPE U64

1243

#else

1244

# define WIDEST_UTYPE U32

1245

#endif

1246

1247

/* FITS_IN_8_BITS(c) returns true if c doesn't have a bit set other than in

1248

* the lower 8. It is designed to be hopefully bomb-proof, making sure that no

1249

* bits of information are lost even on a 64-bit machine, but to get the

1250

* compiler to optimize it out if possible. This is because Configure makes

1251

* sure that the machine has an 8-bit byte, so if c is stored in a byte, the

1252

* sizeof() guarantees that this evaluates to a constant true at compile time.

1253

*

1254

* For Coverity, be always true, because otherwise Coverity thinks

1255

* it finds several expressions that are always true, independent

1256

* of operands. Well, they are, but that is kind of the point.

1257

*/

1258

#ifndef __COVERITY__

1259

/* The '| 0' part ensures a compiler error if c is not integer (like e.g., a

1260

* pointer) */

1261

#define FITS_IN_8_BITS(c) ( (sizeof(c) == 1) \

1262

|| !(((WIDEST_UTYPE)((c) | 0)) & ~0xFF))

1263

#else

1264

#define FITS_IN_8_BITS(c) (1)

1265

#endif

1266

1267

/* Returns true if l <= c <= (l + n), where 'l' and 'n' are non-negative

1268

* Written this way so that after optimization, only one conditional test is

1269

* needed. (The NV casts stop any warnings about comparison always being true

1270

* if called with an unsigned. The cast preserves the sign, which is all we

1271

* care about.) */

1272

#define withinCOUNT(c, l, n) (__ASSERT_((NV) (l) >= 0) \

1273

__ASSERT_((NV) (n) >= 0) \

1274

(((WIDEST_UTYPE) (((c)) - ((l) | 0))) <= (((WIDEST_UTYPE) ((n) | 0)))))

1275

1276

/* Returns true if c is in the range l..u, where 'l' is non-negative

1277

* Written this way so that after optimization, only one conditional test is

1278

* needed. */

1279

#define inRANGE(c, l, u) (__ASSERT_((u) >= (l)) \

1280

( (sizeof(c) == sizeof(U8)) ? withinCOUNT(((U8) (c)), (l), ((u) - (l))) \

1281

: (sizeof(c) == sizeof(U32)) ? withinCOUNT(((U32) (c)), (l), ((u) - (l))) \

1282

: (__ASSERT_(sizeof(c) == sizeof(WIDEST_UTYPE)) \

1283

withinCOUNT(((WIDEST_UTYPE) (c)), (l), ((u) - (l))))))

#ifdef EBCDIC

# ifndef _ALL_SOURCE

/* The native libc isascii() et al. functions return the wrong results

1288

* on at least z/OS unless this is defined. */

1289

# error _ALL_SOURCE should probably be defined

1290

# endif

1291

#else

1292

/* There is a simple definition of ASCII for ASCII platforms. But the

1293

* EBCDIC one isn't so simple, so is defined using table look-up like the

1294

* other macros below.

1295

*

1296

* The cast here is used instead of '(c) >= 0', because some compilers emit

1297

* a warning that that test is always true when the parameter is an

1298

* unsigned type. khw supposes that it could be written as

1299

* && ((c) == '\0' || (c) > 0)

1300

* to avoid the message, but the cast will likely avoid extra branches even

1301

* with stupid compilers.

1302

*

1303

* The '| 0' part ensures a compiler error if c is not integer (like e.g.,

1304

* a pointer) */

1305

# define isASCII(c) ((WIDEST_UTYPE)((c) | 0) < 128)

1306

#endif

1307

1308

/* Take the eight possible bit patterns of the lower 3 bits and you get the

1309

* lower 3 bits of the 8 octal digits, in both ASCII and EBCDIC, so those bits

1310

* can be ignored. If the rest match '0', we have an octal */

1311

#define isOCTAL_A(c) (((WIDEST_UTYPE)((c) | 0) & ~7) == '0')

1312

1313

#ifdef H_PERL /* If have access to perl.h, lookup in its table */

1314

1315

/* Character class numbers. For internal core Perl use only. The ones less

1316

* than 32 are used in PL_charclass[] and the ones up through the one that

1317

* corresponds to <_HIGHEST_REGCOMP_DOT_H_SYNC> are used by regcomp.h and

1318

* related files. PL_charclass ones use names used in l1_char_class_tab.h but

1319

* their actual definitions are here. If that file has a name not used here,

1320

* it won't compile.

1321

*

1322

* The first group of these is ordered in what I (khw) estimate to be the

1323

* frequency of their use. This gives a slight edge to exiting a loop earlier

1324

* (in reginclass() in regexec.c). Except \v should be last, as it isn't a

1325

* real Posix character class, and some (small) inefficiencies in regular

1326

* expression handling would be introduced by putting it in the middle of those

1327

* that are. Also, cntrl and ascii come after the others as it may be useful

1328

* to group these which have no members that match above Latin1, (or above

1329

* ASCII in the latter case) */

1330

1331

# define _CC_WORDCHAR 0 /* \w and [:word:] */

1332

# define _CC_DIGIT 1 /* \d and [:digit:] */

1333

# define _CC_ALPHA 2 /* [:alpha:] */

1334

# define _CC_LOWER 3 /* [:lower:] */

1335

# define _CC_UPPER 4 /* [:upper:] */

1336

# define _CC_PUNCT 5 /* [:punct:] */

1337

# define _CC_PRINT 6 /* [:print:] */

1338

# define _CC_ALPHANUMERIC 7 /* [:alnum:] */

1339

# define _CC_GRAPH 8 /* [:graph:] */

1340

# define _CC_CASED 9 /* [:lower:] or [:upper:] under /i */

1341

# define _CC_SPACE 10 /* \s, [:space:] */

1342

# define _CC_BLANK 11 /* [:blank:] */

1343

# define _CC_XDIGIT 12 /* [:xdigit:] */

1344

# define _CC_CNTRL 13 /* [:cntrl:] */

1345

# define _CC_ASCII 14 /* [:ascii:] */

1346

# define _CC_VERTSPACE 15 /* \v */

1347

1348

# define _HIGHEST_REGCOMP_DOT_H_SYNC _CC_VERTSPACE

1349

1350

/* The members of the third group below do not need to be coordinated with data

1351

* structures in regcomp.[ch] and regexec.c. */

1352

# define _CC_IDFIRST 16

1353

# define _CC_CHARNAME_CONT 17

1354

# define _CC_NONLATIN1_FOLD 18

1355

# define _CC_NONLATIN1_SIMPLE_FOLD 19

1356

# define _CC_QUOTEMETA 20

1357

# define _CC_NON_FINAL_FOLD 21

1358

# define _CC_IS_IN_SOME_FOLD 22

1359

# define _CC_MNEMONIC_CNTRL 23

1360

1361

/* This next group is only used on EBCDIC platforms, so theoretically could be

1362

* shared with something entirely different that's only on ASCII platforms */

1363

# define _CC_UTF8_START_BYTE_IS_FOR_AT_LEAST_SURROGATE 31

1364

/* Unused: 24-30

1365

* If more bits are needed, one could add a second word for non-64bit

1366

* QUAD_IS_INT systems, using some #ifdefs to distinguish between having a 2nd

1367

* word or not. The IS_IN_SOME_FOLD bit is the most easily expendable, as it

1368

* is used only for optimization (as of this writing), and differs in the

1369

* Latin1 range from the ALPHA bit only in two relatively unimportant

1370

* characters: the masculine and feminine ordinal indicators, so removing it

1371

* would just cause /i regexes which match them to run less efficiently.

1372

* Similarly the EBCDIC-only bits are used just for speed, and could be

1373

* replaced by other means */

1374

1375

#if defined(PERL_CORE) || defined(PERL_EXT)

1376

/* An enum version of the character class numbers, to help compilers

1377

* optimize */

1378

typedef enum {

1379

_CC_ENUM_ALPHA = _CC_ALPHA,

1380

_CC_ENUM_ALPHANUMERIC = _CC_ALPHANUMERIC,

1381

_CC_ENUM_ASCII = _CC_ASCII,

1382

_CC_ENUM_BLANK = _CC_BLANK,

1383

_CC_ENUM_CASED = _CC_CASED,

1384

_CC_ENUM_CNTRL = _CC_CNTRL,

1385

_CC_ENUM_DIGIT = _CC_DIGIT,

1386

_CC_ENUM_GRAPH = _CC_GRAPH,

1387

_CC_ENUM_LOWER = _CC_LOWER,

1388

_CC_ENUM_PRINT = _CC_PRINT,

1389

_CC_ENUM_PUNCT = _CC_PUNCT,

1390

_CC_ENUM_SPACE = _CC_SPACE,

1391

_CC_ENUM_UPPER = _CC_UPPER,

1392

_CC_ENUM_VERTSPACE = _CC_VERTSPACE,

1393

_CC_ENUM_WORDCHAR = _CC_WORDCHAR,

1394

_CC_ENUM_XDIGIT = _CC_XDIGIT

1395

} _char_class_number;

1396

#endif

1397

1398

#define POSIX_CC_COUNT (_HIGHEST_REGCOMP_DOT_H_SYNC + 1)

START_EXTERN_C

# ifdef DOINIT

EXTCONST U32 PL_charclass[] = {

1403

# include "l1_char_class_tab.h"

1404

};

1405

1406

# else /* ! DOINIT */

1407

EXTCONST U32 PL_charclass[];

# endif

END_EXTERN_C

/* The 1U keeps Solaris from griping when shifting sets the uppermost bit */

1412

# define _CC_mask(classnum) (1U << (classnum))

1413

1414

/* For internal core Perl use only: the base macro for defining macros like

1415

* isALPHA */

1416

# define _generic_isCC(c, classnum) cBOOL(FITS_IN_8_BITS(c) \

1417

&& (PL_charclass[(U8) (c)] & _CC_mask(classnum)))

1418

1419

/* The mask for the _A versions of the macros; it just adds in the bit for

1420

* ASCII. */

1421

# define _CC_mask_A(classnum) (_CC_mask(classnum) | _CC_mask(_CC_ASCII))

1422

1423

/* For internal core Perl use only: the base macro for defining macros like

1424

* isALPHA_A. The foo_A version makes sure that both the desired bit and

1425

* the ASCII bit are present */

1426

# define _generic_isCC_A(c, classnum) (FITS_IN_8_BITS(c) \

1427

&& ((PL_charclass[(U8) (c)] & _CC_mask_A(classnum)) \

1428

== _CC_mask_A(classnum)))

1429

1430

/* On ASCII platforms certain classes form a single range. It's faster to

1431

* special case these. isDIGIT is a single range on all platforms */

1432

# ifdef EBCDIC

1433

# define isALPHA_A(c) _generic_isCC_A(c, _CC_ALPHA)

1434

# define isGRAPH_A(c) _generic_isCC_A(c, _CC_GRAPH)

1435

# define isLOWER_A(c) _generic_isCC_A(c, _CC_LOWER)

1436

# define isPRINT_A(c) _generic_isCC_A(c, _CC_PRINT)

1437

# define isUPPER_A(c) _generic_isCC_A(c, _CC_UPPER)

1438

# else

1439

/* By folding the upper and lowercase, we can use a single range */

1440

# define isALPHA_A(c) inRANGE((~('A' ^ 'a') & (c)), 'A', 'Z')

1441

# define isGRAPH_A(c) inRANGE(c, ' ' + 1, 0x7e)

1442

# define isLOWER_A(c) inRANGE(c, 'a', 'z')

1443

# define isPRINT_A(c) inRANGE(c, ' ', 0x7e)

1444

# define isUPPER_A(c) inRANGE(c, 'A', 'Z')

1445

# endif

1446

# define isALPHANUMERIC_A(c) _generic_isCC_A(c, _CC_ALPHANUMERIC)

1447

# define isBLANK_A(c) _generic_isCC_A(c, _CC_BLANK)

1448

# define isCNTRL_A(c) _generic_isCC_A(c, _CC_CNTRL)

1449

# define isDIGIT_A(c) inRANGE(c, '0', '9')

1450

# define isPUNCT_A(c) _generic_isCC_A(c, _CC_PUNCT)

1451

# define isSPACE_A(c) _generic_isCC_A(c, _CC_SPACE)

1452

# define isWORDCHAR_A(c) _generic_isCC_A(c, _CC_WORDCHAR)

1453

# define isXDIGIT_A(c) _generic_isCC(c, _CC_XDIGIT) /* No non-ASCII xdigits

1454

*/

1455

# define isIDFIRST_A(c) _generic_isCC_A(c, _CC_IDFIRST)

1456

# define isALPHA_L1(c) _generic_isCC(c, _CC_ALPHA)

1457

# define isALPHANUMERIC_L1(c) _generic_isCC(c, _CC_ALPHANUMERIC)

1458

# define isBLANK_L1(c) _generic_isCC(c, _CC_BLANK)

1459

1460

/* continuation character for legal NAME in \N{NAME} */

1461

# define isCHARNAME_CONT(c) _generic_isCC(c, _CC_CHARNAME_CONT)

1462

1463

# define isCNTRL_L1(c) _generic_isCC(c, _CC_CNTRL)

1464

# define isGRAPH_L1(c) _generic_isCC(c, _CC_GRAPH)

1465

# define isLOWER_L1(c) _generic_isCC(c, _CC_LOWER)

1466

# define isPRINT_L1(c) _generic_isCC(c, _CC_PRINT)

1467

# define isPSXSPC_L1(c) isSPACE_L1(c)

1468

# define isPUNCT_L1(c) _generic_isCC(c, _CC_PUNCT)

1469

# define isSPACE_L1(c) _generic_isCC(c, _CC_SPACE)

1470

# define isUPPER_L1(c) _generic_isCC(c, _CC_UPPER)

1471

# define isWORDCHAR_L1(c) _generic_isCC(c, _CC_WORDCHAR)

1472

# define isIDFIRST_L1(c) _generic_isCC(c, _CC_IDFIRST)

1473

1474

# ifdef EBCDIC

1475

# define isASCII(c) _generic_isCC(c, _CC_ASCII)

1476

# endif

1477

1478

/* Participates in a single-character fold with a character above 255 */

1479

# define _HAS_NONLATIN1_SIMPLE_FOLD_CLOSURE_ONLY_FOR_USE_BY_REGCOMP_DOT_C_AND_REGEXEC_DOT_C(c) ((! cBOOL(FITS_IN_8_BITS(c))) || (PL_charclass[(U8) (c)] & _CC_mask(_CC_NONLATIN1_SIMPLE_FOLD)))

1480

1481

/* Like the above, but also can be part of a multi-char fold */

1482

# define _HAS_NONLATIN1_FOLD_CLOSURE_ONLY_FOR_USE_BY_REGCOMP_DOT_C_AND_REGEXEC_DOT_C(c) ((! cBOOL(FITS_IN_8_BITS(c))) || (PL_charclass[(U8) (c)] & _CC_mask(_CC_NONLATIN1_FOLD)))

1483

1484

# define _isQUOTEMETA(c) _generic_isCC(c, _CC_QUOTEMETA)

1485

# define _IS_NON_FINAL_FOLD_ONLY_FOR_USE_BY_REGCOMP_DOT_C(c) \

1486

_generic_isCC(c, _CC_NON_FINAL_FOLD)

1487

# define _IS_IN_SOME_FOLD_ONLY_FOR_USE_BY_REGCOMP_DOT_C(c) \

1488

_generic_isCC(c, _CC_IS_IN_SOME_FOLD)

1489

# define _IS_MNEMONIC_CNTRL_ONLY_FOR_USE_BY_REGCOMP_DOT_C(c) \

1490

_generic_isCC(c, _CC_MNEMONIC_CNTRL)

1491

#else /* else we don't have perl.h H_PERL */

1492

1493

/* If we don't have perl.h, we are compiling a utility program. Below we

1494

* hard-code various macro definitions that wouldn't otherwise be available

1495

* to it. Most are coded based on first principles. These are written to

1496

* avoid EBCDIC vs. ASCII #ifdef's as much as possible. */

1497

# define isDIGIT_A(c) inRANGE(c, '0', '9')

1498

# define isBLANK_A(c) ((c) == ' ' || (c) == '\t')

1499

# define isSPACE_A(c) (isBLANK_A(c) \

|| (c) == '\n' \

|| (c) == '\r' \

|| (c) == '\v' \

|| (c) == '\f')

/* On EBCDIC, there are gaps between 'i' and 'j'; 'r' and 's'. Same for

1505

* uppercase. The tests for those aren't necessary on ASCII, but hurt only

1506

* performance (if optimization isn't on), and allow the same code to be

1507

* used for both platform types */

1508

/* True if 'c' is an ASCII-range lowercase letter, tested as three
 * sub-ranges.  The whole expansion is parenthesized so that it remains a
 * single operand when the caller combines it with '!', '&&', etc. */
# define isLOWER_A(c) ( inRANGE((c), 'a', 'i') \
 || inRANGE((c), 'j', 'r') \
 || inRANGE((c), 's', 'z'))

1511

/* True if 'c' is an ASCII-range uppercase letter, tested as three
 * sub-ranges.  The whole expansion is parenthesized so that it remains a
 * single operand when the caller combines it with '!', '&&', etc. */
# define isUPPER_A(c) ( inRANGE((c), 'A', 'I') \
 || inRANGE((c), 'J', 'R') \
 || inRANGE((c), 'S', 'Z'))

1514

# define isALPHA_A(c) (isUPPER_A(c) || isLOWER_A(c))

1515

# define isALPHANUMERIC_A(c) (isALPHA_A(c) || isDIGIT_A(c))

1516

# define isWORDCHAR_A(c) (isALPHANUMERIC_A(c) || (c) == '_')

1517

# define isIDFIRST_A(c) (isALPHA_A(c) || (c) == '_')

1518

/* True if 'c' is one of [0-9A-Fa-f].  'a'..'f' and 'A'..'F' are each tested
 * as a single range.  The expansion is fully parenthesized (the closing
 * parenthesis used to be missing, which made every use a syntax error). */
# define isXDIGIT_A(c) ( isDIGIT_A(c) \
 || inRANGE((c), 'a', 'f') \
 || inRANGE((c), 'A', 'F'))

1521

# define isPUNCT_A(c) ((c) == '-' || (c) == '!' || (c) == '"' \

1522

|| (c) == '#' || (c) == '$' || (c) == '%' \

1523

|| (c) == '&' || (c) == '\'' || (c) == '(' \

1524

|| (c) == ')' || (c) == '*' || (c) == '+' \

1525

|| (c) == ',' || (c) == '.' || (c) == '/' \

1526

|| (c) == ':' || (c) == ';' || (c) == '<' \

1527

|| (c) == '=' || (c) == '>' || (c) == '?' \

1528

|| (c) == '@' || (c) == '[' || (c) == '\\' \

1529

|| (c) == ']' || (c) == '^' || (c) == '_' \

1530

|| (c) == '`' || (c) == '{' || (c) == '|' \

1531

|| (c) == '}' || (c) == '~')

1532

# define isGRAPH_A(c) (isALPHANUMERIC_A(c) || isPUNCT_A(c))

1533

# define isPRINT_A(c) (isGRAPH_A(c) || (c) == ' ')

1534

1535

# ifdef EBCDIC

1536

/* The below is accurate for the 3 EBCDIC code pages traditionally

1537

* supported by perl. The only difference between them in the controls

1538

* is the position of \n, and that is represented symbolically below */

1539

# define isCNTRL_A(c) ((c) == '\0' || (c) == '\a' || (c) == '\b' \

1540

|| (c) == '\f' || (c) == '\n' || (c) == '\r' \

1541

|| (c) == '\t' || (c) == '\v' \

1542

|| inRANGE((c), 1, 3) /* SOH, STX, ETX */ \

1543

|| (c) == 7 /* U+7F DEL */ \

1544

|| inRANGE((c), 0x0E, 0x13) /* SO SI DLE \

1545

DC[1-3] */ \

1546

|| (c) == 0x18 /* U+18 CAN */ \

1547

|| (c) == 0x19 /* U+19 EOM */ \

1548

|| inRANGE((c), 0x1C, 0x1F) /* [FGRU]S */ \

1549

|| (c) == 0x26 /* U+17 ETB */ \

1550

|| (c) == 0x27 /* U+1B ESC */ \

1551

|| (c) == 0x2D /* U+05 ENQ */ \

1552

|| (c) == 0x2E /* U+06 ACK */ \

1553

|| (c) == 0x32 /* U+16 SYN */ \

1554

|| (c) == 0x37 /* U+04 EOT */ \

1555

|| (c) == 0x3C /* U+14 DC4 */ \

1556

|| (c) == 0x3D /* U+15 NAK */ \

1557

|| (c) == 0x3F)/* U+1A SUB */

1558

# define isASCII(c) (isCNTRL_A(c) || isPRINT_A(c))

1559

# else /* isASCII is already defined for ASCII platforms, so can use that to

1560

define isCNTRL */

1561

# define isCNTRL_A(c) (isASCII(c) && ! isPRINT_A(c))

1562

# endif

1563

1564

/* The _L1 macros may be unnecessary for the utilities; I (khw) added them

1565

* during debugging, and it seems best to keep them. We may be called

1566

* without NATIVE_TO_LATIN1 being defined. On ASCII platforms, it doesn't

1567

* do anything anyway, so make it not a problem */

1568

# if ! defined(EBCDIC) && ! defined(NATIVE_TO_LATIN1)

1569

# define NATIVE_TO_LATIN1(ch) (ch)

1570

# endif

1571

# define isALPHA_L1(c) (isUPPER_L1(c) || isLOWER_L1(c))

1572

# define isALPHANUMERIC_L1(c) (isALPHA_L1(c) || isDIGIT_A(c))

1573

/* True if 'c' is an ASCII blank, or fits in 8 bits and maps to Latin1 0xA0
 * (NO-BREAK SPACE).  The argument is parenthesized inside the U8 cast so
 * that the cast applies to the whole expression passed in, not just its
 * first operand. */
# define isBLANK_L1(c) (isBLANK_A(c) \
 || (FITS_IN_8_BITS(c) \
 && NATIVE_TO_LATIN1((U8) (c)) == 0xA0))

1576

# define isCNTRL_L1(c) (FITS_IN_8_BITS(c) && (! isPRINT_L1(c)))

1577

# define isGRAPH_L1(c) (isPRINT_L1(c) && (! isBLANK_L1(c)))

1578

/* True if 'c' is an ASCII lowercase letter, or fits in 8 bits and maps to a
 * Latin1 code point that is 0xDF or above (excluding 0xF7), or is one of
 * 0xAA, 0xBA, 0xB5.  The argument is parenthesized inside each U8 cast so
 * that the cast applies to the whole expression passed in. */
# define isLOWER_L1(c) (isLOWER_A(c) \
 || (FITS_IN_8_BITS(c) \
 && (( NATIVE_TO_LATIN1((U8) (c)) >= 0xDF \
 && NATIVE_TO_LATIN1((U8) (c)) != 0xF7) \
 || NATIVE_TO_LATIN1((U8) (c)) == 0xAA \
 || NATIVE_TO_LATIN1((U8) (c)) == 0xBA \
 || NATIVE_TO_LATIN1((U8) (c)) == 0xB5)))

1585

/* True if 'c' is ASCII-printable, or fits in 8 bits and maps to a Latin1
 * code point of 0xA0 or above.  The argument is parenthesized inside the U8
 * cast so that the cast applies to the whole expression passed in. */
# define isPRINT_L1(c) (isPRINT_A(c) \
 || (FITS_IN_8_BITS(c) \
 && NATIVE_TO_LATIN1((U8) (c)) >= 0xA0))

1588

/* True if 'c' is ASCII punctuation, or fits in 8 bits and maps to one of the
 * Latin1 code points 0xA1, 0xA7, 0xAB, 0xB6, 0xB7, 0xBB, 0xBF.  The argument
 * is parenthesized inside each U8 cast so that the cast applies to the whole
 * expression passed in. */
# define isPUNCT_L1(c) (isPUNCT_A(c) \
 || (FITS_IN_8_BITS(c) \
 && ( NATIVE_TO_LATIN1((U8) (c)) == 0xA1 \
 || NATIVE_TO_LATIN1((U8) (c)) == 0xA7 \
 || NATIVE_TO_LATIN1((U8) (c)) == 0xAB \
 || NATIVE_TO_LATIN1((U8) (c)) == 0xB6 \
 || NATIVE_TO_LATIN1((U8) (c)) == 0xB7 \
 || NATIVE_TO_LATIN1((U8) (c)) == 0xBB \
 || NATIVE_TO_LATIN1((U8) (c)) == 0xBF)))

1597

/* True if 'c' is ASCII white space, or fits in 8 bits and maps to Latin1
 * 0x85 or 0xA0.  The argument is parenthesized inside each U8 cast so that
 * the cast applies to the whole expression passed in. */
# define isSPACE_L1(c) (isSPACE_A(c) \
 || (FITS_IN_8_BITS(c) \
 && ( NATIVE_TO_LATIN1((U8) (c)) == 0x85 \
 || NATIVE_TO_LATIN1((U8) (c)) == 0xA0)))

1601

/* True if 'c' is an ASCII uppercase letter, or fits in 8 bits and maps to a
 * Latin1 code point in 0xC0..0xDE other than 0xD7.  Uses inRANGE(), the
 * range macro defined earlier in this file.  The argument is parenthesized
 * inside each U8 cast so that the cast applies to the whole expression
 * passed in. */
# define isUPPER_L1(c) (isUPPER_A(c) \
 || (FITS_IN_8_BITS(c) \
 && ( inRANGE(NATIVE_TO_LATIN1((U8) (c)), \
 0xC0, 0xDE) \
 && NATIVE_TO_LATIN1((U8) (c)) != 0xD7)))

1606

# define isWORDCHAR_L1(c) (isIDFIRST_L1(c) || isDIGIT_A(c))

1607

# define isIDFIRST_L1(c) (isALPHA_L1(c) || NATIVE_TO_LATIN1(c) == '_')

1608

# define isCHARNAME_CONT(c) (isWORDCHAR_L1(c) \

|| isBLANK_L1(c) \

|| (c) == '-' \

|| (c) == '(' \

|| (c) == ')')

/* The following are not fully accurate in the above-ASCII range. I (khw)

1614

* don't think it's necessary to be so for the purposes where this gets

1615

* compiled */

1616

# define _isQUOTEMETA(c) (FITS_IN_8_BITS(c) && ! isWORDCHAR_L1(c))

1617

# define _IS_IN_SOME_FOLD_ONLY_FOR_USE_BY_REGCOMP_DOT_C(c) isALPHA_L1(c)

1618

1619

/* And these aren't accurate at all. They are useful only for above

1620

* Latin1, which utilities and bootstrapping don't deal with */

1621

# define _IS_NON_FINAL_FOLD_ONLY_FOR_USE_BY_REGCOMP_DOT_C(c) 0

1622

# define _HAS_NONLATIN1_SIMPLE_FOLD_CLOSURE_ONLY_FOR_USE_BY_REGCOMP_DOT_C_AND_REGEXEC_DOT_C(c) 0

1623

# define _HAS_NONLATIN1_FOLD_CLOSURE_ONLY_FOR_USE_BY_REGCOMP_DOT_C_AND_REGEXEC_DOT_C(c) 0

1624

1625

/* Many of the macros later in this file are defined in terms of these. By

1626

* implementing them with a function, which converts the class number into

1627

* a call to the desired macro, all of the later ones work. However, that

1628

* function won't be actually defined when building a utility program (no

1629

* perl.h), and so a compiler error will be generated if one is attempted

1630

* to be used. And the above-Latin1 code points require Unicode tables to

1631

* be present, something unlikely to be the case when bootstrapping */

1632

# define _generic_isCC(c, classnum) \

1633

(FITS_IN_8_BITS(c) && S_bootstrap_ctype((U8) (c), (classnum), TRUE))

1634

# define _generic_isCC_A(c, classnum) \

1635

(FITS_IN_8_BITS(c) && S_bootstrap_ctype((U8) (c), (classnum), FALSE))

1636

#endif /* End of no perl.h H_PERL */

1637

1638

#define isALPHANUMERIC(c) isALPHANUMERIC_A(c)

1639

#define isALPHA(c) isALPHA_A(c)

1640

#define isASCII_A(c) isASCII(c)

1641

#define isASCII_L1(c) isASCII(c)

1642

#define isBLANK(c) isBLANK_A(c)

1643

#define isCNTRL(c) isCNTRL_A(c)

1644

#define isDIGIT(c) isDIGIT_A(c)

1645

#define isGRAPH(c) isGRAPH_A(c)

1646

#define isIDFIRST(c) isIDFIRST_A(c)

1647

#define isLOWER(c) isLOWER_A(c)

1648

#define isPRINT(c) isPRINT_A(c)

1649

#define isPSXSPC_A(c) isSPACE_A(c)

1650

#define isPSXSPC(c) isPSXSPC_A(c)

1651

#define isPSXSPC_L1(c) isSPACE_L1(c)

1652

#define isPUNCT(c) isPUNCT_A(c)

1653

#define isSPACE(c) isSPACE_A(c)

1654

#define isUPPER(c) isUPPER_A(c)

1655

#define isWORDCHAR(c) isWORDCHAR_A(c)

1656

#define isXDIGIT(c) isXDIGIT_A(c)

1657

1658

/* ASCII casing. These could also be written as

1659

#define toLOWER(c) (isASCII(c) ? toLOWER_LATIN1(c) : (c))

1660

#define toUPPER(c) (isASCII(c) ? toUPPER_LATIN1_MOD(c) : (c))

1661

which uses table lookup and mask instead of subtraction. (This would

1662

work because the _MOD does not apply in the ASCII range).

1663

1664

These actually are UTF-8 invariant casing, not just ASCII, as any non-ASCII

1665

UTF-8 invariants are neither upper nor lower. (Only on EBCDIC platforms are

1666

there non-ASCII invariants, and all of them are controls.) */

1667

#define toLOWER(c) (isUPPER(c) ? (U8)((c) + ('a' - 'A')) : (c))

1668

#define toUPPER(c) (isLOWER(c) ? (U8)((c) - ('a' - 'A')) : (c))

1669

1670

/* In the ASCII range, these are equivalent to what they're here defined to be.

1671

* But by creating these definitions, other code doesn't have to be aware of

1672

* this detail. Actually this works for all UTF-8 invariants, not just the

1673

* ASCII range. (EBCDIC platforms can have non-ASCII invariants.) */

1674

#define toFOLD(c) toLOWER(c)

1675

#define toTITLE(c) toUPPER(c)

1676

1677

#define toLOWER_A(c) toLOWER(c)

1678

#define toUPPER_A(c) toUPPER(c)

1679

#define toFOLD_A(c) toFOLD(c)

1680

#define toTITLE_A(c) toTITLE(c)

1681

1682

/* Use table lookup for speed; returns the input itself if is out-of-range */

1683

#define toLOWER_LATIN1(c) ((! FITS_IN_8_BITS(c)) \

1684

? (c) \

1685

: PL_latin1_lc[ (U8) (c) ])

1686

#define toLOWER_L1(c) toLOWER_LATIN1(c) /* Synonym for consistency */

1687

1688

/* Modified uc. Is correct uc except for three non-ascii chars which are

1689

* all mapped to one of them, and these need special handling; returns the

1690

* input itself if is out-of-range */

1691

#define toUPPER_LATIN1_MOD(c) ((! FITS_IN_8_BITS(c)) \

1692

? (c) \

1693

: PL_mod_latin1_uc[ (U8) (c) ])

1694

#define IN_UTF8_CTYPE_LOCALE PL_in_utf8_CTYPE_locale

1695

1696

/* Use foo_LC_uvchr() instead of these for beyond the Latin1 range */

1697

1698

/* For internal core Perl use only: the base macro for defining macros like

1699

* isALPHA_LC, which uses the current LC_CTYPE locale. 'c' is the code point

1700

* (0-255) to check. In a UTF-8 locale, the result is the same as calling

1701

* isFOO_L1(); the 'utf8_locale_classnum' parameter is something like

1702

* _CC_UPPER, which gives the class number for doing this. For non-UTF-8

1703

* locales, the code to actually do this test is passed in 'non_utf8'. If

1704

* 'c' is above 255, 0 is returned. For accessing the full range of possible

1705

* code points under locale rules, use the macros based on _generic_LC_uvchr

1706

* instead of this. */

1707

/* Locale-sensitive class test for the single code point 'c'.  Evaluates to 0
 * when 'c' does not fit in 8 bits.  Otherwise, when IN_UTF8_CTYPE_LOCALE is
 * true, 'c' is looked up in PL_charclass[] against the mask for
 * 'utf8_locale_classnum'; when it is false, the result is the truth value of
 * the caller-supplied expression 'non_utf8'. */
#define _generic_LC_base(c, utf8_locale_classnum, non_utf8) \
    ((FITS_IN_8_BITS(c)) \
     ? ((IN_UTF8_CTYPE_LOCALE) \
        ? cBOOL(PL_charclass[(U8) (c)] & _CC_mask(utf8_locale_classnum)) \
        : cBOOL(non_utf8)) \
     : 0)

1713

1714

/* For internal core Perl use only: a helper macro for defining macros like

1715

* isALPHA_LC. 'c' is the code point (0-255) to check. The function name to

1716

* actually do this test is passed in 'non_utf8_func', which is called on 'c',

1717

* casting 'c' to the macro _LC_CAST, which should not be parenthesized. See

1718

* _generic_LC_base for more info */

1719

#define _generic_LC(c, utf8_locale_classnum, non_utf8_func) \

1720

_generic_LC_base(c,utf8_locale_classnum, \

1721

non_utf8_func( (_LC_CAST) (c)))

1722

1723

/* For internal core Perl use only: like _generic_LC, but also returns TRUE if

1724

* 'c' is the platform's native underscore character */

1725

#define _generic_LC_underscore(c,utf8_locale_classnum,non_utf8_func) \

1726

_generic_LC_base(c, utf8_locale_classnum, \

1727

(non_utf8_func( (_LC_CAST) (c)) \

1728

|| (char)(c) == '_'))

1729

1730

/* These next three are also for internal core Perl use only: case-change

1731

* helper macros. The reason for using the PL_latin arrays is in case the

1732

* system function is defective; it ensures uniform results that conform to the

1733

* Unicode standard. It does not handle the anomalies in UTF-8 Turkic locales */

1734

/* Lowercase 'c' under the current LC_CTYPE locale.  A 'c' that does not fit
 * in 8 bits is handed back unchanged.  When IN_UTF8_CTYPE_LOCALE is true the
 * PL_latin1_lc[] table supplies the answer; otherwise 'function' is called
 * on 'c', with both its argument and its result cast to 'cast'. */
#define _generic_toLOWER_LC(c, function, cast) \
    ((FITS_IN_8_BITS(c)) \
     ? ((IN_UTF8_CTYPE_LOCALE) \
        ? PL_latin1_lc[ (U8) (c) ] \
        : (cast)function((cast)(c))) \
     : (c))

1739

1740

/* Note that the result can be larger than a byte in a UTF-8 locale. It

1741

* returns a single value, so can't adequately return the upper case of LATIN

1742

* SMALL LETTER SHARP S in a UTF-8 locale (which should be a string of two

1743

* values "SS"); instead it asserts against that under DEBUGGING, and

1744

* otherwise returns its input. It does not handle the anomalies in UTF-8

1745

* Turkic locales. */

1746

#define _generic_toUPPER_LC(c, function, cast) \

1747

(! FITS_IN_8_BITS(c) \

1748

? (c) \

1749

: ((! IN_UTF8_CTYPE_LOCALE) \

1750

? (cast)function((cast)(c)) \

1751

: ((((U8)(c)) == MICRO_SIGN) \

1752

? GREEK_CAPITAL_LETTER_MU \

1753

: ((((U8)(c)) == LATIN_SMALL_LETTER_Y_WITH_DIAERESIS) \

1754

? LATIN_CAPITAL_LETTER_Y_WITH_DIAERESIS \

1755

: ((((U8)(c)) == LATIN_SMALL_LETTER_SHARP_S) \

1756

? (__ASSERT_(0) (c)) \

1757

: PL_mod_latin1_uc[ (U8) (c) ])))))

1758

1759

/* Note that the result can be larger than a byte in a UTF-8 locale. It

1760

* returns a single value, so can't adequately return the fold case of LATIN

1761

* SMALL LETTER SHARP S in a UTF-8 locale (which should be a string of two

1762

* values "ss"); instead it asserts against that under DEBUGGING, and

1763

* otherwise returns its input. It does not handle the anomalies in UTF-8

1764

* Turkic locales */

1765

#define _generic_toFOLD_LC(c, function, cast) \

1766

((UNLIKELY((c) == MICRO_SIGN) && IN_UTF8_CTYPE_LOCALE) \

1767

? GREEK_SMALL_LETTER_MU \

1768

: (__ASSERT_(! IN_UTF8_CTYPE_LOCALE \

1769

|| (c) != LATIN_SMALL_LETTER_SHARP_S) \

1770

_generic_toLOWER_LC(c, function, cast)))

1771

1772

/* Use the libc versions for these if available. */

1773

#if defined(HAS_ISASCII)

1774

# define isASCII_LC(c) (FITS_IN_8_BITS(c) && isascii( (U8) (c)))

1775

#else

1776

# define isASCII_LC(c) isASCII(c)

1777

#endif

1778

1779

#if defined(HAS_ISBLANK)

1780

# define isBLANK_LC(c) _generic_LC(c, _CC_BLANK, isblank)

1781

#else /* Unlike isASCII, varies if in a UTF-8 locale */

1782

# define isBLANK_LC(c) ((IN_UTF8_CTYPE_LOCALE) ? isBLANK_L1(c) : isBLANK(c))

#endif

#define _LC_CAST U8

#ifdef WIN32

/* The Windows functions don't bother to follow the POSIX standard, which

1789

* for example says that something can't both be a printable and a control.

1790

* But Windows treats the \t control as a printable, and does such things

1791

* as making superscripts into both digits and punctuation. This tames

1792

* these flaws by assuming that the definitions of both controls and space

1793

* are correct, and then making sure that other definitions don't have

1794

* weirdnesses, by making sure that isalnum() isn't also ispunct(), etc.

1795

* Not all possible weirdnesses are checked for, just the ones that were

1796

* detected on actual Microsoft code pages */

1797

1798

# define isCNTRL_LC(c) _generic_LC(c, _CC_CNTRL, iscntrl)

1799

# define isSPACE_LC(c) _generic_LC(c, _CC_SPACE, isspace)

1800

1801

# define isALPHA_LC(c) (_generic_LC(c, _CC_ALPHA, isalpha) \

1802

&& isALPHANUMERIC_LC(c))

1803

# define isALPHANUMERIC_LC(c) (_generic_LC(c, _CC_ALPHANUMERIC, isalnum) && \

1804

! isPUNCT_LC(c))

1805

# define isDIGIT_LC(c) (_generic_LC(c, _CC_DIGIT, isdigit) && \

1806

isALPHANUMERIC_LC(c))

1807

# define isGRAPH_LC(c) (_generic_LC(c, _CC_GRAPH, isgraph) && isPRINT_LC(c))

1808

# define isIDFIRST_LC(c) (((c) == '_') \

1809

|| (_generic_LC(c, _CC_IDFIRST, isalpha) && ! isPUNCT_LC(c)))

1810

# define isLOWER_LC(c) (_generic_LC(c, _CC_LOWER, islower) && isALPHA_LC(c))

1811

# define isPRINT_LC(c) (_generic_LC(c, _CC_PRINT, isprint) && ! isCNTRL_LC(c))

1812

# define isPUNCT_LC(c) (_generic_LC(c, _CC_PUNCT, ispunct) && ! isCNTRL_LC(c))

1813

# define isUPPER_LC(c) (_generic_LC(c, _CC_UPPER, isupper) && isALPHA_LC(c))

1814

# define isWORDCHAR_LC(c) (((c) == '_') || isALPHANUMERIC_LC(c))

1815

# define isXDIGIT_LC(c) (_generic_LC(c, _CC_XDIGIT, isxdigit) \

1816

&& isALPHANUMERIC_LC(c))

1817

1818

# define toLOWER_LC(c) _generic_toLOWER_LC((c), tolower, U8)

1819

# define toUPPER_LC(c) _generic_toUPPER_LC((c), toupper, U8)

1820

# define toFOLD_LC(c) _generic_toFOLD_LC((c), tolower, U8)

1821

1822

#elif defined(CTYPE256) || (!defined(isascii) && !defined(HAS_ISASCII))

1823

/* For most other platforms */

1824

1825

# define isALPHA_LC(c) _generic_LC(c, _CC_ALPHA, isalpha)

1826

# define isALPHANUMERIC_LC(c) _generic_LC(c, _CC_ALPHANUMERIC, isalnum)

1827

# define isCNTRL_LC(c) _generic_LC(c, _CC_CNTRL, iscntrl)

1828

# define isDIGIT_LC(c) _generic_LC(c, _CC_DIGIT, isdigit)

1829

# define isGRAPH_LC(c) _generic_LC(c, _CC_GRAPH, isgraph)

1830

# define isIDFIRST_LC(c) _generic_LC_underscore(c, _CC_IDFIRST, isalpha)

1831

# define isLOWER_LC(c) _generic_LC(c, _CC_LOWER, islower)

1832

# define isPRINT_LC(c) _generic_LC(c, _CC_PRINT, isprint)

1833

# define isPUNCT_LC(c) _generic_LC(c, _CC_PUNCT, ispunct)

1834

# define isSPACE_LC(c) _generic_LC(c, _CC_SPACE, isspace)

1835

# define isUPPER_LC(c) _generic_LC(c, _CC_UPPER, isupper)

1836

# define isWORDCHAR_LC(c) _generic_LC_underscore(c, _CC_WORDCHAR, isalnum)

1837

# define isXDIGIT_LC(c) _generic_LC(c, _CC_XDIGIT, isxdigit)

1838

1839

1840

# define toLOWER_LC(c) _generic_toLOWER_LC((c), tolower, U8)

1841

# define toUPPER_LC(c) _generic_toUPPER_LC((c), toupper, U8)

1842

# define toFOLD_LC(c) _generic_toFOLD_LC((c), tolower, U8)

1843

1844

#else /* The final fallback position */

1845

1846

# define isALPHA_LC(c) (isascii(c) && isalpha(c))

1847

# define isALPHANUMERIC_LC(c) (isascii(c) && isalnum(c))

1848

# define isCNTRL_LC(c) (isascii(c) && iscntrl(c))

1849

# define isDIGIT_LC(c) (isascii(c) && isdigit(c))

1850

# define isGRAPH_LC(c) (isascii(c) && isgraph(c))

1851

# define isIDFIRST_LC(c) (isascii(c) && (isalpha(c) || (c) == '_'))

1852

# define isLOWER_LC(c) (isascii(c) && islower(c))

1853

# define isPRINT_LC(c) (isascii(c) && isprint(c))

1854

# define isPUNCT_LC(c) (isascii(c) && ispunct(c))

1855

# define isSPACE_LC(c) (isascii(c) && isspace(c))

1856

# define isUPPER_LC(c) (isascii(c) && isupper(c))

1857

# define isWORDCHAR_LC(c) (isascii(c) && (isalnum(c) || (c) == '_'))

1858

# define isXDIGIT_LC(c) (isascii(c) && isxdigit(c))

1859

1860

# define toLOWER_LC(c) (isascii(c) ? tolower(c) : (c))

1861

# define toUPPER_LC(c) (isascii(c) ? toupper(c) : (c))

1862

# define toFOLD_LC(c) (isascii(c) ? tolower(c) : (c))

#endif

#define isIDCONT(c) isWORDCHAR(c)

1867

#define isIDCONT_A(c) isWORDCHAR_A(c)

1868

#define isIDCONT_L1(c) isWORDCHAR_L1(c)

1869

#define isIDCONT_LC(c) isWORDCHAR_LC(c)

1870

#define isPSXSPC_LC(c) isSPACE_LC(c)

1871

1872

/* For internal core Perl use only: the base macros for defining macros like

1873

* isALPHA_uvchr. 'c' is the code point to check. 'classnum' is the POSIX class

1874

* number defined earlier in this file. _generic_uvchr() is used for POSIX

1875

* classes where there is a macro or function 'above_latin1' that takes the

1876

* single argument 'c' and returns the desired value. These exist for those

1877

* classes which have simple definitions, avoiding the overhead of a hash

1878

* lookup or inversion list binary search. _generic_swash_uvchr() can be used

1879

* for classes where that overhead is faster than a direct lookup.

1880

* _generic_uvchr() won't compile if 'c' isn't unsigned, as it won't match the

1881

* 'above_latin1' prototype. _generic_isCC() macro does bounds checking, so

1882

* have duplicate checks here, so could create versions of the macros that

1883

* don't, but experiments show that gcc optimizes them out anyway. */

1884

1885

/* Note that all ignore 'use bytes' */

1886

#define _generic_uvchr(classnum, above_latin1, c) ((c) < 256 \

1887

? _generic_isCC(c, classnum) \

1888

: above_latin1(c))

1889

#define _generic_swash_uvchr(classnum, c) ((c) < 256 \

1890

? _generic_isCC(c, classnum) \

1891

: _is_uni_FOO(classnum, c))

1892

#define isALPHA_uvchr(c) _generic_swash_uvchr(_CC_ALPHA, c)

1893

#define isALPHANUMERIC_uvchr(c) _generic_swash_uvchr(_CC_ALPHANUMERIC, c)

1894

#define isASCII_uvchr(c) isASCII(c)

1895

#define isBLANK_uvchr(c) _generic_uvchr(_CC_BLANK, is_HORIZWS_cp_high, c)

1896

#define isCNTRL_uvchr(c) isCNTRL_L1(c) /* All controls are in Latin1 */

1897

#define isDIGIT_uvchr(c) _generic_swash_uvchr(_CC_DIGIT, c)

1898

#define isGRAPH_uvchr(c) _generic_swash_uvchr(_CC_GRAPH, c)

1899

#define isIDCONT_uvchr(c) \

1900

_generic_uvchr(_CC_WORDCHAR, _is_uni_perl_idcont, c)

1901

#define isIDFIRST_uvchr(c) \

1902

_generic_uvchr(_CC_IDFIRST, _is_uni_perl_idstart, c)

1903

#define isLOWER_uvchr(c) _generic_swash_uvchr(_CC_LOWER, c)

1904

#define isPRINT_uvchr(c) _generic_swash_uvchr(_CC_PRINT, c)

1905

1906

#define isPUNCT_uvchr(c) _generic_swash_uvchr(_CC_PUNCT, c)

1907

#define isSPACE_uvchr(c) _generic_uvchr(_CC_SPACE, is_XPERLSPACE_cp_high, c)

1908

#define isPSXSPC_uvchr(c) isSPACE_uvchr(c)

1909

1910

#define isUPPER_uvchr(c) _generic_swash_uvchr(_CC_UPPER, c)

1911

#define isVERTWS_uvchr(c) _generic_uvchr(_CC_VERTSPACE, is_VERTWS_cp_high, c)

1912

#define isWORDCHAR_uvchr(c) _generic_swash_uvchr(_CC_WORDCHAR, c)

1913

#define isXDIGIT_uvchr(c) _generic_uvchr(_CC_XDIGIT, is_XDIGIT_cp_high, c)

1914

1915

#define toFOLD_uvchr(c,s,l) to_uni_fold(c,s,l)

1916

#define toLOWER_uvchr(c,s,l) to_uni_lower(c,s,l)

1917

#define toTITLE_uvchr(c,s,l) to_uni_title(c,s,l)

1918

#define toUPPER_uvchr(c,s,l) to_uni_upper(c,s,l)

1919

1920

/* For backwards compatibility, even though '_uni' should mean official Unicode

1921

* code points, in Perl it means native for those below 256 */

1922

#define isALPHA_uni(c) isALPHA_uvchr(c)

1923

#define isALPHANUMERIC_uni(c) isALPHANUMERIC_uvchr(c)

1924

#define isASCII_uni(c) isASCII_uvchr(c)

1925

#define isBLANK_uni(c) isBLANK_uvchr(c)

1926

#define isCNTRL_uni(c) isCNTRL_uvchr(c)

1927

#define isDIGIT_uni(c) isDIGIT_uvchr(c)

1928

#define isGRAPH_uni(c) isGRAPH_uvchr(c)

1929

#define isIDCONT_uni(c) isIDCONT_uvchr(c)

1930

#define isIDFIRST_uni(c) isIDFIRST_uvchr(c)

1931

#define isLOWER_uni(c) isLOWER_uvchr(c)

1932

#define isPRINT_uni(c) isPRINT_uvchr(c)

1933

#define isPUNCT_uni(c) isPUNCT_uvchr(c)

1934

#define isSPACE_uni(c) isSPACE_uvchr(c)

1935

#define isPSXSPC_uni(c) isPSXSPC_uvchr(c)

1936

#define isUPPER_uni(c) isUPPER_uvchr(c)

1937

#define isVERTWS_uni(c) isVERTWS_uvchr(c)

1938

#define isWORDCHAR_uni(c) isWORDCHAR_uvchr(c)

1939

#define isXDIGIT_uni(c) isXDIGIT_uvchr(c)

1940

#define toFOLD_uni(c,s,l) toFOLD_uvchr(c,s,l)

1941

#define toLOWER_uni(c,s,l) toLOWER_uvchr(c,s,l)

1942

#define toTITLE_uni(c,s,l) toTITLE_uvchr(c,s,l)

1943

#define toUPPER_uni(c,s,l) toUPPER_uvchr(c,s,l)

1944

1945

/* For internal core Perl use only: the base macros for defining macros like

1946

* isALPHA_LC_uvchr. These are like isALPHA_LC, but the input can be any code

1947

* point, not just 0-255. Like _generic_uvchr, there are two versions, one for

1948

* simple class definitions; the other for more complex. These are like

1949

* _generic_uvchr, so see it for more info. */

1950

/* Dispatch on the size of code point 'c': below 256 the locale-aware macro
 * 'latin1' is applied to it, otherwise 'above_latin1' is.  'c' is
 * parenthesized in the range test so that an argument containing an operator
 * of lower precedence than '<' (e.g. '|', '&', '?:') is compared as a whole
 * rather than being split by the comparison.  Note that 'c' is evaluated
 * more than once. */
#define _generic_LC_uvchr(latin1, above_latin1, c) \
    ((c) < 256 ? latin1(c) : above_latin1(c))

1952

/* Dispatch on the size of code point 'c': below 256 the locale-aware macro
 * 'latin1' is applied to it, otherwise _is_uni_FOO() is called with
 * 'classnum' and 'c'.  'c' is parenthesized in the range test so that an
 * argument containing an operator of lower precedence than '<' is compared
 * as a whole.  Note that 'c' is evaluated more than once. */
#define _generic_LC_swash_uvchr(latin1, classnum, c) \
    ((c) < 256 ? latin1(c) : _is_uni_FOO(classnum, c))

1954

1955

#define isALPHA_LC_uvchr(c) _generic_LC_swash_uvchr(isALPHA_LC, _CC_ALPHA, c)

1956

#define isALPHANUMERIC_LC_uvchr(c) _generic_LC_swash_uvchr(isALPHANUMERIC_LC, \

1957

_CC_ALPHANUMERIC, c)

1958

#define isASCII_LC_uvchr(c) isASCII_LC(c)

1959

#define isBLANK_LC_uvchr(c) _generic_LC_uvchr(isBLANK_LC, \

1960

is_HORIZWS_cp_high, c)

1961

/* Locale-aware control test for any code point: isCNTRL_LC() decides for
 * values below 256, and everything else yields 0.  'c' is parenthesized in
 * the range test so that an argument containing an operator of lower
 * precedence than '<' is compared as a whole. */
#define isCNTRL_LC_uvchr(c) ((c) < 256 ? isCNTRL_LC(c) : 0)

1962

#define isDIGIT_LC_uvchr(c) _generic_LC_swash_uvchr(isDIGIT_LC, _CC_DIGIT, c)

1963

#define isGRAPH_LC_uvchr(c) _generic_LC_swash_uvchr(isGRAPH_LC, _CC_GRAPH, c)

1964

#define isIDCONT_LC_uvchr(c) _generic_LC_uvchr(isIDCONT_LC, \

1965

_is_uni_perl_idcont, c)

1966

#define isIDFIRST_LC_uvchr(c) _generic_LC_uvchr(isIDFIRST_LC, \

1967

_is_uni_perl_idstart, c)

1968

#define isLOWER_LC_uvchr(c) _generic_LC_swash_uvchr(isLOWER_LC, _CC_LOWER, c)

1969

#define isPRINT_LC_uvchr(c) _generic_LC_swash_uvchr(isPRINT_LC, _CC_PRINT, c)

1970

#define isPSXSPC_LC_uvchr(c) isSPACE_LC_uvchr(c)

1971

#define isPUNCT_LC_uvchr(c) _generic_LC_swash_uvchr(isPUNCT_LC, _CC_PUNCT, c)

1972

#define isSPACE_LC_uvchr(c) _generic_LC_uvchr(isSPACE_LC, \

1973

is_XPERLSPACE_cp_high, c)

1974

#define isUPPER_LC_uvchr(c) _generic_LC_swash_uvchr(isUPPER_LC, _CC_UPPER, c)

1975

#define isWORDCHAR_LC_uvchr(c) _generic_LC_swash_uvchr(isWORDCHAR_LC, \

1976

_CC_WORDCHAR, c)

1977

#define isXDIGIT_LC_uvchr(c) _generic_LC_uvchr(isXDIGIT_LC, \

1978

is_XDIGIT_cp_high, c)

1979

1980

#define isBLANK_LC_uni(c) isBLANK_LC_uvchr(UNI_TO_NATIVE(c))

1981

1982

/* The "_safe" macros make sure that we don't attempt to read beyond 'e', but

1983

* they don't otherwise go out of their way to look for malformed UTF-8. If

1984

* they can return accurate results without knowing if the input is otherwise

1985

* malformed, they do so. For example isASCII is accurate in spite of any

1986

* non-length malformations because it looks only at a single byte. Likewise

1987

* isDIGIT looks just at the first byte for code points 0-255, as all UTF-8

1988

* variant ones return FALSE. But, if the input has to be well-formed in order

1989

* for the results to be accurate, the macros will test and if malformed will

1990

* call a routine to die

1991

*

1992

* Except for toke.c, the macros do assume that e > p, asserting that on

1993

* DEBUGGING builds. Much code that calls these depends on this being true,

1994

* for other reasons. toke.c is treated specially as using the regular

1995

* assertion breaks it in many ways. All strings that these operate on there

1996

* are supposed to have an extra NUL character at the end, so that *e = \0. A

1997

* bunch of code in toke.c assumes that this is true, so the assertion allows

1998

* for that */

1999

#ifdef PERL_IN_TOKE_C

2000

# define _utf8_safe_assert(p,e) ((e) > (p) || ((e) == (p) && *(p) == '\0'))

2001

#else

2002

# define _utf8_safe_assert(p,e) ((e) > (p))

2003

#endif

2004

2005

#define _generic_utf8_safe(classnum, p, e, above_latin1) \

2006

((! _utf8_safe_assert(p, e)) \

2007

? (_force_out_malformed_utf8_message((U8 *) (p), (U8 *) (e), 0, 1), 0)\

2008

: (UTF8_IS_INVARIANT(*(p))) \

2009

? _generic_isCC(*(p), classnum) \

2010

: (UTF8_IS_DOWNGRADEABLE_START(*(p)) \

2011

? ((LIKELY((e) - (p) > 1 && UTF8_IS_CONTINUATION(*((p)+1)))) \

2012

? _generic_isCC(EIGHT_BIT_UTF8_TO_NATIVE(*(p), *((p)+1 )), \

2013

classnum) \

2014

: (_force_out_malformed_utf8_message( \

2015

(U8 *) (p), (U8 *) (e), 0, 1), 0)) \

2016

: above_latin1))

2017

/* Like the above, but calls 'above_latin1(p, e)' to get the utf8 value.

2018

* 'above_latin1' can be a macro */

2019

#define _generic_func_utf8_safe(classnum, above_latin1, p, e) \

2020

_generic_utf8_safe(classnum, p, e, above_latin1(p, e))

2021

#define _generic_non_swash_utf8_safe(classnum, above_latin1, p, e) \

2022

_generic_utf8_safe(classnum, p, e, \

2023

(UNLIKELY((e) - (p) < UTF8SKIP(p)) \

2024

? (_force_out_malformed_utf8_message( \

2025

(U8 *) (p), (U8 *) (e), 0, 1), 0) \

2026

: above_latin1(p)))

2027

/* Like the above, but passes classnum to _is_utf8_FOO_with_len(), instead of

2028

* having an 'above_latin1' parameter */

2029

#define _generic_swash_utf8_safe(classnum, p, e) \

2030

_generic_utf8_safe(classnum, p, e, _is_utf8_FOO_with_len(classnum, p, e))

2031

2032

/* Like the above, but should be used only when it is known that there are no

2033

* characters in the upper-Latin1 range (128-255 on ASCII platforms) which the

2034

* class is TRUE for. Hence it can skip the tests for this range.

2035

* 'above_latin1' should include its arguments */

2036

#define _generic_utf8_safe_no_upper_latin1(classnum, p, e, above_latin1) \

2037

(__ASSERT_(_utf8_safe_assert(p, e)) \

2038

(UTF8_IS_INVARIANT(*(p))) \

2039

? _generic_isCC(*(p), classnum) \

2040

: (UTF8_IS_DOWNGRADEABLE_START(*(p))) \

2041

? 0 /* Note that doesn't check validity for latin1 */ \

: above_latin1)

#define isALPHA_utf8(p, e) isALPHA_utf8_safe(p, e)

2046

#define isALPHANUMERIC_utf8(p, e) isALPHANUMERIC_utf8_safe(p, e)

2047

#define isASCII_utf8(p, e) isASCII_utf8_safe(p, e)

2048

#define isBLANK_utf8(p, e) isBLANK_utf8_safe(p, e)

2049

#define isCNTRL_utf8(p, e) isCNTRL_utf8_safe(p, e)

2050

#define isDIGIT_utf8(p, e) isDIGIT_utf8_safe(p, e)

2051

#define isGRAPH_utf8(p, e) isGRAPH_utf8_safe(p, e)

2052

#define isIDCONT_utf8(p, e) isIDCONT_utf8_safe(p, e)

2053

#define isIDFIRST_utf8(p, e) isIDFIRST_utf8_safe(p, e)

2054

#define isLOWER_utf8(p, e) isLOWER_utf8_safe(p, e)

2055

#define isPRINT_utf8(p, e) isPRINT_utf8_safe(p, e)

2056

#define isPSXSPC_utf8(p, e) isPSXSPC_utf8_safe(p, e)

2057

#define isPUNCT_utf8(p, e) isPUNCT_utf8_safe(p, e)

2058

#define isSPACE_utf8(p, e) isSPACE_utf8_safe(p, e)

2059

#define isUPPER_utf8(p, e) isUPPER_utf8_safe(p, e)

2060

#define isVERTWS_utf8(p, e) isVERTWS_utf8_safe(p, e)

2061

#define isWORDCHAR_utf8(p, e) isWORDCHAR_utf8_safe(p, e)

2062

#define isXDIGIT_utf8(p, e) isXDIGIT_utf8_safe(p, e)

2063

2064

#define isALPHA_utf8_safe(p, e) _generic_swash_utf8_safe(_CC_ALPHA, p, e)

2065

#define isALPHANUMERIC_utf8_safe(p, e) \

2066

_generic_swash_utf8_safe(_CC_ALPHANUMERIC, p, e)

2067

#define isASCII_utf8_safe(p, e) \

2068

/* Because ASCII is invariant under utf8, the non-utf8 macro \

2069

* works */ \

2070

(__ASSERT_(_utf8_safe_assert(p, e)) isASCII(*(p)))

2071

#define isBLANK_utf8_safe(p, e) \

2072

_generic_non_swash_utf8_safe(_CC_BLANK, is_HORIZWS_high, p, e)

2073

2074

#ifdef EBCDIC

2075

/* Because all controls are UTF-8 invariants in EBCDIC, we can use this

2076

* more efficient macro instead of the more general one */

2077

# define isCNTRL_utf8_safe(p, e) \

2078

(__ASSERT_(_utf8_safe_assert(p, e)) isCNTRL_L1(*(p)))

2079

#else

2080

# define isCNTRL_utf8_safe(p, e) _generic_utf8_safe(_CC_CNTRL, p, e, 0)

2081

#endif

2082

2083

#define isDIGIT_utf8_safe(p, e) \

2084

_generic_utf8_safe_no_upper_latin1(_CC_DIGIT, p, e, \

2085

_is_utf8_FOO_with_len(_CC_DIGIT, p, e))

2086

#define isGRAPH_utf8_safe(p, e) _generic_swash_utf8_safe(_CC_GRAPH, p, e)

2087

#define isIDCONT_utf8_safe(p, e) _generic_func_utf8_safe(_CC_WORDCHAR, \

2088

_is_utf8_perl_idcont_with_len, p, e)

2089

2090

/* To prevent S_scan_word in toke.c from hanging, we have to make sure that

2091

* IDFIRST is an alnum. See

2092

* http://rt.perl.org/rt3/Ticket/Display.html?id=74022 for more detail than you

2093

* ever wanted to know about. (In the ASCII range, there isn't a difference.)

2094

* This used to be not the XID version, but we decided to go with the more

2095

* modern Unicode definition */

2096

#define isIDFIRST_utf8_safe(p, e) \

2097

_generic_func_utf8_safe(_CC_IDFIRST, \

2098

_is_utf8_perl_idstart_with_len, (U8 *) (p), (U8 *) (e))

2099

2100

#define isLOWER_utf8_safe(p, e) _generic_swash_utf8_safe(_CC_LOWER, p, e)

2101

#define isPRINT_utf8_safe(p, e) _generic_swash_utf8_safe(_CC_PRINT, p, e)

2102

#define isPSXSPC_utf8_safe(p, e) isSPACE_utf8_safe(p, e)

2103

#define isPUNCT_utf8_safe(p, e) _generic_swash_utf8_safe(_CC_PUNCT, p, e)

2104

#define isSPACE_utf8_safe(p, e) \

2105

_generic_non_swash_utf8_safe(_CC_SPACE, is_XPERLSPACE_high, p, e)

2106

#define isUPPER_utf8_safe(p, e) _generic_swash_utf8_safe(_CC_UPPER, p, e)

2107

#define isVERTWS_utf8_safe(p, e) \

2108

_generic_non_swash_utf8_safe(_CC_VERTSPACE, is_VERTWS_high, p, e)

2109

#define isWORDCHAR_utf8_safe(p, e) \

2110

_generic_swash_utf8_safe(_CC_WORDCHAR, p, e)

2111

#define isXDIGIT_utf8_safe(p, e) \

2112

_generic_utf8_safe_no_upper_latin1(_CC_XDIGIT, p, e, \

2113

(UNLIKELY((e) - (p) < UTF8SKIP(p)) \

2114

? (_force_out_malformed_utf8_message( \

2115

(U8 *) (p), (U8 *) (e), 0, 1), 0) \

2116

: is_XDIGIT_high(p)))

2117

2118

#define toFOLD_utf8(p,e,s,l) toFOLD_utf8_safe(p,e,s,l)

2119

#define toLOWER_utf8(p,e,s,l) toLOWER_utf8_safe(p,e,s,l)

2120

#define toTITLE_utf8(p,e,s,l) toTITLE_utf8_safe(p,e,s,l)

2121

#define toUPPER_utf8(p,e,s,l) toUPPER_utf8_safe(p,e,s,l)

2122

2123

/* For internal core use only, subject to change */

2124

#define _toFOLD_utf8_flags(p,e,s,l,f) _to_utf8_fold_flags (p,e,s,l,f)

2125

#define _toLOWER_utf8_flags(p,e,s,l,f) _to_utf8_lower_flags(p,e,s,l,f)

2126

#define _toTITLE_utf8_flags(p,e,s,l,f) _to_utf8_title_flags(p,e,s,l,f)

2127

#define _toUPPER_utf8_flags(p,e,s,l,f) _to_utf8_upper_flags(p,e,s,l,f)

2128

2129

#define toFOLD_utf8_safe(p,e,s,l) _toFOLD_utf8_flags(p,e,s,l, FOLD_FLAGS_FULL)

2130

#define toLOWER_utf8_safe(p,e,s,l) _toLOWER_utf8_flags(p,e,s,l, 0)

2131

#define toTITLE_utf8_safe(p,e,s,l) _toTITLE_utf8_flags(p,e,s,l, 0)

2132

#define toUPPER_utf8_safe(p,e,s,l) _toUPPER_utf8_flags(p,e,s,l, 0)

2133

2134

#define isALPHA_LC_utf8(p, e) isALPHA_LC_utf8_safe(p, e)

2135

#define isALPHANUMERIC_LC_utf8(p, e) isALPHANUMERIC_LC_utf8_safe(p, e)

2136

#define isASCII_LC_utf8(p, e) isASCII_LC_utf8_safe(p, e)

2137

#define isBLANK_LC_utf8(p, e) isBLANK_LC_utf8_safe(p, e)

2138

#define isCNTRL_LC_utf8(p, e) isCNTRL_LC_utf8_safe(p, e)

2139

#define isDIGIT_LC_utf8(p, e) isDIGIT_LC_utf8_safe(p, e)

2140

#define isGRAPH_LC_utf8(p, e) isGRAPH_LC_utf8_safe(p, e)

2141

#define isIDCONT_LC_utf8(p, e) isIDCONT_LC_utf8_safe(p, e)

2142

#define isIDFIRST_LC_utf8(p, e) isIDFIRST_LC_utf8_safe(p, e)

2143

#define isLOWER_LC_utf8(p, e) isLOWER_LC_utf8_safe(p, e)

2144

#define isPRINT_LC_utf8(p, e) isPRINT_LC_utf8_safe(p, e)

2145

#define isPSXSPC_LC_utf8(p, e) isPSXSPC_LC_utf8_safe(p, e)

2146

#define isPUNCT_LC_utf8(p, e) isPUNCT_LC_utf8_safe(p, e)

2147

#define isSPACE_LC_utf8(p, e) isSPACE_LC_utf8_safe(p, e)

2148

#define isUPPER_LC_utf8(p, e) isUPPER_LC_utf8_safe(p, e)

2149

#define isWORDCHAR_LC_utf8(p, e) isWORDCHAR_LC_utf8_safe(p, e)

2150

#define isXDIGIT_LC_utf8(p, e) isXDIGIT_LC_utf8_safe(p, e)

2151

2152

/* For internal core Perl use only: the base macros for defining macros like

2153

* isALPHA_LC_utf8_safe. These are like _generic_utf8_safe, but if the first code

2154

* point in 'p' is within the 0-255 range, it uses locale rules from the

2155

* passed-in 'macro' parameter */

2156

#define _generic_LC_utf8_safe(macro, p, e, above_latin1) \

2157

(__ASSERT_(_utf8_safe_assert(p, e)) \

2158

(UTF8_IS_INVARIANT(*(p))) \

2159

? macro(*(p)) \

2160

: (UTF8_IS_DOWNGRADEABLE_START(*(p)) \

2161

? ((LIKELY((e) - (p) > 1 && UTF8_IS_CONTINUATION(*((p)+1)))) \

2162

? macro(EIGHT_BIT_UTF8_TO_NATIVE(*(p), *((p)+1))) \

2163

: (_force_out_malformed_utf8_message( \

2164

(U8 *) (p), (U8 *) (e), 0, 1), 0)) \

2165

: above_latin1))

2166

2167

#define _generic_LC_swash_utf8_safe(macro, classnum, p, e) \

2168

_generic_LC_utf8_safe(macro, p, e, \

2169

_is_utf8_FOO_with_len(classnum, p, e))

2170

2171

#define _generic_LC_func_utf8_safe(macro, above_latin1, p, e) \

2172

_generic_LC_utf8_safe(macro, p, e, above_latin1(p, e))

2173

2174

/* Locale-aware helper for classes whose above-Latin1 test is a plain macro or
 * function taking just 'p'.  'macro' is the isFOO_LC-style macro that is
 * forwarded to _generic_LC_utf8_safe.  'above_latin1' is invoked as
 * above_latin1(p), but only after checking that at least UTF8SKIP(p) bytes
 * lie between 'p' and 'e'; if fewer are available,
 * _force_out_malformed_utf8_message() is called and the result is 0.
 *
 * The first parameter used to be spelled 'classnum', although it is passed
 * on as the 'macro' argument and the users in this file hand it a macro name
 * (isBLANK_LC, isSPACE_LC, isXDIGIT_LC), not a class number. */
#define _generic_LC_non_swash_utf8_safe(macro, above_latin1, p, e) \
    _generic_LC_utf8_safe(macro, p, e, \
        (UNLIKELY((e) - (p) < UTF8SKIP(p)) \
         ? (_force_out_malformed_utf8_message( \
                (U8 *) (p), (U8 *) (e), 0, 1), 0) \
         : above_latin1(p)))

2180

2181

#define isALPHANUMERIC_LC_utf8_safe(p, e) \

2182

_generic_LC_swash_utf8_safe(isALPHANUMERIC_LC, \

2183

_CC_ALPHANUMERIC, p, e)

2184

#define isALPHA_LC_utf8_safe(p, e) \

2185

_generic_LC_swash_utf8_safe(isALPHA_LC, _CC_ALPHA, p, e)

2186

#define isASCII_LC_utf8_safe(p, e) \

2187

(__ASSERT_(_utf8_safe_assert(p, e)) isASCII_LC(*(p)))

2188

#define isBLANK_LC_utf8_safe(p, e) \

2189

_generic_LC_non_swash_utf8_safe(isBLANK_LC, is_HORIZWS_high, p, e)

2190

#define isCNTRL_LC_utf8_safe(p, e) \

2191

_generic_LC_utf8_safe(isCNTRL_LC, p, e, 0)

2192

#define isDIGIT_LC_utf8_safe(p, e) \

2193

_generic_LC_swash_utf8_safe(isDIGIT_LC, _CC_DIGIT, p, e)

2194

#define isGRAPH_LC_utf8_safe(p, e) \

2195

_generic_LC_swash_utf8_safe(isGRAPH_LC, _CC_GRAPH, p, e)

2196

#define isIDCONT_LC_utf8_safe(p, e) \

2197

_generic_LC_func_utf8_safe(isIDCONT_LC, \

2198

_is_utf8_perl_idcont_with_len, p, e)

2199

#define isIDFIRST_LC_utf8_safe(p, e) \

2200

_generic_LC_func_utf8_safe(isIDFIRST_LC, \

2201

_is_utf8_perl_idstart_with_len, p, e)

2202

#define isLOWER_LC_utf8_safe(p, e) \

2203

_generic_LC_swash_utf8_safe(isLOWER_LC, _CC_LOWER, p, e)

2204

#define isPRINT_LC_utf8_safe(p, e) \

2205

_generic_LC_swash_utf8_safe(isPRINT_LC, _CC_PRINT, p, e)

2206

#define isPSXSPC_LC_utf8_safe(p, e) isSPACE_LC_utf8_safe(p, e)

2207

#define isPUNCT_LC_utf8_safe(p, e) \

2208

_generic_LC_swash_utf8_safe(isPUNCT_LC, _CC_PUNCT, p, e)

2209

#define isSPACE_LC_utf8_safe(p, e) \

2210

_generic_LC_non_swash_utf8_safe(isSPACE_LC, is_XPERLSPACE_high, p, e)

2211

#define isUPPER_LC_utf8_safe(p, e) \

2212

_generic_LC_swash_utf8_safe(isUPPER_LC, _CC_UPPER, p, e)

2213

#define isWORDCHAR_LC_utf8_safe(p, e) \

2214

_generic_LC_swash_utf8_safe(isWORDCHAR_LC, _CC_WORDCHAR, p, e)

2215

#define isXDIGIT_LC_utf8_safe(p, e) \

2216

_generic_LC_non_swash_utf8_safe(isXDIGIT_LC, is_XDIGIT_high, p, e)

2217

2218

/* Macros for backwards compatibility and for completeness when the ASCII and

2219

* Latin1 values are identical */

2220

#define isALPHAU(c) isALPHA_L1(c)

2221

#define isDIGIT_L1(c) isDIGIT_A(c)

2222

#define isOCTAL(c) isOCTAL_A(c)

2223

#define isOCTAL_L1(c) isOCTAL_A(c)

2224

#define isXDIGIT_L1(c) isXDIGIT_A(c)

2225

#define isALNUM(c) isWORDCHAR(c)

2226

#define isALNUM_A(c) isALNUM(c)

2227

#define isALNUMU(c) isWORDCHAR_L1(c)

2228

#define isALNUM_LC(c) isWORDCHAR_LC(c)

2229

#define isALNUM_uni(c) isWORDCHAR_uni(c)

2230

#define isALNUM_LC_uvchr(c) isWORDCHAR_LC_uvchr(c)

2231

#define isALNUM_utf8(p,e) isWORDCHAR_utf8(p,e)

2232

/* Backwards-compatibility alias: expands to isWORDCHAR_LC_utf8() with the
 * same arguments */
#define isALNUM_LC_utf8(p,e) isWORDCHAR_LC_utf8(p,e)

2233

#define isALNUMC_A(c) isALPHANUMERIC_A(c) /* Mnemonic: "C's alnum" */

2234

#define isALNUMC_L1(c) isALPHANUMERIC_L1(c)

2235

#define isALNUMC(c) isALPHANUMERIC(c)

2236

#define isALNUMC_LC(c) isALPHANUMERIC_LC(c)

2237

#define isALNUMC_uni(c) isALPHANUMERIC_uni(c)

2238

#define isALNUMC_LC_uvchr(c) isALPHANUMERIC_LC_uvchr(c)

2239

#define isALNUMC_utf8(p,e) isALPHANUMERIC_utf8(p,e)

2240

#define isALNUMC_LC_utf8(p,e) isALPHANUMERIC_LC_utf8(p,e)

2241

2242

/* On EBCDIC platforms, CTRL-@ is 0, CTRL-A is 1, etc, just like on ASCII,

2243

* except that they don't necessarily mean the same characters, e.g. CTRL-D is

2244

* 4 on both systems, but that is EOT on ASCII; ST on EBCDIC.

2245

* '?' is special-cased on EBCDIC to APC, which is the control there that is

2246

* the outlier from the block that contains the other controls, just like

2247

* toCTRL('?') on ASCII yields DEL, the control that is the outlier from the C0

2248

* block. If it weren't special cased, it would yield a non-control.

2249

* The conversion works both ways, so toCTRL('D') is 4, and toCTRL(4) is D,

2250

* etc. */

2251

/* toCTRL(c): 'c' is asserted (via __ASSERT_) to fit in 8 bits.
 *
 * Non-EBCDIC: upper-case the byte and flip bit 6 (XOR 64).
 *
 * EBCDIC: if 'c' is a printable ASCII-range character, '?' maps to
 * QUESTION_MARK_CTRL and anything else is upper-cased, converted with
 * NATIVE_TO_LATIN1 and XORed with 64.  Otherwise QUESTION_MARK_CTRL maps
 * back to '?', and anything else is XORed with 64 and converted with
 * LATIN1_TO_NATIVE.
 *
 * 'c' is evaluated more than once; do not pass an expression with side
 * effects. */
#ifndef EBCDIC
#  define toCTRL(c)    (__ASSERT_(FITS_IN_8_BITS(c)) toUPPER(((U8)(c))) ^ 64)
#else
#  define toCTRL(c)   (__ASSERT_(FITS_IN_8_BITS(c))                     \
          ((isPRINT_A(c))                                               \
           ? (UNLIKELY((c) == '?')                                      \
             ? QUESTION_MARK_CTRL                                       \
             : (NATIVE_TO_LATIN1(toUPPER((U8) (c))) ^ 64))              \
           : (UNLIKELY((c) == QUESTION_MARK_CTRL)                       \
             ? '?'                                                      \
             : (LATIN1_TO_NATIVE(((U8) (c)) ^ 64)))))
#endif

2263

2264

/* Line numbers are unsigned, 32 bits. */

2265

typedef U32 line_t;
/* NOLINE is the largest value representable in 32 bits, cast to line_t. */
#define NOLINE ((line_t) 4294967295UL)  /* = FFFFFFFF */

2267

2268

/* Helpful alias for version prescan */

2269

/* Both macros call Perl_prescan_version() on 'a' (with the strict flag FALSE
 * or TRUE respectively, 'b' as the third argument and NULL for the remaining
 * four) and are true when the pointer it returns differs from 'a'.
 * 'a' is evaluated twice.  The arguments are parenthesised so that
 * expression arguments expand safely. */
#define is_LAX_VERSION(a,b) \
	((a) != Perl_prescan_version(aTHX_ (a), FALSE, (b), NULL, NULL, NULL, NULL))

#define is_STRICT_VERSION(a,b) \
	((a) != Perl_prescan_version(aTHX_ (a), TRUE, (b), NULL, NULL, NULL, NULL))

2274

2275

/* BADVERSION(a,b,c): if 'b' is non-null, store 'c' through it; then return
 * 'a' from the *enclosing function*.
 *
 * This expands to two bare statements (an 'if' and a 'return' that already
 * carries its own semicolon), so only use it inside a braced block.  The
 * arguments are parenthesised so that expression arguments expand safely;
 * 'b' is evaluated twice when it is non-null. */
#define BADVERSION(a,b,c) \
	if (b) { \
	    *(b) = (c); \
	} \
	return (a);

/* Converts a character known to represent a hexadecimal digit (0-9, A-F, or

2282

* a-f) to its numeric value. READ_XDIGIT's argument is a string pointer,

2283

* which is advanced. The input is validated only by an assert() in DEBUGGING

2284

* builds. In both ASCII and EBCDIC the last 4 bits of the digits are 0-9; and

2285

* the last 4 bits of A-F and a-f are 1-6, so adding 9 yields 10-15 */

2286

/* XDIGIT_VALUE(c): low 4 bits of 'c' when it is a decimal digit, otherwise
 * low 4 bits of 'c' + 9.  'c' is evaluated more than once. */
#define XDIGIT_VALUE(c) (__ASSERT_(isXDIGIT(c)) (0xf & (isDIGIT(c)        \
                                                        ? (c)             \
                                                        : ((c) + 9))))
/* READ_XDIGIT(s): same computation on *s, post-incrementing 's'.  's' must
 * be a modifiable lvalue.  Only the selected arm of the ?: runs, so 's' is
 * advanced once per expansion. */
#define READ_XDIGIT(s)  (__ASSERT_(isXDIGIT(*(s))) (0xf & (isDIGIT(*(s))  \
                                                        ? (*(s)++)        \
                                                        : (*(s)++ + 9))))

/* Converts a character known to represent an octal digit (0-7) to its numeric
 * value.  The input is validated only by an assert() in DEBUGGING builds.  In
 * both ASCII and EBCDIC the last 3 bits of the octal digits range from 0-7. */
#define OCTAL_VALUE(c) (__ASSERT_(isOCTAL(c)) (7 & (c)))

2297

2298

/* Efficiently returns a boolean as to if two native characters are equivalent

2299

* case-insensitively. At least one of the characters must be one of [A-Za-z];

2300

* the ALPHA in the name is to remind you of that. This is asserted() in

2301

* DEBUGGING builds. Because [A-Za-z] are invariant under UTF-8, this macro

2302

* works (on valid input) for both non- and UTF-8-encoded bytes.

2303

*

2304

* When one of the inputs is a compile-time constant and gets folded by the

2305

* compiler, this reduces to an AND and a TEST. On both EBCDIC and ASCII

2306

* machines, 'A' and 'a' differ by a single bit; the same with the upper and

2307

* lower case of all other ASCII-range alphabetics. On ASCII platforms, they

2308

* are 32 apart; on EBCDIC, they are 64. At compile time, this uses an

2309

* exclusive 'or' to find that bit and then inverts it to form a mask, with

2310

* just a single 0, in the bit position where the upper- and lowercase differ.

2311

* */

2312

/* isALPHA_FOLD_EQ(c1, c2): after asserting (via __ASSERT_) that at least one
 * argument satisfies isALPHA_A(), compare the two with the ('A' ^ 'a') bit
 * masked off on both sides.  Each argument is evaluated more than once when
 * __ASSERT_() evaluates its operand. */
#define isALPHA_FOLD_EQ(c1, c2)                                          \
                      (__ASSERT_(isALPHA_A(c1) || isALPHA_A(c2))         \
                      ((c1) & ~('A' ^ 'a')) ==  ((c2) & ~('A' ^ 'a')))
/* Logical negation of isALPHA_FOLD_EQ(). */
#define isALPHA_FOLD_NE(c1, c2) (! isALPHA_FOLD_EQ((c1), (c2)))

2316

2317

/*

2318

=head1 Memory Management

2319

2320

2321

The XSUB-writer's interface to the C C<malloc> function.

2322

2323

Memory obtained by this should B<ONLY> be freed with L</"Safefree">.

2324

2325

In 5.9.3, Newx() and friends replace the older New() API, and drop

2326

the first parameter, I<x>, a debug aid which allowed callers to identify

2327

themselves. This aid has been superseded by a new build option,

2328

PERL_MEM_LOG (see L<perlhacktips/PERL_MEM_LOG>). The older API is still

2329

there for use in XS modules supporting older perls.

2330

2331

2332

The XSUB-writer's interface to the C C<malloc> function, with

2333

cast. See also C<L</Newx>>.

2334

2335

Memory obtained by this should B<ONLY> be freed with L</"Safefree">.

2336

2337

2338

The XSUB-writer's interface to the C C<malloc> function. The allocated

2339

memory is zeroed with C<memzero>. See also C<L</Newx>>.

2340

2341

Memory obtained by this should B<ONLY> be freed with L</"Safefree">.

2342

2343

2344

The XSUB-writer's interface to the C C<realloc> function.

2345

2346

Memory obtained by this should B<ONLY> be freed with L</"Safefree">.

2347

2348

2349

The XSUB-writer's interface to the C C<realloc> function, with

2350

cast.

2351

2352

Memory obtained by this should B<ONLY> be freed with L</"Safefree">.

2353

2354

=for apidoc Am|void|Safefree|void* ptr

2355

The XSUB-writer's interface to the C C<free> function.

2356

2357

This should B<ONLY> be used on memory obtained using L</"Newx"> and friends.

2358

2359

2360

The XSUB-writer's interface to the C C<memmove> function. The C<src> is the

2361

source, C<dest> is the destination, C<nitems> is the number of items, and

2362

C<type> is the type. Can do overlapping moves. See also C<L</Copy>>.

2363

2364

2365

Like C<Move> but returns C<dest>. Useful

2366

for encouraging compilers to tail-call

optimise.

The XSUB-writer's interface to the C C<memcpy> function. The C<src> is the

2371

source, C<dest> is the destination, C<nitems> is the number of items, and

2372

C<type> is the type. May fail on overlapping copies. See also C<L</Move>>.

Like C<Copy> but returns C<dest>. Useful

2377

for encouraging compilers to tail-call

optimise.

The XSUB-writer's interface to the C C<memzero> function. The C<dest> is the

2383

destination, C<nitems> is the number of items, and C<type> is the type.

Like C<Zero> but returns dest. Useful

2388

for encouraging compilers to tail-call

optimise.

This is an architecture-independent macro to copy one structure to another.

Fill up memory with a byte pattern (a byte repeated over and over

2397

again) that hopefully catches attempts to access uninitialized memory.

PoisonWith(0xAB) for catching access to allocated but uninitialized memory.

PoisonWith(0xEF) for catching access to freed memory.

PoisonWith(0xEF) for catching access to freed memory.

=cut */

/* Maintained for backwards-compatibility only. Use newSV() instead. */

2414

#ifndef PERL_CORE
/* Only defined outside PERL_CORE.  The first argument 'x' is ignored; the
 * macro expands to newSV(len). */
#define NEWSV(x,len)	newSV(len)
#endif

2417

2418

/* All-ones value of MEM_SIZE (-1 converted to MEM_SIZE). */
#define MEM_SIZE_MAX ((MEM_SIZE)-1)

/* Adds (PERL_STRLEN_ROUNDUP_QUANTUM - 1) to 'n' and masks off the low bits
 * with ~(quantum - 1).  "UNCHECKED": nothing here guards against the addition
 * wrapping.  NOTE(review): the mask form presumes the quantum is a power of
 * two -- confirm where PERL_STRLEN_ROUNDUP_QUANTUM is defined. */
#define _PERL_STRLEN_ROUNDUP_UNCHECKED(n)                                   \
    (((n) - 1 + PERL_STRLEN_ROUNDUP_QUANTUM)                                \
     & ~((MEM_SIZE)PERL_STRLEN_ROUNDUP_QUANTUM - 1))

2421

2422

#ifdef PERL_MALLOC_WRAP

2423

2424

/* This expression will be constant-folded at compile time. It checks

2425

* whether or not the type of the count n is so small (e.g. U8 or U16, or

2426

* U32 on 64-bit systems) that there's no way a wrap-around could occur.

2427

* As well as avoiding the need for a run-time check in some cases, it's

2428

* designed to avoid compiler warnings like:

2429

* comparison is always false due to limited range of data type

2430

* It's mathematically equivalent to

2431

* max(n) * sizeof(t) > MEM_SIZE_MAX

2432

*/

2433

2434

/* True when either the type of 'n' is wider than MEM_SIZE, or sizeof(t)
 * exceeds 2**(8 * (sizeof(MEM_SIZE) - sizeof(n))).  Only sizeof is applied
 * to 'n', so 'n' itself is never evaluated.  The order of the two operands
 * of || matters: the shift count on the right is only meaningful when the
 * left-hand test is false. */
#  define _MEM_WRAP_NEEDS_RUNTIME_CHECK(n,t) \
    (  sizeof(MEM_SIZE) < sizeof(n) \
    || sizeof(t) > ((MEM_SIZE)1 << 8*(sizeof(MEM_SIZE) - sizeof(n))))

2437

2438

/* This is written in a slightly odd way to avoid various spurious

2439

* compiler warnings. We *want* to write the expression as

2440

* _MEM_WRAP_NEEDS_RUNTIME_CHECK(n,t) && (n > C)

2441

* (for some compile-time constant C), but even when the LHS

2442

* constant-folds to false at compile-time, g++ insists on emitting

2443

* warnings about the RHS (e.g. "comparison is always false"), so instead

* we write it as

*

* (cond ? n : X) > C

*

* where X is a constant with X > C always false. Choosing a value for X

2449

* is tricky. If 0, some compilers will complain about 0 > C always being

2450

* false; if 1, Coverity complains when n happens to be the constant value

2451

* '1', that cond ? 1 : 1 has the same value on both branches; so use C

2452

* for X and hope that nothing else whines.

2453

*/

2454

2455

/* Compares against the constant C = MEM_SIZE_MAX/sizeof(t).  When a run-time
 * check is needed the left operand is (MEM_SIZE)(n); otherwise it is C
 * itself, so the comparison C > C is false. */
#  define _MEM_WRAP_WILL_WRAP(n,t) \
      ((_MEM_WRAP_NEEDS_RUNTIME_CHECK(n,t) ? (MEM_SIZE)(n) : \
            MEM_SIZE_MAX/sizeof(t)) > MEM_SIZE_MAX/sizeof(t))

2458

2459

/* Each of these is a void expression: when _MEM_WRAP_WILL_WRAP(n,t) is true
 * the right-hand side of && runs the croak call; otherwise nothing happens.
 *   MEM_WRAP_CHECK    calls croak_memory_wrap()
 *   MEM_WRAP_CHECK_1  calls Perl_croak_nocontext("%s", a)
 *   MEM_WRAP_CHECK_s  calls Perl_croak_nocontext() with 'a' pasted between
 *                     two empty string literals as the format */
#  define MEM_WRAP_CHECK(n,t) \
	(void)(UNLIKELY(_MEM_WRAP_WILL_WRAP(n,t)) \
        && (croak_memory_wrap(),0))

#  define MEM_WRAP_CHECK_1(n,t,a) \
	(void)(UNLIKELY(_MEM_WRAP_WILL_WRAP(n,t)) \
	&& (Perl_croak_nocontext("%s",(a)),0))

/* "a" arg must be a string literal */
#  define MEM_WRAP_CHECK_s(n,t,a) \
	(void)(UNLIKELY(_MEM_WRAP_WILL_WRAP(n,t)) \
	&& (Perl_croak_nocontext("" a ""),0))

2471

2472

/* MEM_WRAP_CHECK_ is MEM_WRAP_CHECK followed by a comma, for use as the
 * first operand of a comma expression. */
#define MEM_WRAP_CHECK_(n,t) MEM_WRAP_CHECK(n,t),

/* Checked round-up: calls croak_memory_wrap() when 'n' exceeds
 * MEM_SIZE_MAX - 2 * PERL_STRLEN_ROUNDUP_QUANTUM, then yields
 * _PERL_STRLEN_ROUNDUP_UNCHECKED(n).  'n' is evaluated more than once. */
#define PERL_STRLEN_ROUNDUP(n)                                              \
    ((void)(((n) > MEM_SIZE_MAX - 2 * PERL_STRLEN_ROUNDUP_QUANTUM)          \
            ? (croak_memory_wrap(),0)                                       \
            : 0),                                                           \
     _PERL_STRLEN_ROUNDUP_UNCHECKED(n))

2475

#else

2476

2477

/* Without PERL_MALLOC_WRAP the wrap checks expand to nothing, and the
 * round-up is the unchecked form. */
#define MEM_WRAP_CHECK(n,t)
#define MEM_WRAP_CHECK_1(n,t,a)
#define MEM_WRAP_CHECK_s(n,t,a)
#define MEM_WRAP_CHECK_(n,t)

#define PERL_STRLEN_ROUNDUP(n) _PERL_STRLEN_ROUNDUP_UNCHECKED(n)

#endif

#ifdef PERL_MEM_LOG

/*

* If PERL_MEM_LOG is defined, all Newx()s, Renew()s, and Safefree()s

2489

* go through functions, which are handy for debugging breakpoints, but

2490

* which more importantly get the immediate calling environment (file and

2491

* line number, and C function name if available) passed in. This info can

2492

* then be used for logging the calls, for which one gets a sample

2493

* implementation unless -DPERL_MEM_LOG_NOIMPL is also defined.

2494

*

2495

* Known problems:

2496

* - not all memory allocs get logged, only those

2497

* that go through Newx() and derivatives (while all

2498

* Safefrees do get logged)

2499

* - __FILE__ and __LINE__ do not work everywhere

2500

* - __func__ or __FUNCTION__ even less so

2501

* - I think more goes on after the perlio frees but

2502

* the thing is that STDERR gets closed (as do all

2503

* the file descriptors)

2504

* - no deeper calling stack than the caller of the Newx()

2505

* or the kind, but do I look like a C reflection/introspection

2506

* utility to you?

2507

* - the function prototypes for the logging functions

2508

* probably should maybe be somewhere else than handy.h

2509

* - one could consider inlining (macrofying) the logging

2510

* for speed, but I am too lazy

2511

* - one could imagine recording the allocations in a hash,

2512

* (keyed by the allocation address?), and maintain that

2513

* through reallocs and frees, but how to do that without

2514

* any News() happening...?

2515

* - lots of -Ddefines to get useful/controllable output

2516

* - lots of ENV reads

*/

# ifdef PERL_CORE
#  ifndef PERL_MEM_LOG_NOIMPL
/* Event kinds; declared only when PERL_MEM_LOG_NOIMPL is not defined. */
enum mem_log_type {
    MLT_ALLOC,
    MLT_REALLOC,
    MLT_FREE,
    MLT_NEW_SV,
    MLT_DEL_SV
};
#  endif
#  if defined(PERL_IN_SV_C)  /* those are only used in sv.c */
void Perl_mem_log_new_sv(const SV *sv, const char *filename, const int linenumber, const char *funcname);
void Perl_mem_log_del_sv(const SV *sv, const char *filename, const int linenumber, const char *funcname);
#  endif
# endif

#endif

/* With PERL_MEM_LOG, each MEM_LOG_* macro passes its last argument 'a' to
 * the matching Perl_mem_log_*() function along with the item count, item
 * size, stringified type (alloc/realloc only), and __FILE__, __LINE__ and
 * FUNCTION__ of the expansion site. */
#ifdef PERL_MEM_LOG
#define MEM_LOG_ALLOC(n,t,a)     Perl_mem_log_alloc(n,sizeof(t),STRINGIFY(t),a,__FILE__,__LINE__,FUNCTION__)
#define MEM_LOG_REALLOC(n,t,v,a) Perl_mem_log_realloc(n,sizeof(t),STRINGIFY(t),v,a,__FILE__,__LINE__,FUNCTION__)
#define MEM_LOG_FREE(a)          Perl_mem_log_free(a,__FILE__,__LINE__,FUNCTION__)
#endif

/* Fallbacks: whichever of the three is still undefined simply yields 'a'. */
#ifndef MEM_LOG_ALLOC
#define MEM_LOG_ALLOC(n,t,a)     (a)
#endif
#ifndef MEM_LOG_REALLOC
#define MEM_LOG_REALLOC(n,t,v,a) (a)
#endif
#ifndef MEM_LOG_FREE
#define MEM_LOG_FREE(a)          (a)
#endif

2552

2553

/* Allocation macros.  Each assigns to 'v' and yields the assigned value:
 *   Newx   safemalloc() of (n)*sizeof(t) bytes, cast to (t*)
 *   Newxc  same size, but the result is cast to (c*)
 *   Newxz  safecalloc() of 'n' items of sizeof(t) bytes, cast to (t*)
 * MEM_WRAP_CHECK_(n,t) is expanded first and the result passes through
 * MEM_LOG_ALLOC().  'n' appears more than once in the expansion. */
#define Newx(v,n,t)                                                         \
    (v = (MEM_WRAP_CHECK_(n,t)                                              \
          (t*)MEM_LOG_ALLOC(n,t,safemalloc((MEM_SIZE)((n)*sizeof(t))))))
#define Newxc(v,n,t,c)                                                      \
    (v = (MEM_WRAP_CHECK_(n,t)                                              \
          (c*)MEM_LOG_ALLOC(n,t,safemalloc((MEM_SIZE)((n)*sizeof(t))))))
#define Newxz(v,n,t)                                                        \
    (v = (MEM_WRAP_CHECK_(n,t)                                              \
          (t*)MEM_LOG_ALLOC(n,t,safecalloc((n),sizeof(t)))))

2556

2557

#ifndef PERL_CORE
/* pre 5.9.x compatibility: the leading 'x' argument is discarded and the
 * rest is handed to the corresponding Newx* macro. */
#define New(x,v,n,t)	Newx(v,n,t)
#define Newc(x,v,n,t,c)	Newxc(v,n,t,c)
#define Newz(x,v,n,t)	Newxz(v,n,t)
#endif

2563

2564

/* Resize macros.  Each passes 'v' (cast to Malloc_t) to saferealloc() with a
 * size of (n)*sizeof(t) bytes and assigns the result back to 'v':
 *   Renew   casts the result to (t*)
 *   Renewc  casts the result to (c*)
 * MEM_WRAP_CHECK_(n,t) is expanded first and the result passes through
 * MEM_LOG_REALLOC().  'v' and 'n' appear more than once in the expansion. */
#define Renew(v,n,t) \
	  (v = (MEM_WRAP_CHECK_(n,t) (t*)MEM_LOG_REALLOC(n,t,v,saferealloc((Malloc_t)(v),(MEM_SIZE)((n)*sizeof(t))))))
#define Renewc(v,n,t,c) \
	  (v = (MEM_WRAP_CHECK_(n,t) (c*)MEM_LOG_REALLOC(n,t,v,saferealloc((Malloc_t)(v),(MEM_SIZE)((n)*sizeof(t))))))

2568

2569

/* Safefree(d): passes 'd' (cast to Malloc_t, via MEM_LOG_FREE) to safefree().
 * Under PERL_POISON the call is skipped when 'd' is null, and after freeing,
 * the pointer variable itself is overwritten with the Poison() pattern, so
 * 'd' must be an lvalue in that configuration. */
#ifdef PERL_POISON
#define Safefree(d) \
  ((d) ? (void)(safefree(MEM_LOG_FREE((Malloc_t)(d))), Poison(&(d), 1, Malloc_t)) : (void) 0)
#else
#define Safefree(d)	safefree(MEM_LOG_FREE((Malloc_t)(d)))
#endif

2575

2576

/* assert that a valid ptr has been supplied - use this instead of assert(ptr) *

2577

* as it handles cases like constant string arguments without throwing warnings *

2578

* the cast is required, as is the inequality check, to avoid warnings */

2579

#define perl_assert_ptr(p)	assert( ((void*)(p)) != 0 )

/* Move/Copy/Zero: expand MEM_WRAP_CHECK_(n,t), assert that the pointer
 * arguments are non-null, then call memmove/memcpy/memzero on
 * (n) * sizeof(t) bytes.  The result is cast to void. */
#define Move(s,d,n,t)	(MEM_WRAP_CHECK_(n,t) perl_assert_ptr(d), perl_assert_ptr(s), (void)memmove((char*)(d),(const char*)(s), (n) * sizeof(t)))
#define Copy(s,d,n,t)	(MEM_WRAP_CHECK_(n,t) perl_assert_ptr(d), perl_assert_ptr(s), (void)memcpy((char*)(d),(const char*)(s), (n) * sizeof(t)))
#define Zero(d,n,t)	(MEM_WRAP_CHECK_(n,t) perl_assert_ptr(d), (void)memzero((char*)(d), (n) * sizeof(t)))

/* Like above, but the value of the expression is whatever the underlying
 * memmove/memcpy/memzero call yields (no void cast). */
#define MoveD(s,d,n,t)	(MEM_WRAP_CHECK_(n,t) perl_assert_ptr(d), perl_assert_ptr(s), memmove((char*)(d),(const char*)(s), (n) * sizeof(t)))
#define CopyD(s,d,n,t)	(MEM_WRAP_CHECK_(n,t) perl_assert_ptr(d), perl_assert_ptr(s), memcpy((char*)(d),(const char*)(s), (n) * sizeof(t)))
#define ZeroD(d,n,t)	(MEM_WRAP_CHECK_(n,t) perl_assert_ptr(d), memzero((char*)(d), (n) * sizeof(t)))

2590

2591

/* PoisonWith fills (n) * sizeof(t) bytes at 'd' with the byte (U8)(b) using
 * memset, after expanding MEM_WRAP_CHECK_(n,t).  PoisonNew uses 0xAB,
 * PoisonFree uses 0xEF, and Poison is an alias for PoisonFree. */
#define PoisonWith(d,n,t,b)	(MEM_WRAP_CHECK_(n,t) (void)memset((char*)(d), (U8)(b), (n) * sizeof(t)))
#define PoisonNew(d,n,t)	PoisonWith(d,n,t,0xAB)
#define PoisonFree(d,n,t)	PoisonWith(d,n,t,0xEF)
#define Poison(d,n,t)		PoisonFree(d,n,t)

2595

2596

/* Expands to its argument under PERL_POISON, and to nothing otherwise. */
#ifdef PERL_POISON
#  define PERL_POISON_EXPR(x) x
#else
#  define PERL_POISON_EXPR(x)
#endif

2601

2602

/* Copies one object of type 't' from *s to *d by plain assignment.  The
 * source is accessed through a pointer-to-const ('t const *', which stays
 * correct when 't' is itself a pointer type), so a const source is read
 * without casting its const-ness away. */
#define StructCopy(s,d,t) (*((t*)(d)) = *((t const *)(s)))

/*

=head1 Handy Values

=for apidoc Am|STRLEN|C_ARRAY_LENGTH|void *a

2608

2609

Returns the number of elements in the input C array (so you want your

2610

zero-based indices to be less than but not equal to).

2611

2612

=for apidoc Am|void *|C_ARRAY_END|void *a

2613

2614

Returns a pointer to one element past the final element of the input C array.

=cut

C_ARRAY_END is one past the last: half-open/half-closed range, not

2619

last-inclusive range.

2620

*/

2621

/* Both macros rely on sizeof(a), so 'a' must be a true array object, not a
 * pointer (for a pointer, sizeof yields the pointer size instead). */
#define C_ARRAY_LENGTH(a)	(sizeof(a)/sizeof((a)[0]))
#define C_ARRAY_END(a)		((a) + C_ARRAY_LENGTH(a))

/* Perl_va_copy(s, d) takes the SOURCE first and the DESTINATION second, the
 * reverse of va_copy/__va_copy, to which it forwards with the arguments
 * swapped.  When neither is available as a macro it falls back to Copy()
 * of a single va_list. */
#ifdef NEED_VA_COPY
# ifdef va_copy
#  define Perl_va_copy(s, d) va_copy(d, s)
# elif defined(__va_copy)
#  define Perl_va_copy(s, d) __va_copy(d, s)
# else
#  define Perl_va_copy(s, d) Copy(s, d, 1, va_list)
# endif
#endif

/* convenience debug macros */

2635

/* Under USE_ITHREADS these supply a printf-style format fragment and the
 * matching (void *)my_perl argument, with or without a leading and/or
 * trailing comma.  Otherwise they all expand to nothing. */
#ifdef USE_ITHREADS
#define pTHX_FORMAT  "Perl interpreter: 0x%p"
#define pTHX__FORMAT ", Perl interpreter: 0x%p"
#define pTHX_VALUE_   (void *)my_perl,
#define pTHX_VALUE    (void *)my_perl
#define pTHX__VALUE_ ,(void *)my_perl,
#define pTHX__VALUE  ,(void *)my_perl
#else
#define pTHX_FORMAT
#define pTHX__FORMAT
#define pTHX_VALUE_
#define pTHX_VALUE
#define pTHX__VALUE_
#define pTHX__VALUE
#endif /* USE_ITHREADS */

2650

2651

/* Perl_deprecate was not part of the public API, and did not have a deprecate()

2652

shortcut macro defined without -DPERL_CORE. Neither codesearch.google.com nor

2653

CPAN::Unpack show any users outside the core. */

2654

#ifdef PERL_CORE
/* All three call Perl_ck_warner_d() with packWARN(WARN_DEPRECATED) and a
 * format built by adjacent-string-literal concatenation, so every argument
 * must be a string literal:
 *   deprecate(s)                     "Use of " s " is deprecated"
 *   deprecate_disappears_in(w, m)    m ", and will disappear in Perl " w
 *   deprecate_fatal_in(w, m)         m ". Its use will be fatal in Perl " w */
#  define deprecate(s) Perl_ck_warner_d(aTHX_ packWARN(WARN_DEPRECATED),    \
                                            "Use of " s " is deprecated")
#  define deprecate_disappears_in(when,message) \
              Perl_ck_warner_d(aTHX_ packWARN(WARN_DEPRECATED),    \
                               message ", and will disappear in Perl " when)
#  define deprecate_fatal_in(when,message) \
              Perl_ck_warner_d(aTHX_ packWARN(WARN_DEPRECATED),    \
                               message ". Its use will be fatal in Perl " when)
#endif

2664

2665

/* Internal macros to deal with gids and uids */

2666

#ifdef PERL_CORE

/* The SV slot used for a uid is chosen at compile time:
 *   Uid_t_size > IVSIZE   -> NV  (sv_setnv / SvNV)
 *   Uid_t_sign <= 0       -> IV  (sv_setiv / SvIV)
 *   otherwise             -> UV  (sv_setuv / SvUV)
 * NOTE(review): Uid_t_sign <= 0 presumably denotes a signed Uid_t -- confirm
 * against the configuration that defines it. */
#  if Uid_t_size > IVSIZE
#    define sv_setuid(sv, uid)       sv_setnv((sv), (NV)(uid))
#    define SvUID(sv)                SvNV(sv)
#  elif Uid_t_sign <= 0
#    define sv_setuid(sv, uid)       sv_setiv((sv), (IV)(uid))
#    define SvUID(sv)                SvIV(sv)
#  else
#    define sv_setuid(sv, uid)       sv_setuv((sv), (UV)(uid))
#    define SvUID(sv)                SvUV(sv)
#  endif /* Uid_t_size */

/* Same selection for gids, keyed on Gid_t_size and Gid_t_sign. */
#  if Gid_t_size > IVSIZE
#    define sv_setgid(sv, gid)       sv_setnv((sv), (NV)(gid))
#    define SvGID(sv)                SvNV(sv)
#  elif Gid_t_sign <= 0
#    define sv_setgid(sv, gid)       sv_setiv((sv), (IV)(gid))
#    define SvGID(sv)                SvIV(sv)
#  else
#    define sv_setgid(sv, gid)       sv_setuv((sv), (UV)(gid))
#    define SvGID(sv)                SvUV(sv)
#  endif /* Gid_t_size */

#endif

#endif /* PERL_HANDY_H_ */

2693

2694

/*

2695

* ex: set ts=8 sts=4 sw=4 et:

2696

*/