perl5.git.perl.org Git - perl5.git/blame_incremental

Commit	Line	Data
	1	/* handy.h
	2	*
	3	* Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1999, 2000,
	4	* 2001, 2002, 2004, 2005, 2006, 2007, 2008, 2012 by Larry Wall and others
	5	*
	6	* You may distribute under the terms of either the GNU General Public
	7	* License or the Artistic License, as specified in the README file.
	8	*
	9	*/
	10
	11	/* IMPORTANT NOTE: Everything whose name begins with an underscore is for
	12	* internal core Perl use only. */
	13
	14	#ifndef HANDY_H /* Guard against nested #inclusion */
	15	#define HANDY_H
	16
	17	#if !defined(__STDC__)
	18	#ifdef NULL
	19	#undef NULL
	20	#endif
	21	# define NULL 0
	22	#endif
	23
	24	#ifndef PERL_CORE
	25	# define Null(type) ((type)NULL)
	26
	27	/*
	28	=head1 Handy Values
	29
	30	=for apidoc AmU||Nullch
	31	Null character pointer. (No longer available when C<PERL_CORE> is
	32	defined.)
	33
	34	=for apidoc AmU||Nullsv
	35	Null SV pointer. (No longer available when C<PERL_CORE> is defined.)
	36
	37	=cut
	38	*/
	39
	40	# define Nullch Null(char*)
	41	# define Nullfp Null(PerlIO*)
	42	# define Nullsv Null(SV*)
	43	#endif
	44
	45	#ifdef TRUE
	46	#undef TRUE
	47	#endif
	48	#ifdef FALSE
	49	#undef FALSE
	50	#endif
	51	#define TRUE (1)
	52	#define FALSE (0)
	53
	54	/* The MUTABLE_*() macros cast pointers to the types shown, in such a way
	55	* (compiler permitting) that casting away const-ness will give a warning;
	56	* e.g.:
	57	*
	58	* const SV *sv = ...;
	59	* AV *av1 = (AV*)sv; <== BAD: the const has been silently cast away
	60	* AV *av2 = MUTABLE_AV(sv); <== GOOD: it may warn
	61	*/
	62
	63	#if defined(__GNUC__) && !defined(PERL_GCC_BRACE_GROUPS_FORBIDDEN)
	64	# define MUTABLE_PTR(p) ({ void *_p = (p); _p; })
	65	#else
	66	# define MUTABLE_PTR(p) ((void *) (p))
	67	#endif
	68
	69	#define MUTABLE_AV(p) ((AV *)MUTABLE_PTR(p))
	70	#define MUTABLE_CV(p) ((CV *)MUTABLE_PTR(p))
	71	#define MUTABLE_GV(p) ((GV *)MUTABLE_PTR(p))
	72	#define MUTABLE_HV(p) ((HV *)MUTABLE_PTR(p))
	73	#define MUTABLE_IO(p) ((IO *)MUTABLE_PTR(p))
	74	#define MUTABLE_SV(p) ((SV *)MUTABLE_PTR(p))
	75
	76	#if defined(I_STDBOOL) && !defined(PERL_BOOL_AS_CHAR)
	77	# include <stdbool.h>
	78	# ifndef HAS_BOOL
	79	# define HAS_BOOL 1
	80	# endif
	81	#endif
	82
	83	/* bool is built-in for g++-2.6.3 and later, which might be used
	84	for extensions. <_G_config.h> defines _G_HAVE_BOOL, but we can't
	85	be sure _G_config.h will be included before this file. _G_config.h
	86	also defines _G_HAVE_BOOL for both gcc and g++, but only g++
	87	actually has bool. Hence, _G_HAVE_BOOL is pretty useless for us.
	88	g++ can be identified by __GNUG__.
	89	Andy Dougherty February 2000
	90	*/
	91	#ifdef __GNUG__ /* GNU g++ has bool built-in */
	92	# ifndef PERL_BOOL_AS_CHAR
	93	# ifndef HAS_BOOL
	94	# define HAS_BOOL 1
	95	# endif
	96	# endif
	97	#endif
	98
	99	/* The NeXT dynamic loader headers will not build with the bool macro
	100	So declare them now to clear confusion.
	101	*/
	102	#if defined(NeXT) || defined(__NeXT__)
	103	# undef FALSE
	104	# undef TRUE
	105	typedef enum bool { FALSE = 0, TRUE = 1 } bool;
	106	# define ENUM_BOOL 1
	107	# ifndef HAS_BOOL
	108	# define HAS_BOOL 1
	109	# endif /* !HAS_BOOL */
	110	#endif /* NeXT || __NeXT__ */
	111
	112	#ifndef HAS_BOOL
	113	# ifdef bool
	114	# undef bool
	115	# endif
	116	# define bool char
	117	# define HAS_BOOL 1
	118	#endif
	119
	120	/* cast-to-bool. A simple (bool) cast may not do the right thing: if bool is
	121	* defined as char for example, then the cast from int is
	122	* implementation-defined. (bool)!!(cbool) in a ternary triggers a bug in xlc on
	123	* AIX */
	124	#define cBOOL(cbool) ((cbool) ? (bool)1 : (bool)0)
	125
	126	/* Try to figure out __func__ or __FUNCTION__ equivalent, if any.
	127	* XXX Should really be a Configure probe, with HAS__FUNCTION__
	128	* and FUNCTION__ as results.
	129	* XXX Similarly, a Configure probe for __FILE__ and __LINE__ is needed. */
	130	#if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L) || (defined(__SUNPRO_C)) /* C99 or close enough. */
	131	# define FUNCTION__ __func__
	132	#else
	133	# if (defined(USING_MSVC6)) || /* MSVC6 has neither __func__ nor __FUNCTION__ and no good workarounds, either. */ \
	134	(defined(__DECC_VER)) /* Tru64 or VMS, and strict C89 being used, but not modern enough cc (in Tru64, -c99 not known, only -std1). */
	135	# define FUNCTION__ ""
	136	# else
	137	# define FUNCTION__ __FUNCTION__ /* Common extension. */
	138	# endif
	139	#endif
	140
	141	/* XXX A note on the perl source internal type system. The
	142	original intent was that I32 be exactly 32 bits.
	143
	144	Currently, we only guarantee that I32 is at least 32 bits.
	145	Specifically, if int is 64 bits, then so is I32. (This is the case
	146	for the Cray.) This has the advantage of meshing nicely with
	147	standard library calls (where we pass an I32 and the library is
	148	expecting an int), but the disadvantage that an I32 is not 32 bits.
	149	Andy Dougherty August 1996
	150
	151	There is no guarantee that there is any integral type with
	152	exactly 32 bits. It is perfectly legal for a system to have
	153	sizeof(short) == sizeof(int) == sizeof(long) == 8.
	154
	155	Similarly, there is no guarantee that I16 and U16 have exactly 16
	156	bits.
	157
	158	For dealing with issues that may arise from various 32/64-bit
	159	systems, we will ask Configure to check out
	160
	161	SHORTSIZE == sizeof(short)
	162	INTSIZE == sizeof(int)
	163	LONGSIZE == sizeof(long)
	164	LONGLONGSIZE == sizeof(long long) (if HAS_LONG_LONG)
	165	PTRSIZE == sizeof(void *)
	166	DOUBLESIZE == sizeof(double)
	167	LONG_DOUBLESIZE == sizeof(long double) (if HAS_LONG_DOUBLE).
	168
	169	*/
	170
	171	#ifdef I_INTTYPES /* e.g. Linux has int64_t without <inttypes.h> */
	172	# include <inttypes.h>
	173	# ifdef INT32_MIN_BROKEN
	174	# undef INT32_MIN
	175	# define INT32_MIN (-2147483647-1)
	176	# endif
	177	# ifdef INT64_MIN_BROKEN
	178	# undef INT64_MIN
	179	# define INT64_MIN (-9223372036854775807LL-1)
	180	# endif
	181	#endif
	182
	183	typedef I8TYPE I8;
	184	typedef U8TYPE U8;
	185	typedef I16TYPE I16;
	186	typedef U16TYPE U16;
	187	typedef I32TYPE I32;
	188	typedef U32TYPE U32;
	189	#ifdef PERL_CORE
	190	# ifdef HAS_QUAD
	191	typedef I64TYPE I64;
	192	typedef U64TYPE U64;
	193	# endif
	194	#endif /* PERL_CORE */
	195
	196	/* INT64_C/UINT64_C are C99 from <stdint.h> (so they will not be
	197	* available in strict C89 mode), but they are nice, so let's define
	198	* them if necessary. */
	199	#if defined(HAS_QUAD)
	200	# undef PeRl_INT64_C
	201	# undef PeRl_UINT64_C
	202	/* Prefer the native integer types (int and long) over long long
	203	* (which is not C89) and Win32-specific __int64. */
	204	# if QUADKIND == QUAD_IS_INT && INTSIZE == 8
	205	# define PeRl_INT64_C(c) (c)
	206	# define PeRl_UINT64_C(c) CAT2(c,U)
	207	# endif
	208	# if QUADKIND == QUAD_IS_LONG && LONGSIZE == 8
	209	# define PeRl_INT64_C(c) CAT2(c,L)
	210	# define PeRl_UINT64_C(c) CAT2(c,UL)
	211	# endif
	212	# if QUADKIND == QUAD_IS_LONG_LONG && defined(HAS_LONG_LONG)
	213	# define PeRl_INT64_C(c) CAT2(c,LL)
	214	# define PeRl_UINT64_C(c) CAT2(c,ULL)
	215	# endif
	216	# if QUADKIND == QUAD_IS___INT64
	217	# define PeRl_INT64_C(c) CAT2(c,I64)
	218	# define PeRl_UINT64_C(c) CAT2(c,UI64)
	219	# endif
	220	# ifndef PeRl_INT64_C
	221	# define PeRl_INT64_C(c) ((I64TYPE)(c)) /* last resort */
	222	# define PeRl_UINT64_C(c) ((U64TYPE)(c))
	223	# endif
	224	/* In OS X the INT64_C/UINT64_C are defined with LL/ULL, which will
	225	* not fly with C89-pedantic gcc, so let's undefine them first so that
	226	* we can redefine them with our native integer preferring versions. */
	227	# if defined(PERL_DARWIN) && defined(PERL_GCC_PEDANTIC)
	228	# undef INT64_C
	229	# undef UINT64_C
	230	# endif
	231	# ifndef INT64_C
	232	# define INT64_C(c) PeRl_INT64_C(c)
	233	# endif
	234	# ifndef UINT64_C
	235	# define UINT64_C(c) PeRl_UINT64_C(c)
	236	# endif
	237	#endif
	238
	239	#if defined(UINT8_MAX) && defined(INT16_MAX) && defined(INT32_MAX)
	240
	241	/* I8_MAX and I8_MIN constants are not defined, as I8 is an ambiguous type.
	242	Please search CHAR_MAX in perl.h for further details. */
	243	#define U8_MAX UINT8_MAX
	244	#define U8_MIN UINT8_MIN
	245
	246	#define I16_MAX INT16_MAX
	247	#define I16_MIN INT16_MIN
	248	#define U16_MAX UINT16_MAX
	249	#define U16_MIN UINT16_MIN
	250
	251	#define I32_MAX INT32_MAX
	252	#define I32_MIN INT32_MIN
	253	#ifndef UINT32_MAX_BROKEN /* e.g. HP-UX with gcc messes this up */
	254	# define U32_MAX UINT32_MAX
	255	#else
	256	# define U32_MAX 4294967295U
	257	#endif
	258	#define U32_MIN UINT32_MIN
	259
	260	#else
	261
	262	/* I8_MAX and I8_MIN constants are not defined, as I8 is an ambiguous type.
	263	Please search CHAR_MAX in perl.h for further details. */
	264	#define U8_MAX PERL_UCHAR_MAX
	265	#define U8_MIN PERL_UCHAR_MIN
	266
	267	#define I16_MAX PERL_SHORT_MAX
	268	#define I16_MIN PERL_SHORT_MIN
	269	#define U16_MAX PERL_USHORT_MAX
	270	#define U16_MIN PERL_USHORT_MIN
	271
	272	#if LONGSIZE > 4
	273	# define I32_MAX PERL_INT_MAX
	274	# define I32_MIN PERL_INT_MIN
	275	# define U32_MAX PERL_UINT_MAX
	276	# define U32_MIN PERL_UINT_MIN
	277	#else
	278	# define I32_MAX PERL_LONG_MAX
	279	# define I32_MIN PERL_LONG_MIN
	280	# define U32_MAX PERL_ULONG_MAX
	281	# define U32_MIN PERL_ULONG_MIN
	282	#endif
	283
	284	#endif
	285
	286	/* log(2) is pretty close to 0.30103, just in case anyone is grepping for it */
	287	#define BIT_DIGITS(N) (((N)*146)/485 + 1) /* log10(2) =~ 146/485 */
	288	#define TYPE_DIGITS(T) BIT_DIGITS(sizeof(T) * 8)
	289	#define TYPE_CHARS(T) (TYPE_DIGITS(T) + 2) /* sign, NUL */
	290
	291	#define Ctl(ch) ((ch) & 037)
	292
	293	/* This is a helper macro to avoid preprocessor issues, replaced by nothing
	294	* unless under DEBUGGING, where it expands to an assert of its argument,
	295	* followed by a comma (hence the comma operator). If we just used a straight
	296	* assert(), we would get a comma with nothing before it when not DEBUGGING */
	297	#ifdef DEBUGGING
	298	# define __ASSERT_(statement) assert(statement),
	299	#else
	300	# define __ASSERT_(statement)
	301	#endif
	302
	303	/*
	304	=head1 SV-Body Allocation
	305
	306	=for apidoc Ama|SV*|newSVpvs|const char* s
	307	Like C<newSVpvn>, but takes a literal C<NUL>-terminated string instead of a
	308	string/length pair.
	309
	310	=for apidoc Ama|SV*|newSVpvs_flags|const char* s|U32 flags
	311	Like C<newSVpvn_flags>, but takes a literal C<NUL>-terminated string instead of
	312	a string/length pair.
	313
	314	=for apidoc Ama|SV*|newSVpvs_share|const char* s
	315	Like C<newSVpvn_share>, but takes a literal C<NUL>-terminated string instead of
	316	a string/length pair and omits the hash parameter.
	317
	318	=for apidoc Am|void|sv_catpvs_flags|SV* sv|const char* s|I32 flags
	319	Like C<sv_catpvn_flags>, but takes a literal C<NUL>-terminated string instead
	320	of a string/length pair.
	321
	322	=for apidoc Am|void|sv_catpvs_nomg|SV* sv|const char* s
	323	Like C<sv_catpvn_nomg>, but takes a literal string instead of a
	324	string/length pair.
	325
	326	=for apidoc Am|void|sv_catpvs|SV* sv|const char* s
	327	Like C<sv_catpvn>, but takes a literal string instead of a string/length pair.
	328
	329	=for apidoc Am|void|sv_catpvs_mg|SV* sv|const char* s
	330	Like C<sv_catpvn_mg>, but takes a literal string instead of a
	331	string/length pair.
	332
	333	=for apidoc Am|void|sv_setpvs|SV* sv|const char* s
	334	Like C<sv_setpvn>, but takes a literal string instead of a string/length pair.
	335
	336	=for apidoc Am|void|sv_setpvs_mg|SV* sv|const char* s
	337	Like C<sv_setpvn_mg>, but takes a literal string instead of a
	338	string/length pair.
	339
	340	=for apidoc Am|SV *|sv_setref_pvs|const char* s
	341	Like C<sv_setref_pvn>, but takes a literal string instead of a
	342	string/length pair.
	343
	344	=head1 Memory Management
	345
	346	=for apidoc Ama|char*|savepvs|const char* s
	347	Like C<savepvn>, but takes a literal C<NUL>-terminated string instead of a
	348	string/length pair.
	349
	350	=for apidoc Ama|char*|savesharedpvs|const char* s
	351	A version of C<savepvs()> which allocates the duplicate string in memory
	352	which is shared between threads.
	353
	354	=head1 GV Functions
	355
	356	=for apidoc Am|HV*|gv_stashpvs|const char* name|I32 create
	357	Like C<gv_stashpvn>, but takes a literal string instead of a string/length pair.
	358
	359	=head1 Hash Manipulation Functions
	360
	361	=for apidoc Am|SV**|hv_fetchs|HV* tb|const char* key|I32 lval
	362	Like C<hv_fetch>, but takes a literal string instead of a string/length pair.
	363
	364	=for apidoc Am|SV**|hv_stores|HV* tb|const char* key|NULLOK SV* val
	365	Like C<hv_store>, but takes a literal string instead of a string/length pair
	366	and omits the hash parameter.
	367
	368	=head1 Lexer interface
	369
	370	=for apidoc Amx|void|lex_stuff_pvs|const char *pv|U32 flags
	371
	372	Like L</lex_stuff_pvn>, but takes a literal string instead of a
	373	string/length pair.
	374
	375	=cut
	376	*/
	377
	378	/* concatenating with "" ensures that only literal strings are accepted as
	379	* argument */
	380	#define STR_WITH_LEN(s) ("" s ""), (sizeof(s)-1)
	381
	382	/* note that STR_WITH_LEN() can't be used as argument to macros or functions
	383	* that under some configurations might be macros, which means that it requires
	384	* the full Perl_xxx(aTHX_ ...) form for any API calls where it's used.
	385	*/
	386
	387	/* STR_WITH_LEN() shortcuts */
	388	#define newSVpvs(str) Perl_newSVpvn(aTHX_ STR_WITH_LEN(str))
	389	#define newSVpvs_flags(str,flags) \
	390	Perl_newSVpvn_flags(aTHX_ STR_WITH_LEN(str), flags)
	391	#define newSVpvs_share(str) Perl_newSVpvn_share(aTHX_ STR_WITH_LEN(str), 0)
	392	#define sv_catpvs_flags(sv, str, flags) \
	393	Perl_sv_catpvn_flags(aTHX_ sv, STR_WITH_LEN(str), flags)
	394	#define sv_catpvs_nomg(sv, str) \
	395	Perl_sv_catpvn_flags(aTHX_ sv, STR_WITH_LEN(str), 0)
	396	#define sv_catpvs(sv, str) \
	397	Perl_sv_catpvn_flags(aTHX_ sv, STR_WITH_LEN(str), SV_GMAGIC)
	398	#define sv_catpvs_mg(sv, str) \
	399	Perl_sv_catpvn_flags(aTHX_ sv, STR_WITH_LEN(str), SV_GMAGIC|SV_SMAGIC)
	400	#define sv_setpvs(sv, str) Perl_sv_setpvn(aTHX_ sv, STR_WITH_LEN(str))
	401	#define sv_setpvs_mg(sv, str) Perl_sv_setpvn_mg(aTHX_ sv, STR_WITH_LEN(str))
	402	#define sv_setref_pvs(rv, classname, str) \
	403	Perl_sv_setref_pvn(aTHX_ rv, classname, STR_WITH_LEN(str))
	404	#define savepvs(str) Perl_savepvn(aTHX_ STR_WITH_LEN(str))
	405	#define savesharedpvs(str) Perl_savesharedpvn(aTHX_ STR_WITH_LEN(str))
	406	#define gv_stashpvs(str, create) \
	407	Perl_gv_stashpvn(aTHX_ STR_WITH_LEN(str), create)
	408	#define gv_fetchpvs(namebeg, add, sv_type) \
	409	Perl_gv_fetchpvn_flags(aTHX_ STR_WITH_LEN(namebeg), add, sv_type)
	410	#define gv_fetchpvn(namebeg, len, add, sv_type) \
	411	Perl_gv_fetchpvn_flags(aTHX_ namebeg, len, add, sv_type)
	412	#define sv_catxmlpvs(dsv, str, utf8) \
	413	Perl_sv_catxmlpvn(aTHX_ dsv, STR_WITH_LEN(str), utf8)
	414	#define hv_fetchs(hv,key,lval) \
	415	((SV **)Perl_hv_common(aTHX_ (hv), NULL, STR_WITH_LEN(key), 0, \
	416	(lval) ? (HV_FETCH_JUST_SV | HV_FETCH_LVALUE) \
	417	: HV_FETCH_JUST_SV, NULL, 0))
	418
	419	#define hv_stores(hv,key,val) \
	420	((SV **)Perl_hv_common(aTHX_ (hv), NULL, STR_WITH_LEN(key), 0, \
	421	(HV_FETCH_ISSTORE|HV_FETCH_JUST_SV), (val), 0))
	422
	423	#define lex_stuff_pvs(pv,flags) Perl_lex_stuff_pvn(aTHX_ STR_WITH_LEN(pv), flags)
	424
	425	#define get_cvs(str, flags) \
	426	Perl_get_cvn_flags(aTHX_ STR_WITH_LEN(str), (flags))
	427
	428	/*
	429	=head1 Miscellaneous Functions
	430
	431	=for apidoc Am|bool|strNE|char* s1|char* s2
	432	Test two strings to see if they are different. Returns true or
	433	false.
	434
	435	=for apidoc Am|bool|strEQ|char* s1|char* s2
	436	Test two strings to see if they are equal. Returns true or false.
	437
	438	=for apidoc Am|bool|strLT|char* s1|char* s2
	439	Test two strings to see if the first, C<s1>, is less than the second,
	440	C<s2>. Returns true or false.
	441
	442	=for apidoc Am|bool|strLE|char* s1|char* s2
	443	Test two strings to see if the first, C<s1>, is less than or equal to the
	444	second, C<s2>. Returns true or false.
	445
	446	=for apidoc Am|bool|strGT|char* s1|char* s2
	447	Test two strings to see if the first, C<s1>, is greater than the second,
	448	C<s2>. Returns true or false.
	449
	450	=for apidoc Am|bool|strGE|char* s1|char* s2
	451	Test two strings to see if the first, C<s1>, is greater than or equal to
	452	the second, C<s2>. Returns true or false.
	453
	454	=for apidoc Am|bool|strnNE|char* s1|char* s2|STRLEN len
	455	Test two strings to see if they are different. The C<len> parameter
	456	indicates the number of bytes to compare. Returns true or false. (A
	457	wrapper for C<strncmp>).
	458
	459	=for apidoc Am|bool|strnEQ|char* s1|char* s2|STRLEN len
	460	Test two strings to see if they are equal. The C<len> parameter indicates
	461	the number of bytes to compare. Returns true or false. (A wrapper for
	462	C<strncmp>).
	463
	464	=cut
	465	*/
	466
	467	#define strNE(s1,s2) (strcmp(s1,s2))
	468	#define strEQ(s1,s2) (!strcmp(s1,s2))
	469	#define strLT(s1,s2) (strcmp(s1,s2) < 0)
	470	#define strLE(s1,s2) (strcmp(s1,s2) <= 0)
	471	#define strGT(s1,s2) (strcmp(s1,s2) > 0)
	472	#define strGE(s1,s2) (strcmp(s1,s2) >= 0)
	473	#define strnNE(s1,s2,l) (strncmp(s1,s2,l))
	474	#define strnEQ(s1,s2,l) (!strncmp(s1,s2,l))
	475
	476	#ifdef HAS_MEMCMP
	477	# define memNE(s1,s2,l) (memcmp(s1,s2,l))
	478	# define memEQ(s1,s2,l) (!memcmp(s1,s2,l))
	479	#else
	480	# define memNE(s1,s2,l) (bcmp(s1,s2,l))
	481	# define memEQ(s1,s2,l) (!bcmp(s1,s2,l))
	482	#endif
	483
	484	#define memEQs(s1, l, s2) \
	485	(sizeof(s2)-1 == l && memEQ(s1, ("" s2 ""), (sizeof(s2)-1)))
	486	#define memNEs(s1, l, s2) !memEQs(s1, l, s2)
	487
	488	/*
	489	* Character classes.
	490	*
	491	* Unfortunately, the introduction of locales means that we
	492	* can't trust isupper(), etc. to tell the truth. And when
	493	* it comes to /\w+/ with tainting enabled, we must be able
	494	* to trust our character classes.
	495	*
	496	* Therefore, the default tests in the text of Perl will be
	497	* independent of locale. Any code that wants to depend on
	498	* the current locale will use the tests that begin with "lc".
	499	*/
	500

1

/* handy.h

2

*

3

* Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1999, 2000,

4

* 2001, 2002, 2004, 2005, 2006, 2007, 2008, 2012 by Larry Wall and others

5

*

6

* You may distribute under the terms of either the GNU General Public

7

* License or the Artistic License, as specified in the README file.

*

*/

/* IMPORTANT NOTE: Everything whose name begins with an underscore is for

12

* internal core Perl use only. */

13

14

#ifndef HANDY_H /* Guard against nested #inclusion */

15

#define HANDY_H

16

17

#if !defined(__STDC__)

#ifdef NULL

#undef NULL

#endif

# define NULL 0

#endif

#ifndef PERL_CORE

# define Null(type) ((type)NULL)

/*

=head1 Handy Values

=for apidoc AmU||Nullch

31

Null character pointer. (No longer available when C<PERL_CORE> is

32

defined.)

33

34

=for apidoc AmU||Nullsv

35

Null SV pointer. (No longer available when C<PERL_CORE> is defined.)

=cut

*/

# define Nullch Null(char*)

41

# define Nullfp Null(PerlIO*)

42

# define Nullsv Null(SV*)

#endif

#ifdef TRUE

#undef TRUE

#endif

#ifdef FALSE

#undef FALSE

#endif

#define TRUE (1)

#define FALSE (0)

/* The MUTABLE_*() macros cast pointers to the types shown, in such a way

55

* (compiler permitting) that casting away const-ness will give a warning;

56

* e.g.:

57

*

58

* const SV *sv = ...;

59

* AV *av1 = (AV*)sv; <== BAD: the const has been silently cast away

60

* AV *av2 = MUTABLE_AV(sv); <== GOOD: it may warn

61

*/

62

63

#if defined(__GNUC__) && !defined(PERL_GCC_BRACE_GROUPS_FORBIDDEN)

64

# define MUTABLE_PTR(p) ({ void *_p = (p); _p; })

65

#else

66

# define MUTABLE_PTR(p) ((void *) (p))

67

#endif

68

69

#define MUTABLE_AV(p) ((AV *)MUTABLE_PTR(p))

70

#define MUTABLE_CV(p) ((CV *)MUTABLE_PTR(p))

71

#define MUTABLE_GV(p) ((GV *)MUTABLE_PTR(p))

72

#define MUTABLE_HV(p) ((HV *)MUTABLE_PTR(p))

73

#define MUTABLE_IO(p) ((IO *)MUTABLE_PTR(p))

74

#define MUTABLE_SV(p) ((SV *)MUTABLE_PTR(p))

75

76

#if defined(I_STDBOOL) && !defined(PERL_BOOL_AS_CHAR)

77

# include <stdbool.h>

# ifndef HAS_BOOL

# define HAS_BOOL 1

# endif

#endif

/* bool is built-in for g++-2.6.3 and later, which might be used

84

for extensions. <_G_config.h> defines _G_HAVE_BOOL, but we can't

85

be sure _G_config.h will be included before this file. _G_config.h

86

also defines _G_HAVE_BOOL for both gcc and g++, but only g++

87

actually has bool. Hence, _G_HAVE_BOOL is pretty useless for us.

88

g++ can be identified by __GNUG__.

89

Andy Dougherty February 2000

90

*/

91

#ifdef __GNUG__ /* GNU g++ has bool built-in */

92

# ifndef PERL_BOOL_AS_CHAR

# ifndef HAS_BOOL

# define HAS_BOOL 1

# endif

# endif

#endif

/* The NeXT dynamic loader headers will not build with the bool macro

100

So declare them now to clear confusion.

101

*/

102

#if defined(NeXT) || defined(__NeXT__)

103

# undef FALSE

104

# undef TRUE

105

typedef enum bool { FALSE = 0, TRUE = 1 } bool;

# define ENUM_BOOL 1

# ifndef HAS_BOOL

# define HAS_BOOL 1

# endif /* !HAS_BOOL */

110

#endif /* NeXT || __NeXT__ */

#ifndef HAS_BOOL

# ifdef bool

# undef bool

# endif

# define bool char

# define HAS_BOOL 1

#endif

/* cast-to-bool. A simple (bool) cast may not do the right thing: if bool is

121

* defined as char for example, then the cast from int is

122

* implementation-defined. (bool)!!(cbool) in a ternary triggers a bug in xlc on

123

* AIX */

124

#define cBOOL(cbool) ((cbool) ? (bool)1 : (bool)0)

125

126

/* Try to figure out __func__ or __FUNCTION__ equivalent, if any.

127

* XXX Should really be a Configure probe, with HAS__FUNCTION__

128

* and FUNCTION__ as results.

129

* XXX Similarly, a Configure probe for __FILE__ and __LINE__ is needed. */

130

#if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L) || (defined(__SUNPRO_C)) /* C99 or close enough. */

131

# define FUNCTION__ __func__

132

#else

133

# if (defined(USING_MSVC6)) || /* MSVC6 has neither __func__ nor __FUNCTION__ and no good workarounds, either. */ \

134

(defined(__DECC_VER)) /* Tru64 or VMS, and strict C89 being used, but not modern enough cc (in Tru64, -c99 not known, only -std1). */

135

# define FUNCTION__ ""

136

# else

137

# define FUNCTION__ __FUNCTION__ /* Common extension. */

# endif

#endif

/* XXX A note on the perl source internal type system. The

142

original intent was that I32 be *exactly* 32 bits.

143

144

Currently, we only guarantee that I32 is *at least* 32 bits.

145

Specifically, if int is 64 bits, then so is I32. (This is the case

146

for the Cray.) This has the advantage of meshing nicely with

147

standard library calls (where we pass an I32 and the library is

148

expecting an int), but the disadvantage that an I32 is not 32 bits.

149

Andy Dougherty August 1996

150

151

There is no guarantee that there is *any* integral type with

152

exactly 32 bits. It is perfectly legal for a system to have

153

sizeof(short) == sizeof(int) == sizeof(long) == 8.

154

155

Similarly, there is no guarantee that I16 and U16 have exactly 16

156

bits.

157

158

For dealing with issues that may arise from various 32/64-bit

159

systems, we will ask Configure to check out

160

161

SHORTSIZE == sizeof(short)

162

INTSIZE == sizeof(int)

163

LONGSIZE == sizeof(long)

164

LONGLONGSIZE == sizeof(long long) (if HAS_LONG_LONG)

165

PTRSIZE == sizeof(void *)

166

DOUBLESIZE == sizeof(double)

167

LONG_DOUBLESIZE == sizeof(long double) (if HAS_LONG_DOUBLE).

*/

#ifdef I_INTTYPES /* e.g. Linux has int64_t without <inttypes.h> */

172

# include <inttypes.h>

173

# ifdef INT32_MIN_BROKEN

174

# undef INT32_MIN

175

# define INT32_MIN (-2147483647-1)

176

# endif

177

# ifdef INT64_MIN_BROKEN

178

# undef INT64_MIN

179

# define INT64_MIN (-9223372036854775807LL-1)

# endif

#endif

typedef I8TYPE I8;

typedef U8TYPE U8;

typedef I16TYPE I16;

typedef U16TYPE U16;

typedef I32TYPE I32;

typedef U32TYPE U32;

#ifdef PERL_CORE

# ifdef HAS_QUAD

typedef I64TYPE I64;

typedef U64TYPE U64;

# endif

#endif /* PERL_CORE */

195

196

/* INT64_C/UINT64_C are C99 from <stdint.h> (so they will not be

197

* available in strict C89 mode), but they are nice, so let's define

198

* them if necessary. */

199

#if defined(HAS_QUAD)

200

# undef PeRl_INT64_C

201

# undef PeRl_UINT64_C

202

/* Prefer the native integer types (int and long) over long long

203

* (which is not C89) and Win32-specific __int64. */

204

# if QUADKIND == QUAD_IS_INT && INTSIZE == 8

205

# define PeRl_INT64_C(c) (c)

206

# define PeRl_UINT64_C(c) CAT2(c,U)

207

# endif

208

# if QUADKIND == QUAD_IS_LONG && LONGSIZE == 8

209

# define PeRl_INT64_C(c) CAT2(c,L)

210

# define PeRl_UINT64_C(c) CAT2(c,UL)

211

# endif

212

# if QUADKIND == QUAD_IS_LONG_LONG && defined(HAS_LONG_LONG)

213

# define PeRl_INT64_C(c) CAT2(c,LL)

214

# define PeRl_UINT64_C(c) CAT2(c,ULL)

215

# endif

216

# if QUADKIND == QUAD_IS___INT64

217

# define PeRl_INT64_C(c) CAT2(c,I64)

218

# define PeRl_UINT64_C(c) CAT2(c,UI64)

219

# endif

220

# ifndef PeRl_INT64_C

221

# define PeRl_INT64_C(c) ((I64TYPE)(c)) /* last resort */

222

# define PeRl_UINT64_C(c) ((U64TYPE)(c))

223

# endif

224

/* In OS X the INT64_C/UINT64_C are defined with LL/ULL, which will

225

* not fly with C89-pedantic gcc, so let's undefine them first so that

226

* we can redefine them with our native integer preferring versions. */

227

# if defined(PERL_DARWIN) && defined(PERL_GCC_PEDANTIC)

# undef INT64_C

# undef UINT64_C

# endif

# ifndef INT64_C

# define INT64_C(c) PeRl_INT64_C(c)

233

# endif

234

# ifndef UINT64_C

235

# define UINT64_C(c) PeRl_UINT64_C(c)

# endif

#endif

#if defined(UINT8_MAX) && defined(INT16_MAX) && defined(INT32_MAX)

240

241

/* I8_MAX and I8_MIN constants are not defined, as I8 is an ambiguous type.

242

Please search CHAR_MAX in perl.h for further details. */

243

#define U8_MAX UINT8_MAX

244

#define U8_MIN UINT8_MIN

245

246

#define I16_MAX INT16_MAX

247

#define I16_MIN INT16_MIN

248

#define U16_MAX UINT16_MAX

249

#define U16_MIN UINT16_MIN

250

251

#define I32_MAX INT32_MAX

252

#define I32_MIN INT32_MIN

253

#ifndef UINT32_MAX_BROKEN /* e.g. HP-UX with gcc messes this up */

254

# define U32_MAX UINT32_MAX

255

#else

256

# define U32_MAX 4294967295U

257

#endif

258

#define U32_MIN UINT32_MIN

#else

/* I8_MAX and I8_MIN constants are not defined, as I8 is an ambiguous type.

263

Please search CHAR_MAX in perl.h for further details. */

264

#define U8_MAX PERL_UCHAR_MAX

265

#define U8_MIN PERL_UCHAR_MIN

266

267

#define I16_MAX PERL_SHORT_MAX

268

#define I16_MIN PERL_SHORT_MIN

269

#define U16_MAX PERL_USHORT_MAX

270

#define U16_MIN PERL_USHORT_MIN

271

272

#if LONGSIZE > 4

273

# define I32_MAX PERL_INT_MAX

274

# define I32_MIN PERL_INT_MIN

275

# define U32_MAX PERL_UINT_MAX

276

# define U32_MIN PERL_UINT_MIN

277

#else

278

# define I32_MAX PERL_LONG_MAX

279

# define I32_MIN PERL_LONG_MIN

280

# define U32_MAX PERL_ULONG_MAX

281

# define U32_MIN PERL_ULONG_MIN

#endif

#endif

/* log(2) is pretty close to 0.30103, just in case anyone is grepping for it */

287

#define BIT_DIGITS(N) (((N)*146)/485 + 1) /* log10(2) =~ 146/485 */

288

#define TYPE_DIGITS(T) BIT_DIGITS(sizeof(T) * 8)

289

#define TYPE_CHARS(T) (TYPE_DIGITS(T) + 2) /* sign, NUL */

290

291

#define Ctl(ch) ((ch) & 037)

292

293

/* This is a helper macro to avoid preprocessor issues, replaced by nothing

294

* unless under DEBUGGING, where it expands to an assert of its argument,

295

* followed by a comma (hence the comma operator). If we just used a straight

296

* assert(), we would get a comma with nothing before it when not DEBUGGING */

297

#ifdef DEBUGGING

298

# define __ASSERT_(statement) assert(statement),

299

#else

300

# define __ASSERT_(statement)

#endif

/*

=head1 SV-Body Allocation

305

306

=for apidoc Ama|SV*|newSVpvs|const char* s

307

Like C<newSVpvn>, but takes a literal C<NUL>-terminated string instead of a

308

string/length pair.

309

310

=for apidoc Ama|SV*|newSVpvs_flags|const char* s|U32 flags

311

Like C<newSVpvn_flags>, but takes a literal C<NUL>-terminated string instead of

312

a string/length pair.

313

314

=for apidoc Ama|SV*|newSVpvs_share|const char* s

315

Like C<newSVpvn_share>, but takes a literal C<NUL>-terminated string instead of

316

a string/length pair and omits the hash parameter.

317

318

=for apidoc Am|void|sv_catpvs_flags|SV* sv|const char* s|I32 flags

319

Like C<sv_catpvn_flags>, but takes a literal C<NUL>-terminated string instead

320

of a string/length pair.

321

322

=for apidoc Am|void|sv_catpvs_nomg|SV* sv|const char* s

323

Like C<sv_catpvn_nomg>, but takes a literal string instead of a

324

string/length pair.

325

326

=for apidoc Am|void|sv_catpvs|SV* sv|const char* s

327

Like C<sv_catpvn>, but takes a literal string instead of a string/length pair.

328

329

=for apidoc Am|void|sv_catpvs_mg|SV* sv|const char* s

330

Like C<sv_catpvn_mg>, but takes a literal string instead of a

331

string/length pair.

332

333

=for apidoc Am|void|sv_setpvs|SV* sv|const char* s

334

Like C<sv_setpvn>, but takes a literal string instead of a string/length pair.

335

336

=for apidoc Am|void|sv_setpvs_mg|SV* sv|const char* s

337

Like C<sv_setpvn_mg>, but takes a literal string instead of a

338

string/length pair.

339

340

=for apidoc Am|SV *|sv_setref_pvs|const char* s

341

Like C<sv_setref_pvn>, but takes a literal string instead of a

342

string/length pair.

343

344

=head1 Memory Management

345

346

=for apidoc Ama|char*|savepvs|const char* s

347

Like C<savepvn>, but takes a literal C<NUL>-terminated string instead of a

348

string/length pair.

349

350

=for apidoc Ama|char*|savesharedpvs|const char* s

351

A version of C<savepvs()> which allocates the duplicate string in memory

352

which is shared between threads.

=head1 GV Functions

=for apidoc Am|HV*|gv_stashpvs|const char* name|I32 create

357

Like C<gv_stashpvn>, but takes a literal string instead of a string/length pair.

358

359

=head1 Hash Manipulation Functions

360

361

=for apidoc Am|SV**|hv_fetchs|HV* tb|const char* key|I32 lval

362

Like C<hv_fetch>, but takes a literal string instead of a string/length pair.

363

364

=for apidoc Am|SV**|hv_stores|HV* tb|const char* key|NULLOK SV* val

365

Like C<hv_store>, but takes a literal string instead of a string/length pair

366

and omits the hash parameter.

367

368

=head1 Lexer interface

369

370

=for apidoc Amx|void|lex_stuff_pvs|const char *pv|U32 flags

371

372

Like L</lex_stuff_pvn>, but takes a literal string instead of a

string/length pair.

=cut

*/

/* concatenating with "" ensures that only literal strings are accepted as

379

* argument */

380

#define STR_WITH_LEN(s) ("" s ""), (sizeof(s)-1)

381

382

/* note that STR_WITH_LEN() can't be used as argument to macros or functions

383

* that under some configurations might be macros, which means that it requires

384

* the full Perl_xxx(aTHX_ ...) form for any API calls where it's used.

385

*/

386

387

/* STR_WITH_LEN() shortcuts.
 *
 * Each macro takes a literal string and hands it, together with its length,
 * to the corresponding string/length function.  The explicit
 * Perl_xxx(aTHX_ ...) form is used because STR_WITH_LEN() expands to two
 * arguments and so cannot be passed through another macro. */
#define newSVpvs(str) Perl_newSVpvn(aTHX_ STR_WITH_LEN(str))
#define newSVpvs_flags(str,flags) \
    Perl_newSVpvn_flags(aTHX_ STR_WITH_LEN(str), flags)
#define newSVpvs_share(str) Perl_newSVpvn_share(aTHX_ STR_WITH_LEN(str), 0)

/* Append a literal string; the variants differ only in the flags passed */
#define sv_catpvs_flags(sv, str, flags) \
    Perl_sv_catpvn_flags(aTHX_ sv, STR_WITH_LEN(str), flags)
#define sv_catpvs_nomg(sv, str) \
    Perl_sv_catpvn_flags(aTHX_ sv, STR_WITH_LEN(str), 0)
#define sv_catpvs(sv, str) \
    Perl_sv_catpvn_flags(aTHX_ sv, STR_WITH_LEN(str), SV_GMAGIC)
#define sv_catpvs_mg(sv, str) \
    Perl_sv_catpvn_flags(aTHX_ sv, STR_WITH_LEN(str), SV_GMAGIC|SV_SMAGIC)

/* Assign a literal string */
#define sv_setpvs(sv, str) Perl_sv_setpvn(aTHX_ sv, STR_WITH_LEN(str))
#define sv_setpvs_mg(sv, str) Perl_sv_setpvn_mg(aTHX_ sv, STR_WITH_LEN(str))
#define sv_setref_pvs(rv, classname, str) \
    Perl_sv_setref_pvn(aTHX_ rv, classname, STR_WITH_LEN(str))

404

/* Literal-string forms of the string-duplication functions */
#define savepvs(str) Perl_savepvn(aTHX_ STR_WITH_LEN(str))
#define savesharedpvs(str) Perl_savesharedpvn(aTHX_ STR_WITH_LEN(str))

/* Literal-string forms of the stash/glob lookups.  gv_fetchpvn() is a plain
 * string/length front end to the same Perl_gv_fetchpvn_flags() call. */
#define gv_stashpvs(str, create) \
    Perl_gv_stashpvn(aTHX_ STR_WITH_LEN(str), create)
#define gv_fetchpvs(namebeg, add, sv_type) \
    Perl_gv_fetchpvn_flags(aTHX_ STR_WITH_LEN(namebeg), add, sv_type)
#define gv_fetchpvn(namebeg, len, add, sv_type) \
    Perl_gv_fetchpvn_flags(aTHX_ namebeg, len, add, sv_type)

/* Literal-string form of Perl_sv_catxmlpvn() */
#define sv_catxmlpvs(dsv, str, utf8) \
    Perl_sv_catxmlpvn(aTHX_ dsv, STR_WITH_LEN(str), utf8)

414

/* Fetch the entry for a literal key.  A true 'lval' adds HV_FETCH_LVALUE to
 * the HV_FETCH_JUST_SV action passed to Perl_hv_common(). */
#define hv_fetchs(hv,key,lval)						\
  ((SV **)Perl_hv_common(aTHX_ (hv), NULL, STR_WITH_LEN(key), 0,	\
			 (lval) ? (HV_FETCH_JUST_SV | HV_FETCH_LVALUE)	\
			 : HV_FETCH_JUST_SV, NULL, 0))

/* Store 'val' under a literal key; the final (hash) argument to
 * Perl_hv_common() is always passed as 0. */
#define hv_stores(hv,key,val)						\
  ((SV **)Perl_hv_common(aTHX_ (hv), NULL, STR_WITH_LEN(key), 0,	\
			 (HV_FETCH_ISSTORE|HV_FETCH_JUST_SV), (val), 0))

422

423

/* Literal-string form of Perl_lex_stuff_pvn() */
#define lex_stuff_pvs(pv,flags) Perl_lex_stuff_pvn(aTHX_ STR_WITH_LEN(pv), flags)

/* Literal-string form of Perl_get_cvn_flags() */
#define get_cvs(str, flags)					\
	Perl_get_cvn_flags(aTHX_ STR_WITH_LEN(str), (flags))

427

428

/*

429

=head1 Miscellaneous Functions

430

431

=for apidoc Am|bool|strNE|char* s1|char* s2

432

Test two strings to see if they are different. Returns true or

433

false.

434

435

=for apidoc Am|bool|strEQ|char* s1|char* s2

436

Test two strings to see if they are equal. Returns true or false.

437

438

=for apidoc Am|bool|strLT|char* s1|char* s2

439

Test two strings to see if the first, C<s1>, is less than the second,

440

C<s2>. Returns true or false.

441

442

=for apidoc Am|bool|strLE|char* s1|char* s2

443

Test two strings to see if the first, C<s1>, is less than or equal to the

444

second, C<s2>. Returns true or false.

445

446

=for apidoc Am|bool|strGT|char* s1|char* s2

447

Test two strings to see if the first, C<s1>, is greater than the second,

448

C<s2>. Returns true or false.

449

450

=for apidoc Am|bool|strGE|char* s1|char* s2

451

Test two strings to see if the first, C<s1>, is greater than or equal to

452

the second, C<s2>. Returns true or false.

453

=for apidoc Am|bool|strnNE|char* s1|char* s2|STRLEN len

455

Test two strings to see if they are different. The C<len> parameter

456

indicates the number of bytes to compare. Returns true or false. (A

457

wrapper for C<strncmp>).

458

=for apidoc Am|bool|strnEQ|char* s1|char* s2|STRLEN len

460

Test two strings to see if they are equal. The C<len> parameter indicates

461

the number of bytes to compare. Returns true or false. (A wrapper for

C<strncmp>).

=cut

*/

/* String comparison predicates built on strcmp() and strncmp().
 *
 * Every macro yields exactly 0 or 1.  strNE() and strnNE() compare the
 * library result against 0 instead of returning it raw, so that the value
 * survives being stored in a 'bool' that is narrower than int. */
#define strNE(s1,s2) (strcmp(s1,s2) != 0)
#define strEQ(s1,s2) (strcmp(s1,s2) == 0)
#define strLT(s1,s2) (strcmp(s1,s2) < 0)
#define strLE(s1,s2) (strcmp(s1,s2) <= 0)
#define strGT(s1,s2) (strcmp(s1,s2) > 0)
#define strGE(s1,s2) (strcmp(s1,s2) >= 0)
#define strnNE(s1,s2,l) (strncmp(s1,s2,l) != 0)
#define strnEQ(s1,s2,l) (strncmp(s1,s2,l) == 0)

475

476

/* Byte-wise comparison predicates.  memcmp() is used when HAS_MEMCMP is
 * defined, bcmp() otherwise.  Both macros yield exactly 0 or 1. */
#ifdef HAS_MEMCMP
#  define memNE(s1,s2,l) (memcmp(s1,s2,l) != 0)
#  define memEQ(s1,s2,l) (memcmp(s1,s2,l) == 0)
#else
#  define memNE(s1,s2,l) (bcmp(s1,s2,l) != 0)
#  define memEQ(s1,s2,l) (bcmp(s1,s2,l) == 0)
#endif

/* memEQs(s1, l, s2) is true iff the 'l' bytes at 's1' are exactly the literal
 * string 's2' (which must be a string literal; its trailing NUL is not
 * compared).  'l' is parenthesized so that any expression may be passed as
 * the length. */
#define memEQs(s1, l, s2) \
	((sizeof(s2)-1 == (l)) && memEQ(s1, ("" s2 ""), (sizeof(s2)-1)))
#define memNEs(s1, l, s2) (!memEQs(s1, l, s2))

/*

* Character classes.

*

* Unfortunately, the introduction of locales means that we

492

* can't trust isupper(), etc. to tell the truth. And when

493

* it comes to /\w+/ with tainting enabled, we *must* be able

494

* to trust our character classes.

495

*

496

* Therefore, the default tests in the text of Perl will be

497

* independent of locale. Any code that wants to depend on

498

* the current locale will use the tests that begin with "lc".

499

*/

500

501

/* Define CTYPE256 whenever HAS_SETLOCALE is defined, unless it already is.
 * XXX Is there a better test for this? */
#if defined(HAS_SETLOCALE) && !defined(CTYPE256)
#  define CTYPE256
#endif

/*

=head1 Character classes

510

This section is about functions (really macros) that classify characters

511

into types, such as punctuation versus alphabetic, etc. Most of these are

512

analogous to regular expression character classes. (See

513

L<perlrecharclass/POSIX Character Classes>.) There are several variants for

514

each class. (Not all macros have all variants; each item below lists the

515

ones valid for it.) None are affected by C<use bytes>, and only the ones

516

with C<LC> in the name are affected by the current locale.

517

518

The base function, e.g., C<isALPHA()>, takes an octet (either a C<char> or a

519

C<U8>) as input and returns a boolean as to whether or not the character

520

represented by that octet is (or on non-ASCII platforms, corresponds to) an

521

ASCII character in the named class based on platform, Unicode, and Perl rules.

522

If the input is a number that doesn't fit in an octet, FALSE is returned.

523

524

Variant C<isFOO_A> (e.g., C<isALPHA_A()>) is identical to the base function

525

with no suffix C<"_A">.

526

527

Variant C<isFOO_L1> imposes the Latin-1 (or EBCDIC equivalent) character set

528

onto the platform. That is, the code points that are ASCII are unaffected,

529

since ASCII is a subset of Latin-1. But the non-ASCII code points are treated

530

as if they are Latin-1 characters. For example, C<isWORDCHAR_L1()> will return

531

true when called with the code point 0xDF, which is a word character in both

532

ASCII and EBCDIC (though it represents different characters in each).

533

534

Variant C<isFOO_uni> is like the C<isFOO_L1> variant, but accepts any UV code

535

point as input. If the code point is larger than 255, Unicode rules are used

536

to determine if it is in the character class. For example,

537

C<isWORDCHAR_uni(0x100)> returns TRUE, since 0x100 is LATIN CAPITAL LETTER A

538

WITH MACRON in Unicode, and is a word character.

539

540

Variant C<isFOO_utf8> is like C<isFOO_uni>, but the input is a pointer to a

541

(known to be well-formed) UTF-8 encoded string (C<U8*> or C<char*>). The

542

classification of just the first (possibly multi-byte) character in the string

543

is tested.

544

545

Variant C<isFOO_LC> is like the C<isFOO_A> and C<isFOO_L1> variants, but the

546

result is based on the current locale, which is what C<LC> in the name stands

547

for. If Perl can determine that the current locale is a UTF-8 locale, it uses

548

the published Unicode rules; otherwise, it uses the C library function that

549

gives the named classification. For example, C<isDIGIT_LC()> when not in a

550

UTF-8 locale returns the result of calling C<isdigit()>. FALSE is always

551

returned if the input won't fit into an octet.

552

553

Variant C<isFOO_LC_uvchr> is like C<isFOO_LC>, but is defined on any UV. It

554

returns the same as C<isFOO_LC> for input code points less than 256, and

555

returns the hard-coded, not-affected-by-locale, Unicode results for larger ones.

556

557

Variant C<isFOO_LC_utf8> is like C<isFOO_LC_uvchr>, but the input is a pointer to a

558

(known to be well-formed) UTF-8 encoded string (C<U8*> or C<char*>). The

559

classification of just the first (possibly multi-byte) character in the string

560

is tested.

561

562

=for apidoc Am|bool|isALPHA|char ch

563

Returns a boolean indicating whether the specified character is an

564

alphabetic character, analogous to C<m/[[:alpha:]]/>.

565

See the L<top of this section|/Character classes> for an explanation of variants

566

C<isALPHA_A>, C<isALPHA_L1>, C<isALPHA_uni>, C<isALPHA_utf8>, C<isALPHA_LC>,

567

C<isALPHA_LC_uvchr>, and C<isALPHA_LC_utf8>.

568

569

=for apidoc Am|bool|isALPHANUMERIC|char ch

570

Returns a boolean indicating whether the specified character is either an

571

alphabetic character or decimal digit, analogous to C<m/[[:alnum:]]/>.

572

See the L<top of this section|/Character classes> for an explanation of variants

573

C<isALPHANUMERIC_A>, C<isALPHANUMERIC_L1>, C<isALPHANUMERIC_uni>,

574

C<isALPHANUMERIC_utf8>, C<isALPHANUMERIC_LC>, C<isALPHANUMERIC_LC_uvchr>, and

575

C<isALPHANUMERIC_LC_utf8>.

576

577

=for apidoc Am|bool|isASCII|char ch

578

Returns a boolean indicating whether the specified character is one of the 128

579

characters in the ASCII character set, analogous to C<m/[[:ascii:]]/>.

580

On non-ASCII platforms, it returns TRUE iff this

581

character corresponds to an ASCII character. Variants C<isASCII_A()> and

582

C<isASCII_L1()> are identical to C<isASCII()>.

583

See the L<top of this section|/Character classes> for an explanation of variants

584

C<isASCII_uni>, C<isASCII_utf8>, C<isASCII_LC>, C<isASCII_LC_uvchr>, and

585

C<isASCII_LC_utf8>. Note, however, that some platforms do not have the C

586

library routine C<isascii()>. In these cases, the variants whose names contain

587

C<LC> are the same as the corresponding ones without.

588

589

Also note, that because all ASCII characters are UTF-8 invariant (meaning they

590

have the exact same representation (always a single byte) whether encoded in

591

UTF-8 or not), C<isASCII> will give the correct results when called with any

592

byte in any string encoded or not in UTF-8. And similarly C<isASCII_utf8> will

593

work properly on any string encoded or not in UTF-8.

594

595

=for apidoc Am|bool|isBLANK|char ch

596

Returns a boolean indicating whether the specified character is a

597

character considered to be a blank, analogous to C<m/[[:blank:]]/>.

598

See the L<top of this section|/Character classes> for an explanation of variants

599

C<isBLANK_A>, C<isBLANK_L1>, C<isBLANK_uni>, C<isBLANK_utf8>, C<isBLANK_LC>,

600

C<isBLANK_LC_uvchr>, and C<isBLANK_LC_utf8>. Note, however, that some

601

platforms do not have the C library routine C<isblank()>. In these cases, the

602

variants whose names contain C<LC> are the same as the corresponding ones

603

without.

604

605

=for apidoc Am|bool|isCNTRL|char ch

606

Returns a boolean indicating whether the specified character is a

607

control character, analogous to C<m/[[:cntrl:]]/>.

608

See the L<top of this section|/Character classes> for an explanation of variants

609

C<isCNTRL_A>, C<isCNTRL_L1>, C<isCNTRL_uni>, C<isCNTRL_utf8>, C<isCNTRL_LC>,

610

C<isCNTRL_LC_uvchr>, and C<isCNTRL_LC_utf8>.

611

On EBCDIC platforms, you almost always want to use the C<isCNTRL_L1> variant.

612

613

=for apidoc Am|bool|isDIGIT|char ch

614

Returns a boolean indicating whether the specified character is a

615

digit, analogous to C<m/[[:digit:]]/>.

616

Variants C<isDIGIT_A> and C<isDIGIT_L1> are identical to C<isDIGIT>.

617

See the L<top of this section|/Character classes> for an explanation of variants

618

C<isDIGIT_uni>, C<isDIGIT_utf8>, C<isDIGIT_LC>, C<isDIGIT_LC_uvchr>, and

619

C<isDIGIT_LC_utf8>.

620

621

=for apidoc Am|bool|isGRAPH|char ch

622

Returns a boolean indicating whether the specified character is a

623

graphic character, analogous to C<m/[[:graph:]]/>.

624

See the L<top of this section|/Character classes> for an explanation of variants

625

C<isGRAPH_A>, C<isGRAPH_L1>, C<isGRAPH_uni>, C<isGRAPH_utf8>, C<isGRAPH_LC>,

626

C<isGRAPH_LC_uvchr>, and C<isGRAPH_LC_utf8>.

627

628

=for apidoc Am|bool|isLOWER|char ch

629

Returns a boolean indicating whether the specified character is a

630

lowercase character, analogous to C<m/[[:lower:]]/>.

631

See the L<top of this section|/Character classes> for an explanation of variants

632

C<isLOWER_A>, C<isLOWER_L1>, C<isLOWER_uni>, C<isLOWER_utf8>, C<isLOWER_LC>,

633

C<isLOWER_LC_uvchr>, and C<isLOWER_LC_utf8>.

634

635

=for apidoc Am|bool|isOCTAL|char ch

636

Returns a boolean indicating whether the specified character is an

637

octal digit, [0-7].

638

The only two variants are C<isOCTAL_A> and C<isOCTAL_L1>; each is identical to

639

C<isOCTAL>.

640

641

=for apidoc Am|bool|isPUNCT|char ch

642

Returns a boolean indicating whether the specified character is a

643

punctuation character, analogous to C<m/[[:punct:]]/>.

644

Note that the definition of what is punctuation isn't as

645

straightforward as one might desire. See L<perlrecharclass/POSIX Character

646

Classes> for details.

647

See the L<top of this section|/Character classes> for an explanation of variants

648

C<isPUNCT_A>, C<isPUNCT_L1>, C<isPUNCT_uni>, C<isPUNCT_utf8>, C<isPUNCT_LC>,

649

C<isPUNCT_LC_uvchr>, and C<isPUNCT_LC_utf8>.

650

651

=for apidoc Am|bool|isSPACE|char ch

652

Returns a boolean indicating whether the specified character is a

653

whitespace character. This is analogous

654

to what C<m/\s/> matches in a regular expression. Starting in Perl 5.18

655

(experimentally), this also matches what C<m/[[:space:]]/> does.

656

("Experimentally" means that this change may be backed out in 5.22 if

657

field experience indicates that it was unwise.) Prior to 5.18, only the

658

locale forms of this macro (the ones with C<LC> in their names) matched

659

precisely what C<m/[[:space:]]/> does. In those releases, the only difference,

660

in the non-locale variants, was that C<isSPACE()> did not match a vertical tab.

661

(See L</isPSXSPC> for a macro that matches a vertical tab in all releases.)

662

See the L<top of this section|/Character classes> for an explanation of variants

663

C<isSPACE_A>, C<isSPACE_L1>, C<isSPACE_uni>, C<isSPACE_utf8>, C<isSPACE_LC>,

664

C<isSPACE_LC_uvchr>, and C<isSPACE_LC_utf8>.

665

666

=for apidoc Am|bool|isPSXSPC|char ch

667

(short for Posix Space)

668

Starting in 5.18, this is identical (experimentally) in all its forms to the

669

corresponding C<isSPACE()> macros. ("Experimentally" means that this change

670

may be backed out in 5.22 if field experience indicates that it

671

was unwise.)

672

The locale forms of this macro are identical to their corresponding

673

C<isSPACE()> forms in all Perl releases. In releases prior to 5.18, the

674

non-locale forms differ from their C<isSPACE()> forms only in that the

675

C<isSPACE()> forms don't match a Vertical Tab, and the C<isPSXSPC()> forms do.

676

Otherwise they are identical. Thus this macro is analogous to what

677

C<m/[[:space:]]/> matches in a regular expression.

678

See the L<top of this section|/Character classes> for an explanation of variants

679

C<isPSXSPC_A>, C<isPSXSPC_L1>, C<isPSXSPC_uni>, C<isPSXSPC_utf8>, C<isPSXSPC_LC>,

680

C<isPSXSPC_LC_uvchr>, and C<isPSXSPC_LC_utf8>.

681

682

=for apidoc Am|bool|isUPPER|char ch

683

Returns a boolean indicating whether the specified character is an

684

uppercase character, analogous to C<m/[[:upper:]]/>.

685

See the L<top of this section|/Character classes> for an explanation of variants

686

C<isUPPER_A>, C<isUPPER_L1>, C<isUPPER_uni>, C<isUPPER_utf8>, C<isUPPER_LC>,

687

C<isUPPER_LC_uvchr>, and C<isUPPER_LC_utf8>.

688

689

=for apidoc Am|bool|isPRINT|char ch

690

Returns a boolean indicating whether the specified character is a

691

printable character, analogous to C<m/[[:print:]]/>.

692

See the L<top of this section|/Character classes> for an explanation of variants

693

C<isPRINT_A>, C<isPRINT_L1>, C<isPRINT_uni>, C<isPRINT_utf8>, C<isPRINT_LC>,

694

C<isPRINT_LC_uvchr>, and C<isPRINT_LC_utf8>.

695

696

=for apidoc Am|bool|isWORDCHAR|char ch

697

Returns a boolean indicating whether the specified character is a character

698

that is a word character, analogous to what C<m/\w/> and C<m/[[:word:]]/> match

699

in a regular expression. A word character is an alphabetic character, a

700

decimal digit, a connecting punctuation character (such as an underscore), or

701

a "mark" character that attaches to one of those (like some sort of accent).

702

C<isALNUM()> is a synonym provided for backward compatibility, even though a

703

word character includes more than the standard C language meaning of

704

alphanumeric.

705

See the L<top of this section|/Character classes> for an explanation of variants

706

C<isWORDCHAR_A>, C<isWORDCHAR_L1>, C<isWORDCHAR_uni>, C<isWORDCHAR_utf8>,

707

C<isWORDCHAR_LC>, C<isWORDCHAR_LC_uvchr>, and C<isWORDCHAR_LC_utf8>.

708

709

=for apidoc Am|bool|isXDIGIT|char ch

710

Returns a boolean indicating whether the specified character is a hexadecimal

711

digit. In the ASCII range these are C<[0-9A-Fa-f]>. Variants C<isXDIGIT_A()>

712

and C<isXDIGIT_L1()> are identical to C<isXDIGIT()>.

713

See the L<top of this section|/Character classes> for an explanation of variants

714

C<isXDIGIT_uni>, C<isXDIGIT_utf8>, C<isXDIGIT_LC>, C<isXDIGIT_LC_uvchr>, and

715

C<isXDIGIT_LC_utf8>.

716

717

=for apidoc Am|bool|isIDFIRST|char ch

718

Returns a boolean indicating whether the specified character can be the first

719

character of an identifier. This is very close to, but not quite the same as

720

the official Unicode property C<XID_Start>. The difference is that this

721

returns true only if the input character also matches L</isWORDCHAR>.

722

See the L<top of this section|/Character classes> for an explanation of variants

723

C<isIDFIRST_A>, C<isIDFIRST_L1>, C<isIDFIRST_uni>, C<isIDFIRST_utf8>,

724

C<isIDFIRST_LC>, C<isIDFIRST_LC_uvchr>, and C<isIDFIRST_LC_utf8>.

725

726

=for apidoc Am|bool|isIDCONT|char ch

727

Returns a boolean indicating whether the specified character can be the

728

second or succeeding character of an identifier. This is very close to, but

729

not quite the same as the official Unicode property C<XID_Continue>. The

730

difference is that this returns true only if the input character also matches

731

L</isWORDCHAR>. See the L<top of this section|/Character classes> for an

732

explanation of variants C<isIDCONT_A>, C<isIDCONT_L1>, C<isIDCONT_uni>,

733

C<isIDCONT_utf8>, C<isIDCONT_LC>, C<isIDCONT_LC_uvchr>, and

734

C<isIDCONT_LC_utf8>.

735

736

=head1 Miscellaneous Functions

737

738

=for apidoc Am|U8|READ_XDIGIT|char* str

739

Returns the value of an ASCII-range hex digit and advances the string pointer.

740

Behaviour is only well defined when isXDIGIT(*str) is true.

741

742

=head1 Character case changing

743

744

=for apidoc Am|U8|toUPPER|U8 ch

745

Converts the specified character to uppercase. If the input is anything but an

746

ASCII lowercase character, that input character itself is returned. Variant

747

C<toUPPER_A> is equivalent.

748

=for apidoc Am|UV|toUPPER_uni|UV cp|U8* s|STRLEN* lenp

750

Converts the Unicode code point C<cp> to its uppercase version, and

751

stores that in UTF-8 in C<s>, and its length in bytes in C<lenp>. Note

752

that the buffer pointed to by C<s> needs to be at least C<UTF8_MAXBYTES_CASE+1>

753

bytes since the uppercase version may be longer than the original character.

754

755

The first code point of the uppercased version is returned

756

(but note, as explained just above, that there may be more.)

757

758

=for apidoc Am|UV|toUPPER_utf8|U8* p|U8* s|STRLEN* lenp

759

Converts the UTF-8 encoded character at C<p> to its uppercase version, and

760

stores that in UTF-8 in C<s>, and its length in bytes in C<lenp>. Note

761

that the buffer pointed to by C<s> needs to be at least C<UTF8_MAXBYTES_CASE+1>

762

bytes since the uppercase version may be longer than the original character.

763

764

The first code point of the uppercased version is returned

765

(but note, as explained just above, that there may be more.)

766

767

The input character at C<p> is assumed to be well-formed.

768

769

=for apidoc Am|U8|toFOLD|U8 ch

770

Converts the specified character to foldcase. If the input is anything but an

771

ASCII uppercase character, that input character itself is returned. Variant

772

C<toFOLD_A> is equivalent. (There is no equivalent C<toFOLD_L1> for the full

773

Latin1 range, as the full generality of L</toFOLD_uni> is needed there.)

774

=for apidoc Am|UV|toFOLD_uni|UV cp|U8* s|STRLEN* lenp

776

Converts the Unicode code point C<cp> to its foldcase version, and

777

stores that in UTF-8 in C<s>, and its length in bytes in C<lenp>. Note

778

that the buffer pointed to by C<s> needs to be at least C<UTF8_MAXBYTES_CASE+1>

779

bytes since the foldcase version may be longer than the original character.

780

781

The first code point of the foldcased version is returned

782

(but note, as explained just above, that there may be more.)

783

784

=for apidoc Am|UV|toFOLD_utf8|U8* p|U8* s|STRLEN* lenp

785

Converts the UTF-8 encoded character at C<p> to its foldcase version, and

786

stores that in UTF-8 in C<s>, and its length in bytes in C<lenp>. Note

787

that the buffer pointed to by C<s> needs to be at least C<UTF8_MAXBYTES_CASE+1>

788

bytes since the foldcase version may be longer than the original character.

789

790

The first code point of the foldcased version is returned

791

(but note, as explained just above, that there may be more.)

792

793

The input character at C<p> is assumed to be well-formed.

794

795

=for apidoc Am|U8|toLOWER|U8 ch

796

Converts the specified character to lowercase. If the input is anything but an

797

ASCII uppercase character, that input character itself is returned. Variant

798

C<toLOWER_A> is equivalent.

799

800

=for apidoc Am|U8|toLOWER_L1|U8 ch

801

Converts the specified Latin1 character to lowercase. The results are undefined if

802

the input doesn't fit in a byte.

803

804

=for apidoc Am|U8|toLOWER_LC|U8 ch

805

Converts the specified character to lowercase using the current locale's rules,

806

if possible; otherwise returns the input character itself.

807

=for apidoc Am|UV|toLOWER_uni|UV cp|U8* s|STRLEN* lenp

809

Converts the Unicode code point C<cp> to its lowercase version, and

810

stores that in UTF-8 in C<s>, and its length in bytes in C<lenp>. Note

811

that the buffer pointed to by C<s> needs to be at least C<UTF8_MAXBYTES_CASE+1>

812

bytes since the lowercase version may be longer than the original character.

813

814

The first code point of the lowercased version is returned

815

(but note, as explained just above, that there may be more.)

816

817

=for apidoc Am|UV|toLOWER_utf8|U8* p|U8* s|STRLEN* lenp

818

Converts the UTF-8 encoded character at C<p> to its lowercase version, and

819

stores that in UTF-8 in C<s>, and its length in bytes in C<lenp>. Note

820

that the buffer pointed to by C<s> needs to be at least C<UTF8_MAXBYTES_CASE+1>

821

bytes since the lowercase version may be longer than the original character.

822

823

The first code point of the lowercased version is returned

824

(but note, as explained just above, that there may be more.)

825

826

The input character at C<p> is assumed to be well-formed.

827

828

=for apidoc Am|U8|toLOWER_LC|U8 ch

829

Converts the specified character to lowercase using the current locale's rules,

830

if possible; otherwise returns the input character itself.

831

832

=for apidoc Am|U8|toTITLE|U8 ch

833

Converts the specified character to titlecase. If the input is anything but an

834

ASCII lowercase character, that input character itself is returned. Variant

835

C<toTITLE_A> is equivalent. (There is no C<toTITLE_L1> for the full Latin1 range,

836

as the full generality of L</toTITLE_uni> is needed there. Titlecase is not a

837

concept used in locale handling, so there is no functionality for that.)

838

=for apidoc Am|UV|toTITLE_uni|UV cp|U8* s|STRLEN* lenp

840

Converts the Unicode code point C<cp> to its titlecase version, and

841

stores that in UTF-8 in C<s>, and its length in bytes in C<lenp>. Note

842

that the buffer pointed to by C<s> needs to be at least C<UTF8_MAXBYTES_CASE+1>

843

bytes since the titlecase version may be longer than the original character.

844

845

The first code point of the titlecased version is returned

846

(but note, as explained just above, that there may be more.)

847

848

=for apidoc Am|UV|toTITLE_utf8|U8* p|U8* s|STRLEN* lenp

849

Converts the UTF-8 encoded character at C<p> to its titlecase version, and

850

stores that in UTF-8 in C<s>, and its length in bytes in C<lenp>. Note

851

that the buffer pointed to by C<s> needs to be at least C<UTF8_MAXBYTES_CASE+1>

852

bytes since the titlecase version may be longer than the original character.

853

854

The first code point of the titlecased version is returned

855

(but note, as explained just above, that there may be more.)

856

857

The input character at C<p> is assumed to be well-formed.

=cut

XXX Still undocumented isVERTWS_uni and _utf8; it's unclear what their names

862

really should be. Also toUPPER_LC and toFOLD_LC, which are subject to change.

863

864

Note that these macros are repeated in Devel::PPPort, so should also be

865

patched there. The file as of this writing is cpan/Devel-PPPort/parts/inc/misc

*/

/* Specify the widest unsigned type on the platform.  Use U64TYPE because U64
 * is known only in the perl core, and this macro can be called from outside
 * that.  Without HAS_QUAD the widest type is U32. */
#ifdef HAS_QUAD
#  define WIDEST_UTYPE U64TYPE
#else
#  define WIDEST_UTYPE U32
#endif

877

878

/* FITS_IN_8_BITS(c) returns true if c doesn't have a bit set other than in
 * the lower 8.  It is designed to be hopefully bomb-proof, making sure that no
 * bits of information are lost even on a 64-bit machine, but to get the
 * compiler to optimize it out if possible.  This is because Configure makes
 * sure that the machine has an 8-bit byte, so if c is stored in a byte, the
 * sizeof() guarantees that this evaluates to a constant true at compile time.
 *
 * Otherwise c is widened to WIDEST_UTYPE and every bit above the low 8 must
 * be clear; a negative value of a wider signed type therefore does NOT fit.
 */
#define FITS_IN_8_BITS(c) ((sizeof(c) == 1) || !(((WIDEST_UTYPE)(c)) & ~0xFF))

/* isASCII(c): true iff c is one of the 128 ASCII characters.  isASCII_A()
 * and isASCII_L1() are plain synonyms. */
#ifdef EBCDIC
#  ifndef _ALL_SOURCE
     /* This returns the wrong results on at least z/OS unless this is
      * defined. */
#    error _ALL_SOURCE should probably be defined
#  endif

   /* We could be called without perl.h, in which case NATIVE_TO_ASCII() is
    * likely not defined, and so we use the native function */
#  define isASCII(c)    cBOOL(isascii(c))
#else
   /* Widening to an unsigned type first makes negative inputs compare as
    * large values, so they are rejected */
#  define isASCII(c)    ((WIDEST_UTYPE)(c) < 128)
#endif

#define isASCII_A(c)  isASCII(c)
#define isASCII_L1(c)  isASCII(c)

903

904

/* The lower 3 bits in both the ASCII and EBCDIC representations of '0' are 0,
 * and the 8 possible permutations of those bits exactly comprise the 8 octal
 * digits.  So masking those 3 bits off an octet and comparing with '0' tests
 * for [0-7]. */
#define isOCTAL_A(c)  cBOOL(FITS_IN_8_BITS(c) && (0xF8 & (c)) == '0')

908

909

/* ASCII range only */

910

#ifdef H_PERL /* If have access to perl.h, lookup in its table */

911

912

/* Character class numbers.  For internal core Perl use only.  The ones less
 * than 32 are used in PL_charclass[] and the ones up through the one that
 * corresponds to <_HIGHEST_REGCOMP_DOT_H_SYNC> are used by regcomp.h and
 * related files.  PL_charclass ones use names used in l1_char_class_tab.h but
 * their actual definitions are here.  If that file has a name not used here,
 * it won't compile.
 *
 * The first group of these is ordered in what I (khw) estimate to be the
 * frequency of their use.  This gives a slight edge to exiting a loop earlier
 * (in reginclass() in regexec.c) */
#  define _CC_WORDCHAR           0      /* \w and [:word:] */
#  define _CC_DIGIT              1      /* \d and [:digit:] */
#  define _CC_ALPHA              2      /* [:alpha:] */
#  define _CC_LOWER              3      /* [:lower:] */
#  define _CC_UPPER              4      /* [:upper:] */
#  define _CC_PUNCT              5      /* [:punct:] */
#  define _CC_PRINT              6      /* [:print:] */
#  define _CC_ALPHANUMERIC       7      /* [:alnum:] */
#  define _CC_GRAPH              8      /* [:graph:] */
#  define _CC_CASED              9      /* [:lower:] and [:upper:] under /i */

932

933

#define _FIRST_NON_SWASH_CC     10
/* The character classes above are implemented with swashes.  The second group
 * (just below) contains the ones implemented without.  These are also sorted
 * in rough order of the frequency of their use, except that \v should be last,
 * as it isn't a real Posix character class, and some (small) inefficiencies in
 * regular expression handling would be introduced by putting it in the middle
 * of those that are.  Also, cntrl and ascii come after the others as it may be
 * useful to group these which have no members that match above Latin1, (or
 * above ASCII in the latter case) */

#  define _CC_SPACE             10      /* \s */
#  define _CC_BLANK             11      /* [:blank:] */
#  define _CC_XDIGIT            12      /* [:xdigit:] */
#  define _CC_PSXSPC            13      /* [:space:] */
#  define _CC_CNTRL             14      /* [:cntrl:] */
#  define _CC_ASCII             15      /* [:ascii:] */
#  define _CC_VERTSPACE         16      /* \v */

#  define _HIGHEST_REGCOMP_DOT_H_SYNC _CC_VERTSPACE

952

953

/* The members of the third group below do not need to be coordinated with data
 * structures in regcomp.[ch] and regexec.c. */
#  define _CC_IDFIRST                  17
#  define _CC_CHARNAME_CONT            18
#  define _CC_NONLATIN1_FOLD           19
#  define _CC_QUOTEMETA                20
#  define _CC_NON_FINAL_FOLD           21
#  define _CC_IS_IN_SOME_FOLD          22
#  define _CC_BACKSLASH_FOO_LBRACE_IS_META 31 /* temp, see mk_PL_charclass.pl */
/* Unused: 23-30
 * If more bits are needed, one could add a second word for non-64bit
 * QUAD_IS_INT systems, using some #ifdefs to distinguish between having a 2nd
 * word or not.  The IS_IN_SOME_FOLD bit is the most easily expendable, as it
 * is used only for optimization (as of this writing), and differs in the
 * Latin1 range from the ALPHA bit only in two relatively unimportant
 * characters: the masculine and feminine ordinal indicators, so removing it
 * would just cause /i regexes which match them to run less efficiently */

970

971

#if defined(PERL_CORE) || defined(PERL_EXT)
/* An enum version of the character class numbers, to help compilers
 * optimize.  Each enumerator takes its value from the like-named _CC_ macro,
 * so the two sets cannot drift apart. */
typedef enum {
    _CC_ENUM_ALPHA          = _CC_ALPHA,
    _CC_ENUM_ALPHANUMERIC   = _CC_ALPHANUMERIC,
    _CC_ENUM_ASCII          = _CC_ASCII,
    _CC_ENUM_BLANK          = _CC_BLANK,
    _CC_ENUM_CASED          = _CC_CASED,
    _CC_ENUM_CNTRL          = _CC_CNTRL,
    _CC_ENUM_DIGIT          = _CC_DIGIT,
    _CC_ENUM_GRAPH          = _CC_GRAPH,
    _CC_ENUM_LOWER          = _CC_LOWER,
    _CC_ENUM_PRINT          = _CC_PRINT,
    _CC_ENUM_PSXSPC         = _CC_PSXSPC,
    _CC_ENUM_PUNCT          = _CC_PUNCT,
    _CC_ENUM_SPACE          = _CC_SPACE,
    _CC_ENUM_UPPER          = _CC_UPPER,
    _CC_ENUM_VERTSPACE      = _CC_VERTSPACE,
    _CC_ENUM_WORDCHAR       = _CC_WORDCHAR,
    _CC_ENUM_XDIGIT         = _CC_XDIGIT
} _char_class_number;
#endif

994

995

/* Number of character classes numbered below _FIRST_NON_SWASH_CC */
#define POSIX_SWASH_COUNT _FIRST_NON_SWASH_CC
/* Number of character classes numbered up through
 * _HIGHEST_REGCOMP_DOT_H_SYNC */
#define POSIX_CC_COUNT    (_HIGHEST_REGCOMP_DOT_H_SYNC + 1)

997

998

#if defined(PERL_IN_UTF8_C) || defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_REGEXEC_C)
/* The table below is laid out in _CC_ class-number order, so refuse to
 * compile if any of those numbers has changed */
#   if _CC_WORDCHAR != 0 || _CC_DIGIT != 1 || _CC_ALPHA != 2 || _CC_LOWER != 3 \
       || _CC_UPPER != 4 || _CC_PUNCT != 5 || _CC_PRINT != 6                \
       || _CC_ALPHANUMERIC != 7 || _CC_GRAPH != 8 || _CC_CASED != 9
#error Need to adjust order of swash_property_names[]
#   endif

/* This is declared static in each of the few files that this is #defined for
 * to keep them from being publicly accessible.  Hence there is a small amount
 * of wasted space */

static const char* const swash_property_names[] = {
    "XPosixWord",
    "XPosixDigit",
    "XPosixAlpha",
    "XPosixLower",
    "XPosixUpper",
    "XPosixPunct",
    "XPosixPrint",
    "XPosixAlnum",
    "XPosixGraph",
    "Cased"
};
#endif

# ifdef DOINIT

EXTCONST U32 PL_charclass[] = {

1025

# include "l1_char_class_tab.h"

1026

};

1027

1028

# else /* ! DOINIT */

1029

EXTCONST U32 PL_charclass[];

1030

# endif

1031

1032

/* The 1U keeps Solaris from griping when shifting sets the uppermost bit */

1033

# define _CC_mask(classnum) (1U << (classnum))

1034

1035

/* For internal core Perl use only: the base macro for defining macros like

1036

* isALPHA */

1037

# define _generic_isCC(c, classnum) cBOOL(FITS_IN_8_BITS(c) \

1038

&& (PL_charclass[(U8) (c)] & _CC_mask(classnum)))

1039

1040

/* The mask for the _A versions of the macros; it just adds in the bit for

1041

* ASCII. */

1042

# define _CC_mask_A(classnum) (_CC_mask(classnum) | _CC_mask(_CC_ASCII))

1043

1044

/* For internal core Perl use only: the base macro for defining macros like

1045

* isALPHA_A. The foo_A version makes sure that both the desired bit and

1046

* the ASCII bit are present */

1047

# define _generic_isCC_A(c, classnum) (FITS_IN_8_BITS(c) \

1048

&& ((PL_charclass[(U8) (c)] & _CC_mask_A(classnum)) \

1049

== _CC_mask_A(classnum)))

1050

1051

# define isALPHA_A(c) _generic_isCC_A(c, _CC_ALPHA)

1052

# define isALPHANUMERIC_A(c) _generic_isCC_A(c, _CC_ALPHANUMERIC)

1053

# define isBLANK_A(c) _generic_isCC_A(c, _CC_BLANK)

1054

# define isCNTRL_A(c) _generic_isCC_A(c, _CC_CNTRL)

1055

# define isDIGIT_A(c) _generic_isCC(c, _CC_DIGIT)

1056

# define isGRAPH_A(c) _generic_isCC_A(c, _CC_GRAPH)

1057

# define isLOWER_A(c) _generic_isCC_A(c, _CC_LOWER)

1058

# define isPRINT_A(c) _generic_isCC_A(c, _CC_PRINT)

1059

# define isPSXSPC_A(c) _generic_isCC_A(c, _CC_PSXSPC)

1060

# define isPUNCT_A(c) _generic_isCC_A(c, _CC_PUNCT)

1061

# define isSPACE_A(c) _generic_isCC_A(c, _CC_SPACE)

1062

# define isUPPER_A(c) _generic_isCC_A(c, _CC_UPPER)

1063

# define isWORDCHAR_A(c) _generic_isCC_A(c, _CC_WORDCHAR)

1064

# define isXDIGIT_A(c) _generic_isCC(c, _CC_XDIGIT)

1065

# define isIDFIRST_A(c) _generic_isCC_A(c, _CC_IDFIRST)

1066

# define isALPHA_L1(c) _generic_isCC(c, _CC_ALPHA)

1067

# define isALPHANUMERIC_L1(c) _generic_isCC(c, _CC_ALPHANUMERIC)

1068

# define isBLANK_L1(c) _generic_isCC(c, _CC_BLANK)

1069

1070

/* continuation character for legal NAME in \N{NAME} */

1071

# define isCHARNAME_CONT(c) _generic_isCC(c, _CC_CHARNAME_CONT)

1072

1073

# define isCNTRL_L1(c) _generic_isCC(c, _CC_CNTRL)

1074

# define isGRAPH_L1(c) _generic_isCC(c, _CC_GRAPH)

1075

# define isLOWER_L1(c) _generic_isCC(c, _CC_LOWER)

1076

# define isPRINT_L1(c) _generic_isCC(c, _CC_PRINT)

1077

# define isPSXSPC_L1(c) _generic_isCC(c, _CC_PSXSPC)

1078

# define isPUNCT_L1(c) _generic_isCC(c, _CC_PUNCT)

1079

# define isSPACE_L1(c) _generic_isCC(c, _CC_SPACE)

1080

# define isUPPER_L1(c) _generic_isCC(c, _CC_UPPER)

1081

# define isWORDCHAR_L1(c) _generic_isCC(c, _CC_WORDCHAR)

1082

# define isIDFIRST_L1(c) _generic_isCC(c, _CC_IDFIRST)

1083

1084

/* Either participates in a fold with a character above 255, or is a

1085

* multi-char fold */

1086

# define _HAS_NONLATIN1_FOLD_CLOSURE_ONLY_FOR_USE_BY_REGCOMP_DOT_C_AND_REGEXEC_DOT_C(c) ((! cBOOL(FITS_IN_8_BITS(c))) || (PL_charclass[(U8) (c)] & _CC_mask(_CC_NONLATIN1_FOLD)))

1087

1088

# define _isQUOTEMETA(c) _generic_isCC(c, _CC_QUOTEMETA)

1089

# define _IS_NON_FINAL_FOLD_ONLY_FOR_USE_BY_REGCOMP_DOT_C(c) \

1090

_generic_isCC(c, _CC_NON_FINAL_FOLD)

1091

# define _IS_IN_SOME_FOLD_ONLY_FOR_USE_BY_REGCOMP_DOT_C(c) \

1092

_generic_isCC(c, _CC_IS_IN_SOME_FOLD)

1093

#else /* else we don't have perl.h */

1094

1095

/* If we don't have perl.h, we are compiling a utility program. Below we

1096

* hard-code various macro definitions that wouldn't otherwise be available

1097

* to it. */

1098

# ifdef EBCDIC

1099

/* Use the native functions. They likely will return false for all

1100

* non-ASCII values, but this makes sure */

1101

# define isLOWER_A(c) (isASCII(c) && islower(c))

1102

# define isPRINT_A(c) (isASCII(c) && isprint(c))

1103

# define isUPPER_A(c) (isASCII(c) && isupper(c))

1104

# else /* ASCII platform. These are coded based on first principles */

1105

# define isLOWER_A(c) ((c) >= 'a' && (c) <= 'z')

1106

# define isPRINT_A(c) (((c) >= 32 && (c) < 127))

1107

# define isUPPER_A(c) ((c) <= 'Z' && (c) >= 'A')

1108

# endif /* Below are common definitions for ASCII and non-ASCII */

1109

# define isALPHA_A(c) (isUPPER_A(c) || isLOWER_A(c))

1110

# define isALPHANUMERIC_A(c) (isALPHA_A(c) || isDIGIT_A(c))

1111

# define isBLANK_A(c) ((c) == ' ' || (c) == '\t')

1112

# define isCNTRL_A(c) (isASCII(c) && (! isPRINT_A(c)))

1113

# define isDIGIT_A(c) ((c) <= '9' && (c) >= '0')

1114

# define isGRAPH_A(c) (isPRINT_A(c) && (c) != ' ')

1115

# define isIDFIRST_A(c) (isALPHA_A(c) || (c) == '_')

1116

# define isPUNCT_A(c) (isGRAPH_A(c) && (! isALPHANUMERIC_A(c)))

1117

# define isSPACE_A(c) ((c) == ' ' \

|| (c) == '\t' \

|| (c) == '\n' \

|| (c) == '\r' \

|| (c) == '\v' \

|| (c) == '\f')

# define isWORDCHAR_A(c) (isALPHANUMERIC_A(c) || (c) == '_')

1124

# define isXDIGIT_A(c) (isDIGIT_A(c) \

1125

|| ((c) >= 'a' && (c) <= 'f') \

1126

|| ((c) <= 'F' && (c) >= 'A'))

1127

1128

/* The _L1 macros may be unnecessary for the utilities; I (khw) added them

1129

* during debugging, and it seems best to keep them. */

1130

# define isPSXSPC_A(c) isSPACE_A(c) /* XXX Assumes SPACE matches '\v' */

1131

# define isALPHA_L1(c) (isUPPER_L1(c) || isLOWER_L1(c))

1132

# define isALPHANUMERIC_L1(c) (isALPHA_L1(c) || isDIGIT_A(c))

1133

/* TRUE if 'c' satisfies isBLANK_A(), or fits in 8 bits and maps to Latin1
 * code point 0xA0.  'c' is parenthesized inside the (U8) cast so that the
 * cast applies to the whole argument expression, not just its first
 * operand. */
# define isBLANK_L1(c) (isBLANK_A(c) \
    || (FITS_IN_8_BITS(c) \
        && NATIVE_TO_LATIN1((U8) (c)) == 0xA0))

1136

# define isCNTRL_L1(c) (FITS_IN_8_BITS(c) && (! isPRINT_L1(c)))

1137

# define isGRAPH_L1(c) (isPRINT_L1(c) && (! isBLANK_L1(c)))

1138

/* TRUE if 'c' satisfies isLOWER_A(), or fits in 8 bits and maps to a Latin1
 * code point that is either >= 0xDF (excluding 0xF7), or one of 0xAA, 0xBA,
 * 0xB5.  'c' is parenthesized inside each (U8) cast so that the cast covers
 * the whole argument expression. */
# define isLOWER_L1(c) (isLOWER_A(c) \
    || (FITS_IN_8_BITS(c) \
        && ((NATIVE_TO_LATIN1((U8) (c)) >= 0xDF \
             && NATIVE_TO_LATIN1((U8) (c)) != 0xF7) \
            || NATIVE_TO_LATIN1((U8) (c)) == 0xAA \
            || NATIVE_TO_LATIN1((U8) (c)) == 0xBA \
            || NATIVE_TO_LATIN1((U8) (c)) == 0xB5)))

1145

/* TRUE if 'c' satisfies isPRINT_A(), or fits in 8 bits and maps to a Latin1
 * code point >= 0xA0.  'c' is parenthesized inside the (U8) cast so that
 * the cast covers the whole argument expression. */
# define isPRINT_L1(c) (isPRINT_A(c) \
    || (FITS_IN_8_BITS(c) \
        && NATIVE_TO_LATIN1((U8) (c)) >= 0xA0))

1148

# define isPSXSPC_L1(c) isSPACE_L1(c)

1149

/* TRUE if 'c' satisfies isPUNCT_A(), or fits in 8 bits and maps to one of
 * the Latin1 code points 0xA1, 0xA7, 0xAB, 0xB6, 0xB7, 0xBB, 0xBF.  'c' is
 * parenthesized inside each (U8) cast so that the cast covers the whole
 * argument expression. */
# define isPUNCT_L1(c) (isPUNCT_A(c) \
    || (FITS_IN_8_BITS(c) \
        && (NATIVE_TO_LATIN1((U8) (c)) == 0xA1 \
            || NATIVE_TO_LATIN1((U8) (c)) == 0xA7 \
            || NATIVE_TO_LATIN1((U8) (c)) == 0xAB \
            || NATIVE_TO_LATIN1((U8) (c)) == 0xB6 \
            || NATIVE_TO_LATIN1((U8) (c)) == 0xB7 \
            || NATIVE_TO_LATIN1((U8) (c)) == 0xBB \
            || NATIVE_TO_LATIN1((U8) (c)) == 0xBF)))

1158

/* TRUE if 'c' satisfies isSPACE_A(), or fits in 8 bits and maps to Latin1
 * code point 0x85 or 0xA0.  'c' is parenthesized inside each (U8) cast so
 * that the cast covers the whole argument expression. */
# define isSPACE_L1(c) (isSPACE_A(c) \
    || (FITS_IN_8_BITS(c) \
        && (NATIVE_TO_LATIN1((U8) (c)) == 0x85 \
            || NATIVE_TO_LATIN1((U8) (c)) == 0xA0)))

1162

/* TRUE if 'c' satisfies isUPPER_A(), or fits in 8 bits and maps to a Latin1
 * code point in 0xC0..0xDE other than 0xD7.  'c' is parenthesized inside
 * each (U8) cast so that the cast covers the whole argument expression. */
# define isUPPER_L1(c) (isUPPER_A(c) \
    || (FITS_IN_8_BITS(c) \
        && (NATIVE_TO_LATIN1((U8) (c)) >= 0xC0 \
            && NATIVE_TO_LATIN1((U8) (c)) <= 0xDE \
            && NATIVE_TO_LATIN1((U8) (c)) != 0xD7)))

1167

# define isWORDCHAR_L1(c) (isIDFIRST_L1(c) || isDIGIT_A(c))

1168

/* TRUE if 'c' satisfies isALPHA_L1() or is the underscore.  The underscore
 * test compares the native 'c' against the native '_' literal (as
 * isIDFIRST_A does); converting only one side with NATIVE_TO_LATIN1() would
 * never match on a platform where that conversion is not the identity. */
# define isIDFIRST_L1(c) (isALPHA_L1(c) || (c) == '_')

1169

# define isCHARNAME_CONT(c) (isWORDCHAR_L1(c) \

|| isBLANK_L1(c) \

|| (c) == '-' \

|| (c) == '(' \

|| (c) == ')')

/* The following are not fully accurate in the above-ASCII range. I (khw)

1175

* don't think it's necessary to be so for the purposes where this gets

1176

* compiled */

1177

# define _isQUOTEMETA(c) (FITS_IN_8_BITS(c) && ! isWORDCHAR_L1(c))

1178

# define _IS_IN_SOME_FOLD_ONLY_FOR_USE_BY_REGCOMP_DOT_C(c) isALPHA_L1(c)

1179

1180

/* And these aren't accurate at all. They are useful only for above

1181

* Latin1, which utilities and bootstrapping don't deal with */

1182

# define _IS_NON_FINAL_FOLD_ONLY_FOR_USE_BY_REGCOMP_DOT_C(c) 0

1183

# define _HAS_NONLATIN1_FOLD_CLOSURE_ONLY_FOR_USE_BY_REGCOMP_DOT_C_AND_REGEXEC_DOT_C(c) 0

1184

1185

/* Many of the macros later in this file are defined in terms of these. By

1186

* implementing them with a function, which converts the class number into

1187

* a call to the desired macro, all of the later ones work. However, that

1188

* function won't be actually defined when building a utility program (no

1189

* perl.h), and so a compiler error will be generated if one is attempted

1190

* to be used. And the above-Latin1 code points require Unicode tables to

1191

* be present, something unlikely to be the case when bootstrapping */

1192

# define _generic_isCC(c, classnum) \

1193

(FITS_IN_8_BITS(c) && S_bootstrap_ctype((U8) (c), (classnum), TRUE))

1194

# define _generic_isCC_A(c, classnum) \

1195

(FITS_IN_8_BITS(c) && S_bootstrap_ctype((U8) (c), (classnum), FALSE))

1196

#endif /* End of no perl.h */

1197

1198

#define isALPHANUMERIC(c) isALPHANUMERIC_A(c)

1199

#define isALPHA(c) isALPHA_A(c)

1200

#define isBLANK(c) isBLANK_A(c)

1201

#define isCNTRL(c) isCNTRL_A(c)

1202

#define isDIGIT(c) isDIGIT_A(c)

1203

#define isGRAPH(c) isGRAPH_A(c)

1204

#define isIDFIRST(c) isIDFIRST_A(c)

1205

#define isLOWER(c) isLOWER_A(c)

1206

#define isPRINT(c) isPRINT_A(c)

1207

#define isPSXSPC(c) isPSXSPC_A(c)

1208

#define isPUNCT(c) isPUNCT_A(c)

1209

#define isSPACE(c) isSPACE_A(c)

1210

#define isUPPER(c) isUPPER_A(c)

1211

#define isWORDCHAR(c) isWORDCHAR_A(c)

1212

#define isXDIGIT(c) isXDIGIT_A(c)

1213

1214

/* ASCII casing. These could also be written as

1215

#define toLOWER(c) (isASCII(c) ? toLOWER_LATIN1(c) : (c))

1216

#define toUPPER(c) (isASCII(c) ? toUPPER_LATIN1_MOD(c) : (c))

1217

which uses table lookup and mask instead of subtraction. (This would

1218

work because the _MOD does not apply in the ASCII range) */

1219

#define toLOWER(c) (isUPPER(c) ? (U8)((c) + ('a' - 'A')) : (c))

1220

#define toUPPER(c) (isLOWER(c) ? (U8)((c) - ('a' - 'A')) : (c))

1221

1222

/* In the ASCII range, these are equivalent to what they're here defined to be.

1223

* But by creating these definitions, other code doesn't have to be aware of

1224

* this detail */

1225

#define toFOLD(c) toLOWER(c)

1226

#define toTITLE(c) toUPPER(c)

1227

1228

#define toLOWER_A(c) toLOWER(c)

1229

#define toUPPER_A(c) toUPPER(c)

1230

#define toFOLD_A(c) toFOLD(c)

1231

#define toTITLE_A(c) toTITLE(c)

1232

1233

/* Use table lookup for speed; returns the input itself if is out-of-range */

1234

#define toLOWER_LATIN1(c) ((! FITS_IN_8_BITS(c)) \

1235

? (c) \

1236

: PL_latin1_lc[ (U8) (c) ])

1237

#define toLOWER_L1(c) toLOWER_LATIN1(c) /* Synonym for consistency */

1238

1239

/* Modified uc. Is correct uc except for three non-ascii chars which are

1240

* all mapped to one of them, and these need special handling; returns the

1241

* input itself if is out-of-range */

1242

#define toUPPER_LATIN1_MOD(c) ((! FITS_IN_8_BITS(c)) \

1243

? (c) \

1244

: PL_mod_latin1_uc[ (U8) (c) ])

1245

#define IN_UTF8_CTYPE_LOCALE PL_in_utf8_CTYPE_locale

1246

1247

/* Use foo_LC_uvchr() instead of these for beyond the Latin1 range */

1248

1249

/* For internal core Perl use only: the base macro for defining macros like

1250

* isALPHA_LC, which uses the current LC_CTYPE locale. 'c' is the code point

1251

* (0-255) to check. In a UTF-8 locale, the result is the same as calling

1252

* isFOO_L1(); the 'utf8_locale_classnum' parameter is something like

1253

* _CC_UPPER, which gives the class number for doing this. For non-UTF-8

1254

* locales, the code to actually do this test is passed in 'non_utf8'. If

1255

* 'c' is above 255, 0 is returned. For accessing the full range of possible

1256

* code points under locale rules, use the macros based on _generic_LC_uvchr

1257

* instead of this. */

1258

#define _generic_LC_base(c, utf8_locale_classnum, non_utf8) \

1259

(! FITS_IN_8_BITS(c) \

1260

? 0 \

1261

: IN_UTF8_CTYPE_LOCALE \

1262

? cBOOL(PL_charclass[(U8) (c)] & _CC_mask(utf8_locale_classnum)) \

1263

: cBOOL(non_utf8))

1264

1265

/* For internal core Perl use only: a helper macro for defining macros like

1266

* isALPHA_LC. 'c' is the code point (0-255) to check. The function name to

1267

* actually do this test is passed in 'non_utf8_func', which is called on 'c',

1268

* casting 'c' to the macro _LC_CAST, which should not be parenthesized. See

1269

* _generic_LC_base for more info */

1270

#define _generic_LC(c, utf8_locale_classnum, non_utf8_func) \

1271

_generic_LC_base(c,utf8_locale_classnum, \

1272

non_utf8_func( (_LC_CAST) (c)))

1273

1274

/* For internal core Perl use only: like _generic_LC, but also returns TRUE if

1275

* 'c' is the platform's native underscore character */

1276

#define _generic_LC_underscore(c,utf8_locale_classnum,non_utf8_func) \

1277

_generic_LC_base(c, utf8_locale_classnum, \

1278

(non_utf8_func( (_LC_CAST) (c)) \

1279

|| (char)(c) == '_'))

1280

1281

/* These next three are also for internal core Perl use only: case-change

1282

* helper macros */

1283

#define _generic_toLOWER_LC(c, function, cast) (! FITS_IN_8_BITS(c) \

1284

? (c) \

1285

: (IN_UTF8_CTYPE_LOCALE) \

1286

? PL_latin1_lc[ (U8) (c) ] \

1287

: function((cast)(c)))

1288

1289

/* Note that the result can be larger than a byte in a UTF-8 locale. It

1290

* returns a single value, so can't adequately return the upper case of LATIN

1291

* SMALL LETTER SHARP S in a UTF-8 locale (which should be a string of two

1292

* values "SS"); instead it asserts against that under DEBUGGING, and

1293

* otherwise returns its input */

1294

#define _generic_toUPPER_LC(c, function, cast) \

1295

(! FITS_IN_8_BITS(c) \

1296

? (c) \

1297

: ((! IN_UTF8_CTYPE_LOCALE) \

1298

? function((cast)(c)) \

1299

: ((((U8)(c)) == MICRO_SIGN) \

1300

? GREEK_CAPITAL_LETTER_MU \

1301

: ((((U8)(c)) == LATIN_SMALL_LETTER_Y_WITH_DIAERESIS) \

1302

? LATIN_CAPITAL_LETTER_Y_WITH_DIAERESIS \

1303

: ((((U8)(c)) == LATIN_SMALL_LETTER_SHARP_S) \

1304

? (__ASSERT_(0) (c)) \

1305

: PL_mod_latin1_uc[ (U8) (c) ])))))

1306

1307

/* Note that the result can be larger than a byte in a UTF-8 locale. It

1308

* returns a single value, so can't adequately return the fold case of LATIN

1309

* SMALL LETTER SHARP S in a UTF-8 locale (which should be a string of two

1310

* values "ss"); instead it asserts against that under DEBUGGING, and

1311

* otherwise returns its input */

1312

#define _generic_toFOLD_LC(c, function, cast) \

1313

((UNLIKELY((c) == MICRO_SIGN) && IN_UTF8_CTYPE_LOCALE) \

1314

? GREEK_SMALL_LETTER_MU \

1315

: (__ASSERT_(! IN_UTF8_CTYPE_LOCALE \

1316

|| (c) != LATIN_SMALL_LETTER_SHARP_S) \

1317

_generic_toLOWER_LC(c, function, cast)))

1318

1319

/* Use the libc versions for these if available. */

1320

#if defined(HAS_ISASCII) && ! defined(USE_NEXT_CTYPE)

1321

# define isASCII_LC(c) (FITS_IN_8_BITS(c) && isascii( (U8) (c)))

1322

#else

1323

# define isASCII_LC(c) isASCII(c)

1324

#endif

1325

1326

#if defined(HAS_ISBLANK) && ! defined(USE_NEXT_CTYPE)

1327

# define isBLANK_LC(c) _generic_LC(c, _CC_BLANK, isblank)

1328

#else /* Unlike isASCII, varies if in a UTF-8 locale */

1329

/* Fallback used when the libc isblank() is not used: selects isBLANK_L1()
 * when in a UTF-8 LC_CTYPE locale and isBLANK() otherwise.  The whole
 * conditional is wrapped in parentheses so that the macro behaves as a
 * single operand when negated or combined with other operators. */
# define isBLANK_LC(c) ((IN_UTF8_CTYPE_LOCALE) ? isBLANK_L1(c) : isBLANK(c))

1330

#endif

1331

1332

#ifdef USE_NEXT_CTYPE /* NeXT computers */

1333

1334

# define _LC_CAST unsigned int /* Needed by _generic_LC. NeXT functions

1335

use this as their input type */

1336

1337

# define isALPHA_LC(c) _generic_LC(c, _CC_ALPHA, NXIsAlpha)

1338

# define isALPHANUMERIC_LC(c) _generic_LC(c, _CC_ALPHANUMERIC, NXIsAlNum)

1339

# define isCNTRL_LC(c) _generic_LC(c, _CC_CNTRL, NXIsCntrl)

1340

# define isDIGIT_LC(c) _generic_LC(c, _CC_DIGIT, NXIsDigit)

1341

# define isGRAPH_LC(c) _generic_LC(c, _CC_GRAPH, NXIsGraph)

1342

# define isIDFIRST_LC(c) _generic_LC_underscore(c, _CC_IDFIRST, NXIsAlpha)

1343

# define isLOWER_LC(c) _generic_LC(c, _CC_LOWER, NXIsLower)

1344

# define isPRINT_LC(c) _generic_LC(c, _CC_PRINT, NXIsPrint)

1345

# define isPUNCT_LC(c) _generic_LC(c, _CC_PUNCT, NXIsPunct)

1346

# define isSPACE_LC(c) _generic_LC(c, _CC_SPACE, NXIsSpace)

1347

# define isUPPER_LC(c) _generic_LC(c, _CC_UPPER, NXIsUpper)

1348

# define isWORDCHAR_LC(c) _generic_LC_underscore(c, _CC_WORDCHAR, NXIsAlNum)

1349

# define isXDIGIT_LC(c) _generic_LC(c, _CC_XDIGIT, NXIsXdigit)

1350

1351

# define toLOWER_LC(c) _generic_toLOWER_LC((c), NXToLower, unsigned int)

1352

# define toUPPER_LC(c) _generic_toUPPER_LC((c), NXToUpper, unsigned int)

1353

# define toFOLD_LC(c) _generic_toFOLD_LC((c), NXToLower, unsigned int)

1354

1355

#else /* !USE_NEXT_CTYPE */

# define _LC_CAST U8

# if defined(CTYPE256) || (!defined(isascii) && !defined(HAS_ISASCII))

1360

/* For most other platforms */

1361

1362

# define isALPHA_LC(c) _generic_LC(c, _CC_ALPHA, isalpha)

1363

# define isALPHANUMERIC_LC(c) _generic_LC(c, _CC_ALPHANUMERIC, isalnum)

1364

# define isCNTRL_LC(c) _generic_LC(c, _CC_CNTRL, iscntrl)

1365

# define isDIGIT_LC(c) _generic_LC(c, _CC_DIGIT, isdigit)

1366

# define isGRAPH_LC(c) _generic_LC(c, _CC_GRAPH, isgraph)

1367

# define isIDFIRST_LC(c) _generic_LC_underscore(c, _CC_IDFIRST, isalpha)

1368

# define isLOWER_LC(c) _generic_LC(c, _CC_LOWER, islower)

1369

# define isPRINT_LC(c) _generic_LC(c, _CC_PRINT, isprint)

1370

# define isPUNCT_LC(c) _generic_LC(c, _CC_PUNCT, ispunct)

1371

# define isSPACE_LC(c) _generic_LC(c, _CC_SPACE, isspace)

1372

# define isUPPER_LC(c) _generic_LC(c, _CC_UPPER, isupper)

1373

# define isWORDCHAR_LC(c) _generic_LC_underscore(c, _CC_WORDCHAR, isalnum)

1374

# define isXDIGIT_LC(c) _generic_LC(c, _CC_XDIGIT, isxdigit)

1375

1376

1377

# define toLOWER_LC(c) _generic_toLOWER_LC((c), tolower, U8)

1378

# define toUPPER_LC(c) _generic_toUPPER_LC((c), toupper, U8)

1379

# define toFOLD_LC(c) _generic_toFOLD_LC((c), tolower, U8)

1380

1381

# else /* The final fallback position */

1382

1383

# define isALPHA_LC(c) (isascii(c) && isalpha(c))

1384

# define isALPHANUMERIC_LC(c) (isascii(c) && isalnum(c))

1385

# define isCNTRL_LC(c) (isascii(c) && iscntrl(c))

1386

# define isDIGIT_LC(c) (isascii(c) && isdigit(c))

1387

# define isGRAPH_LC(c) (isascii(c) && isgraph(c))

1388

# define isIDFIRST_LC(c) (isascii(c) && (isalpha(c) || (c) == '_'))

1389

# define isLOWER_LC(c) (isascii(c) && islower(c))

1390

# define isPRINT_LC(c) (isascii(c) && isprint(c))

1391

# define isPUNCT_LC(c) (isascii(c) && ispunct(c))

1392

# define isSPACE_LC(c) (isascii(c) && isspace(c))

1393

# define isUPPER_LC(c) (isascii(c) && isupper(c))

1394

# define isWORDCHAR_LC(c) (isascii(c) && (isalnum(c) || (c) == '_'))

1395

# define isXDIGIT_LC(c) (isascii(c) && isxdigit(c))

1396

1397

# define toLOWER_LC(c) (isascii(c) ? tolower(c) : (c))

1398

# define toUPPER_LC(c) (isascii(c) ? toupper(c) : (c))

1399

# define toFOLD_LC(c) (isascii(c) ? tolower(c) : (c))

1400

1401

# endif

1402

#endif /* USE_NEXT_CTYPE */

1403

1404

#define isIDCONT(c) isWORDCHAR(c)

1405

#define isIDCONT_A(c) isWORDCHAR_A(c)

1406

#define isIDCONT_L1(c) isWORDCHAR_L1(c)

1407

#define isIDCONT_LC(c) isWORDCHAR_LC(c)

1408

#define isPSXSPC_LC(c) isSPACE_LC(c)

1409

1410

/* For internal core Perl use only: the base macros for defining macros like

1411

* isALPHA_uni. 'c' is the code point to check. 'classnum' is the POSIX class

1412

* number defined earlier in this file. _generic_uni() is used for POSIX

1413

* classes where there is a macro or function 'above_latin1' that takes the

1414

* single argument 'c' and returns the desired value. These exist for those

1415

* classes which have simple definitions, avoiding the overhead of a hash

1416

* lookup or inversion list binary search. _generic_swash_uni() can be used

1417

* for classes where that overhead is faster than a direct lookup.

1418

* _generic_uni() won't compile if 'c' isn't unsigned, as it won't match the

1419

* 'above_latin1' prototype. _generic_isCC() macro does bounds checking, so

1420

* have duplicate checks here, so could create versions of the macros that

1421

* don't, but experiments show that gcc optimizes them out anyway. */

1422

1423

/* Note that all ignore 'use bytes' */

1424

#define _generic_uni(classnum, above_latin1, c) ((c) < 256 \

1425

? _generic_isCC(c, classnum) \

1426

: above_latin1(c))

1427

#define _generic_swash_uni(classnum, c) ((c) < 256 \

1428

? _generic_isCC(c, classnum) \

1429

: _is_uni_FOO(classnum, c))

1430

#define isALPHA_uni(c) _generic_swash_uni(_CC_ALPHA, c)

1431

#define isALPHANUMERIC_uni(c) _generic_swash_uni(_CC_ALPHANUMERIC, c)

1432

#define isASCII_uni(c) isASCII(c)

1433

#define isBLANK_uni(c) _generic_uni(_CC_BLANK, is_HORIZWS_cp_high, c)

1434

#define isCNTRL_uni(c) isCNTRL_L1(c) /* All controls are in Latin1 */

1435

#define isDIGIT_uni(c) _generic_swash_uni(_CC_DIGIT, c)

1436

#define isGRAPH_uni(c) _generic_swash_uni(_CC_GRAPH, c)

1437

#define isIDCONT_uni(c) _generic_uni(_CC_WORDCHAR, _is_uni_perl_idcont, c)

1438

#define isIDFIRST_uni(c) _generic_uni(_CC_IDFIRST, _is_uni_perl_idstart, c)

1439

#define isLOWER_uni(c) _generic_swash_uni(_CC_LOWER, c)

1440

#define isPRINT_uni(c) _generic_swash_uni(_CC_PRINT, c)

1441

1442

/* Posix and regular space are identical above Latin1 */

1443

#define isPSXSPC_uni(c) _generic_uni(_CC_PSXSPC, is_XPERLSPACE_cp_high, c)

1444

1445

#define isPUNCT_uni(c) _generic_swash_uni(_CC_PUNCT, c)

1446

#define isSPACE_uni(c) _generic_uni(_CC_SPACE, is_XPERLSPACE_cp_high, c)

1447

#define isUPPER_uni(c) _generic_swash_uni(_CC_UPPER, c)

1448

#define isVERTWS_uni(c) _generic_uni(_CC_VERTSPACE, is_VERTWS_cp_high, c)

1449

#define isWORDCHAR_uni(c) _generic_swash_uni(_CC_WORDCHAR, c)

1450

#define isXDIGIT_uni(c) _generic_uni(_CC_XDIGIT, is_XDIGIT_cp_high, c)

1451

1452

#define toFOLD_uni(c,s,l) to_uni_fold(c,s,l)

1453

#define toLOWER_uni(c,s,l) to_uni_lower(c,s,l)

1454

#define toTITLE_uni(c,s,l) to_uni_title(c,s,l)

1455

#define toUPPER_uni(c,s,l) to_uni_upper(c,s,l)

1456

1457

/* For internal core Perl use only: the base macros for defining macros like

1458

* isALPHA_LC_uvchr. These are like isALPHA_LC, but the input can be any code

1459

* point, not just 0-255. Like _generic_uni, there are two versions, one for

1460

* simple class definitions; the other for more complex. These are like

1461

* _generic_uni, so see it for more info. */

1462

/* Dispatches on the code point 'c': below 256 the locale-aware 'latin1'
 * macro is applied, otherwise 'above_latin1'.  'c' is parenthesized in the
 * range test so that an expression argument is compared as a whole. */
#define _generic_LC_uvchr(latin1, above_latin1, c) \
    ((c) < 256 ? latin1(c) : above_latin1(c))

1464

/* Like _generic_LC_uvchr, but for code points of 256 and above the result
 * comes from _is_uni_FOO(classnum, c).  'c' is parenthesized in the range
 * test so that an expression argument is compared as a whole. */
#define _generic_LC_swash_uvchr(latin1, classnum, c) \
    ((c) < 256 ? latin1(c) : _is_uni_FOO(classnum, c))

1466

1467

#define isALPHA_LC_uvchr(c) _generic_LC_swash_uvchr(isALPHA_LC, _CC_ALPHA, c)

1468

#define isALPHANUMERIC_LC_uvchr(c) _generic_LC_swash_uvchr(isALPHANUMERIC_LC, \

1469

_CC_ALPHANUMERIC, c)

1470

#define isASCII_LC_uvchr(c) isASCII_LC(c)

1471

#define isBLANK_LC_uvchr(c) _generic_LC_uvchr(isBLANK_LC, is_HORIZWS_cp_high, c)

1472

/* Below 256, defers to isCNTRL_LC(); any code point of 256 or above yields
 * 0.  'c' is parenthesized in the range test so that an expression argument
 * is compared as a whole. */
#define isCNTRL_LC_uvchr(c) ((c) < 256 ? isCNTRL_LC(c) : 0)

1473

#define isDIGIT_LC_uvchr(c) _generic_LC_swash_uvchr(isDIGIT_LC, _CC_DIGIT, c)

1474

#define isGRAPH_LC_uvchr(c) _generic_LC_swash_uvchr(isGRAPH_LC, _CC_GRAPH, c)

1475

#define isIDCONT_LC_uvchr(c) _generic_LC_uvchr(isIDCONT_LC, \

1476

_is_uni_perl_idcont, c)

1477

#define isIDFIRST_LC_uvchr(c) _generic_LC_uvchr(isIDFIRST_LC, \

1478

_is_uni_perl_idstart, c)

1479

#define isLOWER_LC_uvchr(c) _generic_LC_swash_uvchr(isLOWER_LC, _CC_LOWER, c)

1480

#define isPRINT_LC_uvchr(c) _generic_LC_swash_uvchr(isPRINT_LC, _CC_PRINT, c)

1481

#define isPSXSPC_LC_uvchr(c) isSPACE_LC_uvchr(c) /* space is identical to posix

1482

space under locale */

1483

#define isPUNCT_LC_uvchr(c) _generic_LC_swash_uvchr(isPUNCT_LC, _CC_PUNCT, c)

1484

#define isSPACE_LC_uvchr(c) _generic_LC_uvchr(isSPACE_LC, \

1485

is_XPERLSPACE_cp_high, c)

1486

#define isUPPER_LC_uvchr(c) _generic_LC_swash_uvchr(isUPPER_LC, _CC_UPPER, c)

1487

#define isWORDCHAR_LC_uvchr(c) _generic_LC_swash_uvchr(isWORDCHAR_LC, \

1488

_CC_WORDCHAR, c)

1489

#define isXDIGIT_LC_uvchr(c) _generic_LC_uvchr(isXDIGIT_LC, is_XDIGIT_cp_high, c)

1490

1491

#define isBLANK_LC_uni(c) isBLANK_LC_uvchr(UNI_TO_NATIVE(c))

1492

1493

/* For internal core Perl use only: the base macros for defining macros like

1494

* isALPHA_utf8. These are like the earlier defined macros, but take an input

1495

* UTF-8 encoded string 'p'. If the input is in the Latin1 range, use

1496

* the Latin1 macro 'classnum' on 'p'. Otherwise use the value given by the

1497

* 'utf8' parameter. This relies on the fact that ASCII characters have the

1498

* same representation whether utf8 or not. Note that it assumes that the utf8

1499

* has been validated, and ignores 'use bytes' */

1500

#define _generic_utf8(classnum, p, utf8) (UTF8_IS_INVARIANT(*(p)) \

1501

? _generic_isCC(*(p), classnum) \

1502

: (UTF8_IS_DOWNGRADEABLE_START(*(p))) \

1503

? _generic_isCC( \

1504

TWO_BYTE_UTF8_TO_NATIVE(*(p), \

*((p)+1 )), \

classnum) \

: utf8)

/* Like the above, but calls 'above_latin1(p)' to get the utf8 value. 'above_latin1'

1509

* can be a macro */

1510

#define _generic_func_utf8(classnum, above_latin1, p) \

1511

_generic_utf8(classnum, p, above_latin1(p))

1512

/* Like the above, but passes classnum to _isFOO_utf8(), instead of having an

1513

* 'above_latin1' parameter */

1514

#define _generic_swash_utf8(classnum, p) \

1515

_generic_utf8(classnum, p, _is_utf8_FOO(classnum, p))

1516

1517

/* Like the above, but should be used only when it is known that there are no

1518

* characters in the upper-Latin1 range (128-255 on ASCII platforms) which the

1519

* class is TRUE for. Hence it can skip the tests for this range.

1520

* 'above_latin1' should include its arguments */

1521

#define _generic_utf8_no_upper_latin1(classnum, p, above_latin1) \

1522

(UTF8_IS_INVARIANT(*(p)) \

1523

? _generic_isCC(*(p), classnum) \

1524

: (UTF8_IS_ABOVE_LATIN1(*(p))) \

? above_latin1 \

: 0)

/* NOTE that some of these macros have very similar ones in regcharclass.h.

1529

* For example, there is (at the time of this writing) an 'is_SPACE_utf8()'

1530

* there, differing in name only by an underscore from the one here

1531

* 'isSPACE_utf8()'. The difference is that the ones here are probably more

1532

* efficient and smaller, using an O(1) array lookup for Latin1-range code

1533

* points; the regcharclass.h ones are implemented as a series of

1534

* "if-else-if-else ..." */

1535

1536

#define isALPHA_utf8(p) _generic_swash_utf8(_CC_ALPHA, p)

1537

#define isALPHANUMERIC_utf8(p) _generic_swash_utf8(_CC_ALPHANUMERIC, p)

1538

/* Because ASCII is invariant under utf8, the non-utf8 macro works on the
 * first byte of 'p'.  'p' is parenthesized before dereferencing so that a
 * pointer expression such as 's + 1' is dereferenced as a whole. */
#define isASCII_utf8(p) isASCII(*(p))

1541

#define isBLANK_utf8(p) _generic_func_utf8(_CC_BLANK, is_HORIZWS_high, p)

1542

1543

#ifdef EBCDIC

1544

/* Because all controls are UTF-8 invariants in EBCDIC, we can use this

1545

* more efficient macro instead of the more general one */

1546

/* 'p' points at the UTF-8 encoded character; the single-byte test must be
 * given the byte it points to, not the pointer itself. */
# define isCNTRL_utf8(p) isCNTRL_L1(*(p))

1547

#else

1548

# define isCNTRL_utf8(p) _generic_utf8(_CC_CNTRL, p, 0)

1549

#endif

1550

1551

#define isDIGIT_utf8(p) _generic_utf8_no_upper_latin1(_CC_DIGIT, p, \

1552

_is_utf8_FOO(_CC_DIGIT, p))

1553

#define isGRAPH_utf8(p) _generic_swash_utf8(_CC_GRAPH, p)

1554

#define isIDCONT_utf8(p) _generic_func_utf8(_CC_WORDCHAR, \

1555

_is_utf8_perl_idcont, p)

1556

1557

/* To prevent S_scan_word in toke.c from hanging, we have to make sure that

1558

* IDFIRST is an alnum. See

1559

* http://rt.perl.org/rt3/Ticket/Display.html?id=74022 for more detail than you

1560

* ever wanted to know about. (In the ASCII range, there isn't a difference.)

1561

* This used to be not the XID version, but we decided to go with the more

1562

* modern Unicode definition */

1563

#define isIDFIRST_utf8(p) _generic_func_utf8(_CC_IDFIRST, \

1564

_is_utf8_perl_idstart, p)

1565

1566

#define isLOWER_utf8(p) _generic_swash_utf8(_CC_LOWER, p)

1567

#define isPRINT_utf8(p) _generic_swash_utf8(_CC_PRINT, p)

1568

1569

/* Posix and regular space are identical above Latin1 */

1570

#define isPSXSPC_utf8(p) _generic_func_utf8(_CC_PSXSPC, is_XPERLSPACE_high, p)

1571

1572

#define isPUNCT_utf8(p) _generic_swash_utf8(_CC_PUNCT, p)

1573

#define isSPACE_utf8(p) _generic_func_utf8(_CC_SPACE, is_XPERLSPACE_high, p)

1574

#define isUPPER_utf8(p) _generic_swash_utf8(_CC_UPPER, p)

1575

#define isVERTWS_utf8(p) _generic_func_utf8(_CC_VERTSPACE, is_VERTWS_high, p)

1576

#define isWORDCHAR_utf8(p) _generic_swash_utf8(_CC_WORDCHAR, p)

1577

#define isXDIGIT_utf8(p) _generic_utf8_no_upper_latin1(_CC_XDIGIT, p, \

1578

is_XDIGIT_high(p))

1579

1580

#define toFOLD_utf8(p,s,l) to_utf8_fold(p,s,l)

1581

#define toLOWER_utf8(p,s,l) to_utf8_lower(p,s,l)

1582

#define toTITLE_utf8(p,s,l) to_utf8_title(p,s,l)

1583

#define toUPPER_utf8(p,s,l) to_utf8_upper(p,s,l)

1584

1585

/* For internal core Perl use only: the base macros for defining macros like

1586

* isALPHA_LC_utf8. These are like _generic_utf8, but if the first code point

1587

* in 'p' is within the 0-255 range, it uses locale rules from the passed-in

1588

* 'macro' parameter */

1589

#define _generic_LC_utf8(macro, p, utf8) \

1590

(UTF8_IS_INVARIANT(*(p)) \

1591

? macro(*(p)) \

1592

: (UTF8_IS_DOWNGRADEABLE_START(*(p))) \

1593

? macro(TWO_BYTE_UTF8_TO_NATIVE(*(p), *((p)+1))) \

1594

: utf8)

1595

1596

#define _generic_LC_swash_utf8(macro, classnum, p) \

1597

_generic_LC_utf8(macro, p, _is_utf8_FOO(classnum, p))

1598

#define _generic_LC_func_utf8(macro, above_latin1, p) \

1599

_generic_LC_utf8(macro, p, above_latin1(p))

1600

1601

#define isALPHANUMERIC_LC_utf8(p) _generic_LC_swash_utf8(isALPHANUMERIC_LC, \

1602

_CC_ALPHANUMERIC, p)

1603

#define isALPHA_LC_utf8(p) _generic_LC_swash_utf8(isALPHA_LC, _CC_ALPHA, p)

1604

/* Applies isASCII_LC() to the first byte of 'p'.  'p' is parenthesized
 * before dereferencing so that a pointer expression such as 's + 1' is
 * dereferenced as a whole. */
#define isASCII_LC_utf8(p) isASCII_LC(*(p))

1605

#define isBLANK_LC_utf8(p) _generic_LC_func_utf8(isBLANK_LC, is_HORIZWS_high, p)

1606

#define isCNTRL_LC_utf8(p) _generic_LC_utf8(isCNTRL_LC, p, 0)

1607

#define isDIGIT_LC_utf8(p) _generic_LC_swash_utf8(isDIGIT_LC, _CC_DIGIT, p)

1608

#define isGRAPH_LC_utf8(p) _generic_LC_swash_utf8(isGRAPH_LC, _CC_GRAPH, p)

1609

#define isIDCONT_LC_utf8(p) _generic_LC_func_utf8(isIDCONT_LC, _is_utf8_perl_idcont, p)

1610

#define isIDFIRST_LC_utf8(p) _generic_LC_func_utf8(isIDFIRST_LC, _is_utf8_perl_idstart, p)

1611

#define isLOWER_LC_utf8(p) _generic_LC_swash_utf8(isLOWER_LC, _CC_LOWER, p)

1612

#define isPRINT_LC_utf8(p) _generic_LC_swash_utf8(isPRINT_LC, _CC_PRINT, p)

1613

#define isPSXSPC_LC_utf8(p) isSPACE_LC_utf8(p) /* space is identical to posix

1614

space under locale */

1615

#define isPUNCT_LC_utf8(p) _generic_LC_swash_utf8(isPUNCT_LC, _CC_PUNCT, p)

1616

#define isSPACE_LC_utf8(p) _generic_LC_func_utf8(isSPACE_LC, is_XPERLSPACE_high, p)

1617

#define isUPPER_LC_utf8(p) _generic_LC_swash_utf8(isUPPER_LC, _CC_UPPER, p)

1618

#define isWORDCHAR_LC_utf8(p) _generic_LC_swash_utf8(isWORDCHAR_LC, \

1619

_CC_WORDCHAR, p)

1620

#define isXDIGIT_LC_utf8(p) _generic_LC_func_utf8(isXDIGIT_LC, is_XDIGIT_high, p)

1621

1622

/* Macros for backwards compatibility and for completeness when the ASCII and
 * Latin1 values are identical.  Each one is a plain alias: it expands to the
 * macro named on its right-hand side with the same argument. */
#define isALPHAU(c)          isALPHA_L1(c)
#define isDIGIT_L1(c)        isDIGIT_A(c)
#define isOCTAL(c)           isOCTAL_A(c)
#define isOCTAL_L1(c)        isOCTAL_A(c)
#define isXDIGIT_L1(c)       isXDIGIT_A(c)
#define isALNUM(c)           isWORDCHAR(c)
#define isALNUMU(c)          isWORDCHAR_L1(c)
#define isALNUM_LC(c)        isWORDCHAR_LC(c)
#define isALNUM_uni(c)       isWORDCHAR_uni(c)
#define isALNUM_LC_uvchr(c)  isWORDCHAR_LC_uvchr(c)
#define isALNUM_utf8(p)      isWORDCHAR_utf8(p)
#define isALNUM_LC_utf8(p)   isWORDCHAR_LC_utf8(p)
#define isALNUMC_A(c)        isALPHANUMERIC_A(c)     /* Mnemonic: "C's alnum" */
#define isALNUMC_L1(c)       isALPHANUMERIC_L1(c)
#define isALNUMC(c)          isALPHANUMERIC(c)
#define isALNUMC_LC(c)       isALPHANUMERIC_LC(c)
#define isALNUMC_uni(c)      isALPHANUMERIC_uni(c)
#define isALNUMC_LC_uvchr(c) isALPHANUMERIC_LC_uvchr(c)
#define isALNUMC_utf8(p)     isALPHANUMERIC_utf8(p)
#define isALNUMC_LC_utf8(p)  isALPHANUMERIC_LC_utf8(p)

1644

1645

/* On EBCDIC platforms, CTRL-@ is 0, CTRL-A is 1, etc, just like on ASCII,
 * except that they don't necessarily mean the same characters, e.g. CTRL-D is
 * 4 on both systems, but that is EOT on ASCII;  ST on EBCDIC.
 * '?' is special-cased on EBCDIC to APC, which is the control there that is
 * the outlier from the block that contains the other controls, just like
 * toCTRL('?') on ASCII yields DEL, the control that is the outlier from the C0
 * block.  If it weren't special cased, it would yield a non-control.
 * The conversion works both ways, so CTRL('D') is 4, and CTRL(4) is D, etc.
 *
 * In the EBCDIC form the argument appears several times in the expansion, so
 * it must not have side effects.  The nested conditionals are parenthesized
 * explicitly, and the reverse mapping of QUESTION_MARK_CTRL yields the
 * character '?' itself (not the result of a comparison), which is what makes
 * the conversion symmetric. */
#ifndef EBCDIC
#  define toCTRL(c)    (toUPPER(c) ^ 64)
#else
#  define toCTRL(c)    ((isPRINT_A(c))                              \
                        ? (UNLIKELY((c) == '?')                     \
                           ? QUESTION_MARK_CTRL                     \
                           : (NATIVE_TO_LATIN1(toUPPER(c)) ^ 64))   \
                        : (UNLIKELY((c) == QUESTION_MARK_CTRL)      \
                           ? '?'                                    \
                           : (LATIN1_TO_NATIVE((c) ^ 64))))
#endif

1664

1665

/* Line numbers are unsigned, 32 bits. */

1666

typedef U32 line_t;

1667

#define NOLINE ((line_t) 4294967295UL) /* = FFFFFFFF */

1668

1669

/* Helpful alias for version prescan.
 *
 * Both macros call Perl_prescan_version() on a and are true when the pointer
 * it returns differs from a.  They differ only in the second argument passed
 * to the prescan (FALSE for the lax form, TRUE for the strict form); b is
 * forwarded as the third argument.  a is evaluated twice, so it must be free
 * of side effects. */
#define is_LAX_VERSION(a,b) \
        ((a) != Perl_prescan_version(aTHX_ (a), FALSE, (b), NULL, NULL, NULL, NULL))

#define is_STRICT_VERSION(a,b) \
        ((a) != Perl_prescan_version(aTHX_ (a), TRUE, (b), NULL, NULL, NULL, NULL))

1675

1676

/* If b is non-null, store c through it; then return a from the enclosing
 * function.
 *
 * This expands to TWO statements (an if followed by a return) and already
 * carries its own trailing semicolon, so it must only be used where a full
 * statement sequence is allowed -- never as the unbraced body of an if/else
 * or loop, where only the first statement would be governed by the
 * condition. */
#define BADVERSION(a,b,c) \
        if (b) { \
            *(b) = (c); \
        } \
        return (a);

/* Converts a character known to represent a hexadecimal digit (0-9, A-F, or
 * a-f) to its numeric value.  READ_XDIGIT's argument is a string pointer,
 * which is advanced.  The input is validated only by an assert() in DEBUGGING
 * builds.  In both ASCII and EBCDIC the last 4 bits of the digits are 0-9; and
 * the last 4 bits of A-F and a-f are 1-6, so adding 9 yields 10-15.
 *
 * XDIGIT_VALUE mentions c several times, so c must not have side effects.
 * READ_XDIGIT must be given a modifiable pointer lvalue; exactly one of the
 * two branches runs, so the pointer is advanced once per use. */
#define XDIGIT_VALUE(c) (__ASSERT_(isXDIGIT(c)) (0xf & (isDIGIT(c)          \
                                                        ? (c)               \
                                                        : ((c) + 9))))
#define READ_XDIGIT(s)  (__ASSERT_(isXDIGIT(*(s))) (0xf & (isDIGIT(*(s))    \
                                                        ? (*(s)++)          \
                                                        : (*(s)++ + 9))))

/* Converts a character known to represent an octal digit (0-7) to its numeric
 * value.  The input is validated only by an assert() in DEBUGGING builds.  In
 * both ASCII and EBCDIC the last 3 bits of the octal digits range from 0-7,
 * so masking with 7 is all that is needed. */
#define OCTAL_VALUE(c) (__ASSERT_(isOCTAL(c)) (7 & (c)))

1698

1699

/*

1700

=head1 Memory Management

1701

1702

1703

The XSUB-writer's interface to the C C<malloc> function.

1704

1705

Memory obtained by this should B<ONLY> be freed with L<"Safefree">.

1706

1707

In 5.9.3, Newx() and friends replace the older New() API, and drop

1708

the first parameter, I<x>, a debug aid which allowed callers to identify

1709

themselves. This aid has been superseded by a new build option,

1710

PERL_MEM_LOG (see L<perlhacktips/PERL_MEM_LOG>). The older API is still

1711

there for use in XS modules supporting older perls.

1712

1713

1714

The XSUB-writer's interface to the C C<malloc> function, with

1715

cast. See also C<Newx>.

1716

1717

Memory obtained by this should B<ONLY> be freed with L<"Safefree">.

1718

1719

1720

The XSUB-writer's interface to the C C<malloc> function. The allocated

1721

memory is zeroed with C<memzero>. See also C<Newx>.

1722

1723

Memory obtained by this should B<ONLY> be freed with L<"Safefree">.

1724

1725

1726

The XSUB-writer's interface to the C C<realloc> function.

1727

1728

Memory obtained by this should B<ONLY> be freed with L<"Safefree">.

1729

1730

1731

The XSUB-writer's interface to the C C<realloc> function, with

1732

cast.

1733

1734

Memory obtained by this should B<ONLY> be freed with L<"Safefree">.

1735

1736

=for apidoc Am|void|Safefree|void* ptr

1737

The XSUB-writer's interface to the C C<free> function.

1738

1739

This should B<ONLY> be used on memory obtained using L<"Newx"> and friends.

1740

1741

1742

The XSUB-writer's interface to the C C<memmove> function. The C<src> is the

1743

source, C<dest> is the destination, C<nitems> is the number of items, and

1744

C<type> is the type. Can do overlapping moves. See also C<Copy>.

1745

1746

1747

Like C<Move> but returns dest. Useful

1748

for encouraging compilers to tail-call

optimise.

The XSUB-writer's interface to the C C<memcpy> function. The C<src> is the

1753

source, C<dest> is the destination, C<nitems> is the number of items, and

1754

C<type> is the type. May fail on overlapping copies. See also C<Move>.

Like C<Copy> but returns dest. Useful

1759

for encouraging compilers to tail-call

optimise.

The XSUB-writer's interface to the C C<memzero> function. The C<dest> is the

1765

destination, C<nitems> is the number of items, and C<type> is the type.

Like C<Zero> but returns dest. Useful

1770

for encouraging compilers to tail-call

optimise.

This is an architecture-independent macro to copy one structure to another.

Fill up memory with a byte pattern (a byte repeated over and over

1779

again) that hopefully catches attempts to access uninitialized memory.

PoisonWith(0xAB) for catching access to allocated but uninitialized memory.

PoisonWith(0xEF) for catching access to freed memory.

PoisonWith(0xEF) for catching access to freed memory.

=cut */

/* Maintained for backwards-compatibility only. Use newSV() instead.
 * The first argument, x, is accepted but not used in the expansion.
 * Not defined when PERL_CORE is defined. */
#ifndef PERL_CORE
#  define NEWSV(x,len)  newSV(len)
#endif

1799

1800

/* The complement of 0 converted to MEM_SIZE.  Used below as the upper bound
 * in the MEM_WRAP_CHECK and PERL_STRLEN_ROUNDUP overflow tests. */
#define MEM_SIZE_MAX ((MEM_SIZE)~0)

1801

1802

/* The +0.0 in MEM_WRAP_CHECK_ is an attempt to foil
 * overly eager compilers that will bleat about e.g.
 * (U16)n > (size_t)~0/sizeof(U16) always being false.
 *
 * MEM_WRAP_CHECK(n,t)      croaks via croak_memory_wrap() when n items of
 *                          type t would not fit in MEM_SIZE_MAX bytes.
 * MEM_WRAP_CHECK_1(n,t,a)  same test, but croaks with the message a.
 * MEM_WRAP_CHECK_(n,t)     MEM_WRAP_CHECK followed by a comma, for use as
 *                          the left operand of a comma expression.
 * PERL_STRLEN_ROUNDUP(n)   rounds n up to a multiple of
 *                          PERL_STRLEN_ROUNDUP_QUANTUM (the mask arithmetic
 *                          assumes the quantum is a power of two).  n is
 *                          parenthesized so that expression arguments are
 *                          rounded as a whole.
 *
 * Without PERL_MALLOC_WRAP the checks expand to nothing.
 * NOTE(review): MEM_WRAP_CHECK_2 is defined only in the no-op branch. */
#ifdef PERL_MALLOC_WRAP
#define MEM_WRAP_CHECK(n,t) \
        (void)(UNLIKELY(sizeof(t) > 1 && ((MEM_SIZE)(n)+0.0) > MEM_SIZE_MAX/sizeof(t)) && (croak_memory_wrap(),0))
#define MEM_WRAP_CHECK_1(n,t,a) \
        (void)(UNLIKELY(sizeof(t) > 1 && ((MEM_SIZE)(n)+0.0) > MEM_SIZE_MAX/sizeof(t)) && (Perl_croak_nocontext("%s",(a)),0))
#define MEM_WRAP_CHECK_(n,t) MEM_WRAP_CHECK(n,t),

#define PERL_STRLEN_ROUNDUP(n) ((void)(((n) > MEM_SIZE_MAX - 2 * PERL_STRLEN_ROUNDUP_QUANTUM) ? (croak_memory_wrap(),0):0),(((n)-1+PERL_STRLEN_ROUNDUP_QUANTUM)&~((MEM_SIZE)PERL_STRLEN_ROUNDUP_QUANTUM-1)))
#else

#define MEM_WRAP_CHECK(n,t)
#define MEM_WRAP_CHECK_1(n,t,a)
#define MEM_WRAP_CHECK_2(n,t,a,b)
#define MEM_WRAP_CHECK_(n,t)

#define PERL_STRLEN_ROUNDUP(n) ((((n)-1+PERL_STRLEN_ROUNDUP_QUANTUM)&~((MEM_SIZE)PERL_STRLEN_ROUNDUP_QUANTUM-1)))

#endif

#ifdef PERL_MEM_LOG
/*
 * If PERL_MEM_LOG is defined, all Newx()s, Renew()s, and Safefree()s
 * go through functions, which are handy for debugging breakpoints, but
 * which more importantly get the immediate calling environment (file and
 * line number, and C function name if available) passed in.  This info can
 * then be used for logging the calls, for which one gets a sample
 * implementation unless -DPERL_MEM_LOG_NOIMPL is also defined.
 *
 * Known problems:
 * - not all memory allocs get logged, only those
 *   that go through Newx() and derivatives (while all
 *   Safefrees do get logged)
 * - __FILE__ and __LINE__ do not work everywhere
 * - __func__ or __FUNCTION__ even less so
 * - I think more goes on after the perlio frees but
 *   the thing is that STDERR gets closed (as do all
 *   the file descriptors)
 * - no deeper calling stack than the caller of the Newx()
 *   or the kind, but do I look like a C reflection/introspection
 *   utility to you?
 * - the function prototypes for the logging functions
 *   probably should maybe be somewhere else than handy.h
 * - one could consider inlining (macrofying) the logging
 *   for speed, but I am too lazy
 * - one could imagine recording the allocations in a hash,
 *   (keyed by the allocation address?), and maintain that
 *   through reallocs and frees, but how to do that without
 *   any News() happening...?
 * - lots of -Ddefines to get useful/controllable output
 * - lots of ENV reads
 */

/* Logging hooks.  Each receives the call site (file name, line number,
 * function name) as its last three arguments. */
PERL_EXPORT_C Malloc_t Perl_mem_log_alloc(const UV n, const UV typesize, const char *type_name, Malloc_t newalloc, const char *filename, const int linenumber, const char *funcname);

PERL_EXPORT_C Malloc_t Perl_mem_log_realloc(const UV n, const UV typesize, const char *type_name, Malloc_t oldalloc, Malloc_t newalloc, const char *filename, const int linenumber, const char *funcname);

PERL_EXPORT_C Malloc_t Perl_mem_log_free(Malloc_t oldalloc, const char *filename, const int linenumber, const char *funcname);

# ifdef PERL_CORE
#  ifndef PERL_MEM_LOG_NOIMPL
/* NOTE(review): presumably one tag per kind of logged event, consumed by the
 * sample implementation -- confirm where that implementation lives. */
enum mem_log_type {
  MLT_ALLOC,
  MLT_REALLOC,
  MLT_FREE,
  MLT_NEW_SV,
  MLT_DEL_SV
};
#  endif
#  if defined(PERL_IN_SV_C)  /* those are only used in sv.c */
void Perl_mem_log_new_sv(const SV *sv, const char *filename, const int linenumber, const char *funcname);
void Perl_mem_log_del_sv(const SV *sv, const char *filename, const int linenumber, const char *funcname);
#  endif
# endif

#endif

/* MEM_LOG_ALLOC / MEM_LOG_REALLOC / MEM_LOG_FREE wrap an allocator result
 * (or, for FREE, the pointer about to be freed).  With PERL_MEM_LOG they
 * route the value through the Perl_mem_log_* functions together with the
 * element count, element size, stringified type and call site; otherwise
 * they expand to just the wrapped value, a. */
#ifdef PERL_MEM_LOG
#define MEM_LOG_ALLOC(n,t,a)     Perl_mem_log_alloc(n,sizeof(t),STRINGIFY(t),a,__FILE__,__LINE__,FUNCTION__)
#define MEM_LOG_REALLOC(n,t,v,a) Perl_mem_log_realloc(n,sizeof(t),STRINGIFY(t),v,a,__FILE__,__LINE__,FUNCTION__)
#define MEM_LOG_FREE(a)          Perl_mem_log_free(a,__FILE__,__LINE__,FUNCTION__)
#endif

#ifndef MEM_LOG_ALLOC
#define MEM_LOG_ALLOC(n,t,a)     (a)
#endif
#ifndef MEM_LOG_REALLOC
#define MEM_LOG_REALLOC(n,t,v,a) (a)
#endif
#ifndef MEM_LOG_FREE
#define MEM_LOG_FREE(a)          (a)
#endif

1896

1897

/* Allocation macros.  Each assigns to v (which must therefore be an lvalue)
 * the pointer returned by the underlying safe* allocator for n items of type
 * t, after MEM_WRAP_CHECK_ has guarded the n * sizeof(t) multiplication and
 * with the result passed through the MEM_LOG_* hook.
 *   Newx   - safemalloc, result cast to t*
 *   Newxc  - safemalloc, result cast to c*
 *   Newxz  - safecalloc, result cast to t*
 *   Renew  - saferealloc of v, result cast to t*
 *   Renewc - saferealloc of v, result cast to c*
 * n appears more than once in each expansion, so it must not have side
 * effects. */
#define Newx(v,n,t)     (v = (MEM_WRAP_CHECK_(n,t) (t*)MEM_LOG_ALLOC(n,t,safemalloc((MEM_SIZE)((n)*sizeof(t))))))
#define Newxc(v,n,t,c)  (v = (MEM_WRAP_CHECK_(n,t) (c*)MEM_LOG_ALLOC(n,t,safemalloc((MEM_SIZE)((n)*sizeof(t))))))
#define Newxz(v,n,t)    (v = (MEM_WRAP_CHECK_(n,t) (t*)MEM_LOG_ALLOC(n,t,safecalloc((n),sizeof(t)))))

#ifndef PERL_CORE
/* pre 5.9.x compatibility: the leading x argument is ignored */
#define New(x,v,n,t)    Newx(v,n,t)
#define Newc(x,v,n,t,c) Newxc(v,n,t,c)
#define Newz(x,v,n,t)   Newxz(v,n,t)
#endif

#define Renew(v,n,t) \
          (v = (MEM_WRAP_CHECK_(n,t) (t*)MEM_LOG_REALLOC(n,t,v,saferealloc((Malloc_t)(v),(MEM_SIZE)((n)*sizeof(t))))))
#define Renewc(v,n,t,c) \
          (v = (MEM_WRAP_CHECK_(n,t) (c*)MEM_LOG_REALLOC(n,t,v,saferealloc((Malloc_t)(v),(MEM_SIZE)((n)*sizeof(t))))))

1912

1913

/* Safefree(d): pass d, cast to Malloc_t and routed through MEM_LOG_FREE, to
 * safefree().
 *
 * Under PERL_POISON the call is made only when d is non-null, and afterwards
 * the pointer variable itself is overwritten with the Poison() pattern -- so
 * in that configuration d must be an lvalue (its address is taken). */
#ifdef PERL_POISON
#define Safefree(d) \
  ((d) ? (void)(safefree(MEM_LOG_FREE((Malloc_t)(d))), Poison(&(d), 1, Malloc_t)) : (void) 0)
#else
#define Safefree(d)     safefree(MEM_LOG_FREE((Malloc_t)(d)))
#endif

1919

1920

/* Typed wrappers around memmove / memcpy / memzero.  s is the source, d the
 * destination, n the number of items and t the item type; the byte count is
 * n * sizeof(t), guarded by MEM_WRAP_CHECK_.  The plain forms discard the
 * result (cast to void); the D forms leave it as the value of the
 * expression. */
#define Move(s,d,n,t)   (MEM_WRAP_CHECK_(n,t) (void)memmove((char*)(d),(const char*)(s), (n) * sizeof(t)))
#define Copy(s,d,n,t)   (MEM_WRAP_CHECK_(n,t) (void)memcpy((char*)(d),(const char*)(s), (n) * sizeof(t)))
#define Zero(d,n,t)     (MEM_WRAP_CHECK_(n,t) (void)memzero((char*)(d), (n) * sizeof(t)))

#define MoveD(s,d,n,t)  (MEM_WRAP_CHECK_(n,t) memmove((char*)(d),(const char*)(s), (n) * sizeof(t)))
#define CopyD(s,d,n,t)  (MEM_WRAP_CHECK_(n,t) memcpy((char*)(d),(const char*)(s), (n) * sizeof(t)))
#ifdef HAS_MEMSET
#define ZeroD(d,n,t)    (MEM_WRAP_CHECK_(n,t) memzero((char*)(d), (n) * sizeof(t)))
#else
/* Using bzero(), which returns void, so yield d explicitly. */
#define ZeroD(d,n,t)    (MEM_WRAP_CHECK_(n,t) memzero((char*)(d), (n) * sizeof(t)),(d))
#endif

1932

1933

/* PoisonWith(d,n,t,b) fills n items of type t at d with the byte b (via
 * memset, after the MEM_WRAP_CHECK_ size guard).  PoisonNew uses 0xAB,
 * PoisonFree uses 0xEF, and Poison is the same as PoisonFree. */
#define PoisonWith(d,n,t,b)     (MEM_WRAP_CHECK_(n,t) (void)memset((char*)(d), (U8)(b), (n) * sizeof(t)))
#define PoisonNew(d,n,t)        PoisonWith(d,n,t,0xAB)
#define PoisonFree(d,n,t)       PoisonWith(d,n,t,0xEF)
#define Poison(d,n,t)           PoisonFree(d,n,t)

/* PERL_POISON_EXPR(x) expands to x when PERL_POISON is defined and to
 * nothing otherwise. */
#ifdef PERL_POISON
#  define PERL_POISON_EXPR(x) x
#else
#  define PERL_POISON_EXPR(x)
#endif

1943

1944

/* StructCopy(s,d,t): copy one object of type t from the address s to the
 * address d.  With USE_STRUCT_COPY this is a plain structure assignment
 * through pointers cast to t*; otherwise it is Copy() with a count of 1. */
#ifdef USE_STRUCT_COPY
#  define StructCopy(s,d,t) (*((t*)(d)) = *((t*)(s)))
#else
#  define StructCopy(s,d,t) Copy(s,d,1,t)
#endif

1949

1950

/* C_ARRAY_LENGTH is the number of elements in the C array (so you
 * want your zero-based indices to be less than but not equal to).
 *
 * C_ARRAY_END is one past the last: half-open/half-closed range,
 * not last-inclusive range.
 *
 * Both rely on sizeof(a) being the size of the whole array, so a must be an
 * actual array object, not a pointer. */
#define C_ARRAY_LENGTH(a)       (sizeof(a)/sizeof((a)[0]))
#define C_ARRAY_END(a)          ((a) + C_ARRAY_LENGTH(a))

/* Perl_va_copy(s, d): copy the va_list s into d.  Note the argument order is
 * (source, destination), the reverse of va_copy()/__va_copy().  Only defined
 * when NEED_VA_COPY is set; it uses va_copy if that is a macro, else
 * __va_copy if that is a macro, else a raw Copy() of one va_list. */
#ifdef NEED_VA_COPY
# ifdef va_copy
#  define Perl_va_copy(s, d) va_copy(d, s)
# elif defined(__va_copy)
#  define Perl_va_copy(s, d) __va_copy(d, s)
# else
#  define Perl_va_copy(s, d) Copy(s, d, 1, va_list)
# endif
#endif

/* convenience debug macros
 *
 * Under USE_ITHREADS these supply a printf-style format fragment and the
 * matching my_perl argument (cast to void *).  The doubled-underscore
 * variants carry a leading comma, and the *_VALUE_ variants a trailing one.
 * Without USE_ITHREADS they all expand to nothing. */
#ifdef USE_ITHREADS
#define pTHX_FORMAT  "Perl interpreter: 0x%p"
#define pTHX__FORMAT ", Perl interpreter: 0x%p"
#define pTHX_VALUE_   (void *)my_perl,
#define pTHX_VALUE    (void *)my_perl
#define pTHX__VALUE_ ,(void *)my_perl,
#define pTHX__VALUE  ,(void *)my_perl
#else
#define pTHX_FORMAT
#define pTHX__FORMAT
#define pTHX_VALUE_
#define pTHX_VALUE
#define pTHX__VALUE_
#define pTHX__VALUE
#endif /* USE_ITHREADS */

1986

1987

/* Perl_deprecate was not part of the public API, and did not have a deprecate()
   shortcut macro defined without -DPERL_CORE. Neither codesearch.google.com nor
   CPAN::Unpack show any users outside the core.

   s is pasted between two string literals, so it must itself be a string
   literal. */
#ifdef PERL_CORE
#  define deprecate(s) Perl_ck_warner_d(aTHX_ packWARN(WARN_DEPRECATED), "Use of " s " is deprecated")
#endif

1993

1994

/* Internal macros to deal with gids and uids
 *
 * sv_setuid/sv_setgid store an id into an SV and SvUID/SvGID read it back.
 * The SV slot is chosen at compile time: NV when the id type is larger than
 * IVSIZE; otherwise IV when the *_sign value is <= 0, and UV when it is
 * positive.  NOTE(review): presumably *_sign <= 0 denotes a signed id type --
 * confirm against the configuration probe that defines it. */
#ifdef PERL_CORE

#  if Uid_t_size > IVSIZE
#    define sv_setuid(sv, uid)       sv_setnv((sv), (NV)(uid))
#    define SvUID(sv)                SvNV(sv)
#  else
#    if Uid_t_sign <= 0
#      define sv_setuid(sv, uid)       sv_setiv((sv), (IV)(uid))
#      define SvUID(sv)                SvIV(sv)
#    else
#      define sv_setuid(sv, uid)       sv_setuv((sv), (UV)(uid))
#      define SvUID(sv)                SvUV(sv)
#    endif
#  endif /* Uid_t_size */

#  if Gid_t_size > IVSIZE
#    define sv_setgid(sv, gid)       sv_setnv((sv), (NV)(gid))
#    define SvGID(sv)                SvNV(sv)
#  else
#    if Gid_t_sign <= 0
#      define sv_setgid(sv, gid)       sv_setiv((sv), (IV)(gid))
#      define SvGID(sv)                SvIV(sv)
#    else
#      define sv_setgid(sv, gid)       sv_setuv((sv), (UV)(gid))
#      define SvGID(sv)                SvUV(sv)
#    endif
#  endif /* Gid_t_size */

#endif

#endif /* HANDY_H */

/*
 * Local variables:
 * c-indentation-style: bsd
 * c-basic-offset: 4
 * indent-tabs-mode: nil
 * End:
 *
 * ex: set ts=8 sts=4 sw=4 et:
 */