typedef I32TYPE I32;
typedef U32TYPE U32;
-#ifdef HAS_QUAD
+#ifdef QUADKIND
typedef I64TYPE I64;
typedef U64TYPE U64;
#endif
-/* INT64_C/UINT64_C are C99 from <stdint.h> (so they will not be
- * available in strict C89 mode), but they are nice, so let's define
- * them if necessary. */
-
-/* N.B. We would like to say HAS_QUAD here, but that doesn't actually mean what
- * it has always been documented to mean (see RT #119753) and is explicitly turned
- * off in perl.h outside of core with dire warnings about removing the undef.
- */
-
-#if defined(QUADKIND)
-# undef PeRl_INT64_C
-# undef PeRl_UINT64_C
-/* Prefer the native integer types (int and long) over long long
- * (which is not C89) and Win32-specific __int64. */
-# if QUADKIND == QUAD_IS_INT && INTSIZE == 8
-# define PeRl_INT64_C(c) (c)
-# define PeRl_UINT64_C(c) CAT2(c,U)
-# endif
-# if QUADKIND == QUAD_IS_LONG && LONGSIZE == 8
-# define PeRl_INT64_C(c) CAT2(c,L)
-# define PeRl_UINT64_C(c) CAT2(c,UL)
-# endif
-# if QUADKIND == QUAD_IS_LONG_LONG && defined(HAS_LONG_LONG)
-# define PeRl_INT64_C(c) CAT2(c,LL)
-# define PeRl_UINT64_C(c) CAT2(c,ULL)
-# endif
-# if QUADKIND == QUAD_IS___INT64
-# define PeRl_INT64_C(c) CAT2(c,I64)
-# define PeRl_UINT64_C(c) CAT2(c,UI64)
-# endif
-# ifndef PeRl_INT64_C
-# define PeRl_INT64_C(c) ((I64)(c)) /* last resort */
-# define PeRl_UINT64_C(c) ((U64)(c))
-# endif
-/* In OS X the INT64_C/UINT64_C are defined with LL/ULL, which will
- * not fly with C89-pedantic gcc, so let's undefine them first so that
- * we can redefine them with our native integer preferring versions. */
-# if defined(PERL_DARWIN) && defined(PERL_GCC_PEDANTIC)
-# undef INT64_C
-# undef UINT64_C
-# endif
-# ifndef INT64_C
-# define INT64_C(c) PeRl_INT64_C(c)
-# endif
-# ifndef UINT64_C
-# define UINT64_C(c) PeRl_UINT64_C(c)
-# endif
-#endif
-
#if defined(UINT8_MAX) && defined(INT16_MAX) && defined(INT32_MAX)
/* I8_MAX and I8_MIN constants are not defined, as I8 is an ambiguous type.
#endif
+/* These C99 typedefs are useful sometimes for, say, loop variables whose
+ * maximum values are small, but for which speed trumps size. If we have a C99
+ * compiler, use that. Otherwise, a plain 'int' should be good enough.
+ *
+ * Restrict these to core for now until we are more certain this is a good
+ * idea. */
+#if defined(PERL_CORE) || defined(PERL_EXT)
+# ifdef I_STDINT
+ typedef int_fast8_t PERL_INT_FAST8_T;
+ typedef uint_fast8_t PERL_UINT_FAST8_T;
+ typedef int_fast16_t PERL_INT_FAST16_T;
+ typedef uint_fast16_t PERL_UINT_FAST16_T;
+# else
+ typedef int PERL_INT_FAST8_T;
+ typedef unsigned int PERL_UINT_FAST8_T;
+ typedef int PERL_INT_FAST16_T;
+ typedef unsigned int PERL_UINT_FAST16_T;
+# endif
+#endif
+
/* log(2) (i.e., log base 10 of 2) is pretty close to 0.30103, just in case
* anyone is grepping for it */
#define BIT_DIGITS(N) (((N)*146)/485 + 1) /* log10(2) =~ 146/485 */
# endif
#endif
+/* Returns a boolean as to whether the input unsigned number is a power of 2
+ * (2**0, 2**1, etc). In other words if it has just a single bit set.
+ * If not, subtracting 1 would leave the uppermost bit set, so the & would
+ * yield non-zero */
+#if defined(PERL_CORE) || defined(PERL_EXT)
+# define isPOWER_OF_2(n) (n && (n & (n-1)) == 0)
+#endif
+
/* This is a helper macro to avoid preprocessor issues, replaced by nothing
* unless under DEBUGGING, where it expands to an assert of its argument,
* followed by a comma (hence the comma operator). If we just used a straight
#endif
/*
-=head1 SV-Body Allocation
+=head1 SV Manipulation Functions
=for apidoc Ama|SV*|newSVpvs|"literal string" s
Like C<newSVpvn>, but takes a literal string instead of a
#define strnNE(s1,s2,l) (strncmp(s1,s2,l) != 0)
#define strnEQ(s1,s2,l) (strncmp(s1,s2,l) == 0)
-#define memNE(s1,s2,l) (memcmp(s1,s2,l) != 0)
-#define memEQ(s1,s2,l) (memcmp(s1,s2,l) == 0)
+#define memEQ(s1,s2,l) (memcmp(((const void *) (s1)), ((const void *) (s2)), l) == 0)
+#define memNE(s1,s2,l) (! memEQ(s1,s2,l))
/* memEQ and memNE where second comparand is a string constant */
#define memEQs(s1, l, s2) \
#define strBEGINs(s1,s2) (strncmp(s1,"" s2 "", sizeof(s2)-1) == 0)
#define memBEGINs(s1, l, s2) \
- ( (l) >= sizeof(s2) - 1 \
+ ( (Ptrdiff_t) (l) >= (Ptrdiff_t) sizeof(s2) - 1 \
&& memEQ(s1, "" s2 "", sizeof(s2)-1))
#define memBEGINPs(s1, l, s2) \
- ( (l) > sizeof(s2) - 1 \
+ ( (Ptrdiff_t) (l) > (Ptrdiff_t) sizeof(s2) - 1 \
&& memEQ(s1, "" s2 "", sizeof(s2)-1))
#define memENDs(s1, l, s2) \
- ( (l) >= sizeof(s2) - 1 \
+ ( (Ptrdiff_t) (l) >= (Ptrdiff_t) sizeof(s2) - 1 \
&& memEQ(s1 + (l) - (sizeof(s2) - 1), "" s2 "", sizeof(s2)-1))
#define memENDPs(s1, l, s2) \
- ( (l) > sizeof(s2) \
+ ( (Ptrdiff_t) (l) > (Ptrdiff_t) sizeof(s2) \
&& memEQ(s1 + (l) - (sizeof(s2) - 1), "" s2 "", sizeof(s2)-1))
#endif /* End of making macros private */
*/
-/* Specify the widest unsigned type on the platform. Use U64TYPE because U64
- * is known only in the perl core, and this macro can be called from outside
- * that */
-#ifdef HAS_QUAD
-# define WIDEST_UTYPE U64TYPE
+/* Specify the widest unsigned type on the platform. */
+#ifdef QUADKIND
+# define WIDEST_UTYPE U64
#else
# define WIDEST_UTYPE U32
#endif
#define FITS_IN_8_BITS(c) (1)
#endif
+/* Returns true if c is in the range l..u
+ * Written with the cast so it only needs one conditional test
+ */
+#define inRANGE(c, l, u) (__ASSERT_((u) >= (l)) \
+ ((WIDEST_UTYPE) (((c) - (l)) | 0) <= ((WIDEST_UTYPE) ((u) - (l)))))
+
#ifdef EBCDIC
# ifndef _ALL_SOURCE
/* The native libc isascii() et.al. functions return the wrong results
*
* The first group of these is ordered in what I (khw) estimate to be the
* frequency of their use. This gives a slight edge to exiting a loop earlier
- * (in reginclass() in regexec.c) */
+ * (in reginclass() in regexec.c). Except \v should be last, as it isn't a
+ * real Posix character class, and some (small) inefficiencies in regular
+ * expression handling would be introduced by putting it in the middle of those
+ * that are. Also, cntrl and ascii come after the others as it may be useful
+ * to group these which have no members that match above Latin1, (or above
+ * ASCII in the latter case) */
+
# define _CC_WORDCHAR 0 /* \w and [:word:] */
# define _CC_DIGIT 1 /* \d and [:digit:] */
# define _CC_ALPHA 2 /* [:alpha:] */
# define _CC_ALPHANUMERIC 7 /* [:alnum:] */
# define _CC_GRAPH 8 /* [:graph:] */
# define _CC_CASED 9 /* [:lower:] or [:upper:] under /i */
-
-#define _FIRST_NON_SWASH_CC 10
-/* The character classes above are implemented with swashes. The second group
- * (just below) contains the ones implemented without. These are also sorted
- * in rough order of the frequency of their use, except that \v should be last,
- * as it isn't a real Posix character class, and some (small) inefficiencies in
- * regular expression handling would be introduced by putting it in the middle
- * of those that are. Also, cntrl and ascii come after the others as it may be
- * useful to group these which have no members that match above Latin1, (or
- * above ASCII in the latter case) */
-
# define _CC_SPACE 10 /* \s, [:space:] */
# define _CC_PSXSPC _CC_SPACE /* XXX Temporary, can be removed
when the deprecated isFOO_utf8()
} _char_class_number;
#endif
-#define POSIX_SWASH_COUNT _FIRST_NON_SWASH_CC
#define POSIX_CC_COUNT (_HIGHEST_REGCOMP_DOT_H_SYNC + 1)
-#if defined(PERL_IN_UTF8_C) \
- || defined(PERL_IN_REGCOMP_C) \
- || defined(PERL_IN_REGEXEC_C)
-# if _CC_WORDCHAR != 0 || _CC_DIGIT != 1 || _CC_ALPHA != 2 || _CC_LOWER != 3 \
- || _CC_UPPER != 4 || _CC_PUNCT != 5 || _CC_PRINT != 6 \
- || _CC_ALPHANUMERIC != 7 || _CC_GRAPH != 8 || _CC_CASED != 9
- #error Need to adjust order of swash_property_names[]
-# endif
-
-/* This is declared static in each of the few files that this is #defined for
- * to keep them from being publicly accessible. Hence there is a small amount
- * of wasted space */
-
-static const char* const swash_property_names[] = {
- "XPosixWord",
- "XPosixDigit",
- "XPosixAlpha",
- "XPosixLower",
- "XPosixUpper",
- "XPosixPunct",
- "XPosixPrint",
- "XPosixAlnum",
- "XPosixGraph",
- "Cased"
-};
-#endif
-
START_EXTERN_C
# ifdef DOINIT
EXTCONST U32 PL_charclass[] = {
&& ((PL_charclass[(U8) (c)] & _CC_mask_A(classnum)) \
== _CC_mask_A(classnum)))
-# define isALPHA_A(c) _generic_isCC_A(c, _CC_ALPHA)
+/* On ASCII platforms certain classes form a single range. It's faster to
+ * special case these. isDIGIT is a single range on all platforms */
+# ifdef EBCDIC
+# define isALPHA_A(c) _generic_isCC_A(c, _CC_ALPHA)
+# define isGRAPH_A(c) _generic_isCC_A(c, _CC_GRAPH)
+# define isLOWER_A(c) _generic_isCC_A(c, _CC_LOWER)
+# define isPRINT_A(c) _generic_isCC_A(c, _CC_PRINT)
+# define isUPPER_A(c) _generic_isCC_A(c, _CC_UPPER)
+# else
+ /* By folding the upper and lowercase, we can use a single range */
+# define isALPHA_A(c) inRANGE((~('A' ^ 'a') & (c)), 'A', 'Z')
+# define isGRAPH_A(c) inRANGE(c, ' ' + 1, 0x7e)
+# define isLOWER_A(c) inRANGE(c, 'a', 'z')
+# define isPRINT_A(c) inRANGE(c, ' ', 0x7e)
+# define isUPPER_A(c) inRANGE(c, 'A', 'Z')
+# endif
# define isALPHANUMERIC_A(c) _generic_isCC_A(c, _CC_ALPHANUMERIC)
# define isBLANK_A(c) _generic_isCC_A(c, _CC_BLANK)
# define isCNTRL_A(c) _generic_isCC_A(c, _CC_CNTRL)
-# define isDIGIT_A(c) _generic_isCC(c, _CC_DIGIT) /* No non-ASCII digits */
-# define isGRAPH_A(c) _generic_isCC_A(c, _CC_GRAPH)
-# define isLOWER_A(c) _generic_isCC_A(c, _CC_LOWER)
-# define isPRINT_A(c) _generic_isCC_A(c, _CC_PRINT)
+# define isDIGIT_A(c) inRANGE(c, '0', '9')
# define isPUNCT_A(c) _generic_isCC_A(c, _CC_PUNCT)
# define isSPACE_A(c) _generic_isCC_A(c, _CC_SPACE)
-# define isUPPER_A(c) _generic_isCC_A(c, _CC_UPPER)
# define isWORDCHAR_A(c) _generic_isCC_A(c, _CC_WORDCHAR)
# define isXDIGIT_A(c) _generic_isCC(c, _CC_XDIGIT) /* No non-ASCII xdigits
*/
* hard-code various macro definitions that wouldn't otherwise be available
* to it. Most are coded based on first principles. These are written to
* avoid EBCDIC vs. ASCII #ifdef's as much as possible. */
-# define isDIGIT_A(c) ((c) <= '9' && (c) >= '0')
+# define isDIGIT_A(c) inRANGE(c, '0', '9')
# define isBLANK_A(c) ((c) == ' ' || (c) == '\t')
# define isSPACE_A(c) (isBLANK_A(c) \
|| (c) == '\n' \
* uppercase. The tests for those aren't necessary on ASCII, but hurt only
* performance (if optimization isn't on), and allow the same code to be
* used for both platform types */
-# define isLOWER_A(c) ((c) >= 'a' && (c) <= 'z' \
- && ( (c) <= 'i' \
- || ((c) >= 'j' && (c) <= 'r') \
- || (c) >= 's'))
-# define isUPPER_A(c) ((c) >= 'A' && (c) <= 'Z' \
- && ( (c) <= 'I' \
- || ((c) >= 'J' && (c) <= 'R') \
- || (c) >= 'S'))
+# define isLOWER_A(c) inRANGE((c), 'a', 'i') \
+ || inRANGE((c), 'j', 'r') \
+ || inRANGE((c), 's', 'z')
+# define isUPPER_A(c) inRANGE((c), 'A', 'I') \
+ || inRANGE((c), 'J', 'R') \
+ || inRANGE((c), 'S', 'Z')
# define isALPHA_A(c) (isUPPER_A(c) || isLOWER_A(c))
# define isALPHANUMERIC_A(c) (isALPHA_A(c) || isDIGIT_A(c))
# define isWORDCHAR_A(c) (isALPHANUMERIC_A(c) || (c) == '_')
# define isIDFIRST_A(c) (isALPHA_A(c) || (c) == '_')
-# define isXDIGIT_A(c) (isDIGIT_A(c) \
- || ((c) >= 'a' && (c) <= 'f') \
- || ((c) <= 'F' && (c) >= 'A'))
+# define isXDIGIT_A(c) ( isDIGIT_A(c) \
+ || inRANGE((c), 'a', 'f') \
+ || inRANGE((c), 'A', 'F')
# define isPUNCT_A(c) ((c) == '-' || (c) == '!' || (c) == '"' \
|| (c) == '#' || (c) == '$' || (c) == '%' \
|| (c) == '&' || (c) == '\'' || (c) == '(' \
# define isCNTRL_A(c) ((c) == '\0' || (c) == '\a' || (c) == '\b' \
|| (c) == '\f' || (c) == '\n' || (c) == '\r' \
|| (c) == '\t' || (c) == '\v' \
- || ((c) <= 3 && (c) >= 1) /* SOH, STX, ETX */ \
+ || inRANGE((c), 1, 3) /* SOH, STX, ETX */ \
|| (c) == 7 /* U+7F DEL */ \
- || ((c) <= 0x13 && (c) >= 0x0E) /* SO, SI */ \
- /* DLE, DC[1-3] */ \
+ || inRANGE((c), 0x0E, 0x13) /* SO SI DLE \
+ DC[1-3] */ \
|| (c) == 0x18 /* U+18 CAN */ \
|| (c) == 0x19 /* U+19 EOM */ \
- || ((c) <= 0x1F && (c) >= 0x1C) /* [FGRU]S */ \
+ || inRANGE((c), 0x1C, 0x1F) /* [FGRU]S */ \
|| (c) == 0x26 /* U+17 ETB */ \
|| (c) == 0x27 /* U+1B ESC */ \
|| (c) == 0x2D /* U+05 ENQ */ \
|| NATIVE_TO_LATIN1((U8) c) == 0xA0)))
# define isUPPER_L1(c) (isUPPER_A(c) \
|| (FITS_IN_8_BITS(c) \
- && ( NATIVE_TO_LATIN1((U8) c) >= 0xC0 \
- && NATIVE_TO_LATIN1((U8) c) <= 0xDE \
+ && ( IN_RANGE(NATIVE_TO_LATIN1((U8) c), \
+ 0xC0, 0xDE) \
&& NATIVE_TO_LATIN1((U8) c) != 0xD7)))
# define isWORDCHAR_L1(c) (isIDFIRST_L1(c) || isDIGIT_A(c))
# define isIDFIRST_L1(c) (isALPHA_L1(c) || NATIVE_TO_LATIN1(c) == '_')
#define toLOWER(c) (isASCII(c) ? toLOWER_LATIN1(c) : (c))
#define toUPPER(c) (isASCII(c) ? toUPPER_LATIN1_MOD(c) : (c))
which uses table lookup and mask instead of subtraction. (This would
- work because the _MOD does not apply in the ASCII range) */
+ work because the _MOD does not apply in the ASCII range).
+
+ These actually are UTF-8 invariant casing, not just ASCII, as any non-ASCII
+ UTF-8 invariants are neither upper nor lower. (Only on EBCDIC platforms are
+ there non-ASCII invariants, and all of them are controls.) */
#define toLOWER(c) (isUPPER(c) ? (U8)((c) + ('a' - 'A')) : (c))
#define toUPPER(c) (isLOWER(c) ? (U8)((c) - ('a' - 'A')) : (c))
/* In the ASCII range, these are equivalent to what they're here defined to be.
* But by creating these definitions, other code doesn't have to be aware of
- * this detail */
+ * this detail. Actually this works for all UTF-8 invariants, not just the
+ * ASCII range. (EBCDIC platforms can have non-ASCII invariants.) */
#define toFOLD(c) toLOWER(c)
#define toTITLE(c) toUPPER(c)
|| (char)(c) == '_'))
/* These next three are also for internal core Perl use only: case-change
- * helper macros */
+ * helper macros. The reason for using the PL_latin arrays is in case the
+ * system function is defective; it ensures uniform results that conform to the
+ * Unicod standard. It does not handle the anomalies in UTF-8 Turkic locales */
#define _generic_toLOWER_LC(c, function, cast) (! FITS_IN_8_BITS(c) \
? (c) \
: (IN_UTF8_CTYPE_LOCALE) \
? PL_latin1_lc[ (U8) (c) ] \
- : (cast)function((cast)(c)))
+ : (cast)function((cast)(c)))
/* Note that the result can be larger than a byte in a UTF-8 locale. It
* returns a single value, so can't adequately return the upper case of LATIN
* SMALL LETTER SHARP S in a UTF-8 locale (which should be a string of two
* values "SS"); instead it asserts against that under DEBUGGING, and
- * otherwise returns its input */
+ * otherwise returns its input. It does not handle the anomalies in UTF-8
+ * Turkic locales. */
#define _generic_toUPPER_LC(c, function, cast) \
(! FITS_IN_8_BITS(c) \
? (c) \
* returns a single value, so can't adequately return the fold case of LATIN
* SMALL LETTER SHARP S in a UTF-8 locale (which should be a string of two
* values "ss"); instead it asserts against that under DEBUGGING, and
- * otherwise returns its input */
+ * otherwise returns its input. It does not handle the anomalies in UTF-8
+ * Turkic locales */
#define _generic_toFOLD_LC(c, function, cast) \
((UNLIKELY((c) == MICRO_SIGN) && IN_UTF8_CTYPE_LOCALE) \
? GREEK_SMALL_LETTER_MU \
? 0 /* Note that doesn't check validity for latin1 */ \
: above_latin1)
-/* NOTE that some of these macros have very similar ones in regcharclass.h.
- * For example, there is (at the time of this writing) an 'is_SPACE_utf8()'
- * there, differing in name only by an underscore from the one here
- * 'isSPACE_utf8(). The difference is that the ones here are probably more
- * efficient and smaller, using an O(1) array lookup for Latin1-range code
- * points; the regcharclass.h ones are implemented as a series of
- * "if-else-if-else ..." */
#define isALPHA_utf8(p) _generic_utf8(ALPHA, p)
#define isALPHANUMERIC_utf8(p) _generic_utf8(ALPHANUMERIC, p)
(void)(UNLIKELY(_MEM_WRAP_WILL_WRAP(n,t)) \
&& (Perl_croak_nocontext("%s",(a)),0))
+/* "a" arg must be a string literal */
+# define MEM_WRAP_CHECK_s(n,t,a) \
+ (void)(UNLIKELY(_MEM_WRAP_WILL_WRAP(n,t)) \
+ && (Perl_croak_nocontext("" a ""),0))
+
#define MEM_WRAP_CHECK_(n,t) MEM_WRAP_CHECK(n,t),
#define PERL_STRLEN_ROUNDUP(n) ((void)(((n) > MEM_SIZE_MAX - 2 * PERL_STRLEN_ROUNDUP_QUANTUM) ? (croak_memory_wrap(),0) : 0), _PERL_STRLEN_ROUNDUP_UNCHECKED(n))
#define MEM_WRAP_CHECK(n,t)
#define MEM_WRAP_CHECK_1(n,t,a)
-#define MEM_WRAP_CHECK_2(n,t,a,b)
+#define MEM_WRAP_CHECK_s(n,t,a)
#define MEM_WRAP_CHECK_(n,t)
#define PERL_STRLEN_ROUNDUP(n) _PERL_STRLEN_ROUNDUP_UNCHECKED(n)
#define Copy(s,d,n,t) (MEM_WRAP_CHECK_(n,t) perl_assert_ptr(d), perl_assert_ptr(s), (void)memcpy((char*)(d),(const char*)(s), (n) * sizeof(t)))
#define Zero(d,n,t) (MEM_WRAP_CHECK_(n,t) perl_assert_ptr(d), (void)memzero((char*)(d), (n) * sizeof(t)))
+/* Like above, but returns a pointer to 'd' */
#define MoveD(s,d,n,t) (MEM_WRAP_CHECK_(n,t) perl_assert_ptr(d), perl_assert_ptr(s), memmove((char*)(d),(const char*)(s), (n) * sizeof(t)))
#define CopyD(s,d,n,t) (MEM_WRAP_CHECK_(n,t) perl_assert_ptr(d), perl_assert_ptr(s), memcpy((char*)(d),(const char*)(s), (n) * sizeof(t)))
#define ZeroD(d,n,t) (MEM_WRAP_CHECK_(n,t) perl_assert_ptr(d), memzero((char*)(d), (n) * sizeof(t)))