handy.h: Comments, remove extraneous parens

[perl5.git] / handy.h
diff --git a/handy.h b/handy.h

index 2999b3c..e423ee1 100644 (file)
--- a/handy.h
+++ b/handy.h
@@ -165,54 +165,11 @@ typedef U16TYPE U16;
  typedef I32TYPE I32;
  typedef U32TYPE U32;
  
-#ifdef HAS_QUAD
+#ifdef QUADKIND
  typedef I64TYPE I64;
  typedef U64TYPE U64;
  #endif
  
-/* INT64_C/UINT64_C are C99 from <stdint.h> (so they will not be
- * available in strict C89 mode), but they are nice, so let's define
- * them if necessary. */
-#if defined(HAS_QUAD)
-#  undef PeRl_INT64_C
-#  undef PeRl_UINT64_C
-/* Prefer the native integer types (int and long) over long long
- * (which is not C89) and Win32-specific __int64. */
-#  if QUADKIND == QUAD_IS_INT && INTSIZE == 8
-#    define PeRl_INT64_C(c)    (c)
-#    define PeRl_UINT64_C(c)   CAT2(c,U)
-#  endif
-#  if QUADKIND == QUAD_IS_LONG && LONGSIZE == 8
-#    define PeRl_INT64_C(c)    CAT2(c,L)
-#    define PeRl_UINT64_C(c)   CAT2(c,UL)
-#  endif
-#  if QUADKIND == QUAD_IS_LONG_LONG && defined(HAS_LONG_LONG)
-#    define PeRl_INT64_C(c)    CAT2(c,LL)
-#    define PeRl_UINT64_C(c)   CAT2(c,ULL)
-#  endif
-#  if QUADKIND == QUAD_IS___INT64
-#    define PeRl_INT64_C(c)    CAT2(c,I64)
-#    define PeRl_UINT64_C(c)   CAT2(c,UI64)
-#  endif
-#  ifndef PeRl_INT64_C
-#    define PeRl_INT64_C(c)    ((I64)(c)) /* last resort */
-#    define PeRl_UINT64_C(c)   ((U64)(c))
-#  endif
-/* In OS X the INT64_C/UINT64_C are defined with LL/ULL, which will
- * not fly with C89-pedantic gcc, so let's undefine them first so that
- * we can redefine them with our native integer preferring versions. */
-#  if defined(PERL_DARWIN) && defined(PERL_GCC_PEDANTIC)
-#    undef INT64_C
-#    undef UINT64_C
-#  endif
-#  ifndef INT64_C
-#    define INT64_C(c) PeRl_INT64_C(c)
-#  endif
-#  ifndef UINT64_C
-#    define UINT64_C(c) PeRl_UINT64_C(c)
-#  endif
-#endif
-
  #if defined(UINT8_MAX) && defined(INT16_MAX) && defined(INT32_MAX)
  
  /* I8_MAX and I8_MIN constants are not defined, as I8 is an ambiguous type.
@@ -260,8 +217,29 @@ typedef U64TYPE U64;
  
  #endif
  
-/* log(2) is pretty close to  0.30103, just in case anyone is grepping for it */
-#define BIT_DIGITS(N)   (((N)*146)/485 + 1)  /* log2(10) =~ 146/485 */
+/* These C99 typedefs are useful sometimes for, say, loop variables whose
+ * maximum values are small, but for which speed trumps size.  If we have a C99
+ * compiler, use that.  Otherwise, a plain 'int' should be good enough.
+ *
+ * Restrict these to core for now until we are more certain this is a good
+ * idea. */
+#if defined(PERL_CORE) || defined(PERL_EXT)
+#  ifdef I_STDINT
+    typedef  int_fast8_t  PERL_INT_FAST8_T;
+    typedef uint_fast8_t  PERL_UINT_FAST8_T;
+    typedef  int_fast16_t PERL_INT_FAST16_T;
+    typedef uint_fast16_t PERL_UINT_FAST16_T;
+#  else
+    typedef int           PERL_INT_FAST8_T;
+    typedef unsigned int  PERL_UINT_FAST8_T;
+    typedef int           PERL_INT_FAST16_T;
+    typedef unsigned int  PERL_UINT_FAST16_T;
+#  endif
+#endif
+
+/* log(2) (i.e., log base 10 of 2) is pretty close to 0.30103, just in case
+ * anyone is grepping for it */
+#define BIT_DIGITS(N)   (((N)*146)/485 + 1)  /* log10(2) =~ 146/485 */
  #define TYPE_DIGITS(T)  BIT_DIGITS(sizeof(T) * 8)
  #define TYPE_CHARS(T)   (TYPE_DIGITS(T) + 2) /* sign, NUL */
  
@@ -277,6 +255,14 @@ typedef U64TYPE U64;
  #  endif
  #endif
  
+/* Returns a boolean as to whether the input unsigned number is a power of 2
+ * (2**0, 2**1, etc).  In other words if it has just a single bit set.
+ * If not, subtracting 1 would leave the uppermost bit set, so the & would
+ * yield non-zero */
+#if defined(PERL_CORE) || defined(PERL_EXT)
+#  define isPOWER_OF_2(n) (n && (n & (n-1)) == 0)
+#endif
+
  /* This is a helper macro to avoid preprocessor issues, replaced by nothing
   * unless under DEBUGGING, where it expands to an assert of its argument,
   * followed by a comma (hence the comma operator).  If we just used a straight
@@ -293,80 +279,80 @@ typedef U64TYPE U64;
  #endif
  
  /*
-=head1 SV-Body Allocation
+=head1 SV Manipulation Functions
  
-=for apidoc Ama|SV*|newSVpvs|const char* s
-Like C<newSVpvn>, but takes a C<NUL>-terminated literal string instead of a
+=for apidoc Ama|SV*|newSVpvs|"literal string" s
+Like C<newSVpvn>, but takes a literal string instead of a
  string/length pair.
  
-=for apidoc Ama|SV*|newSVpvs_flags|const char* s|U32 flags
-Like C<newSVpvn_flags>, but takes a C<NUL>-terminated literal string instead of
+=for apidoc Ama|SV*|newSVpvs_flags|"literal string" s|U32 flags
+Like C<newSVpvn_flags>, but takes a literal string instead of
  a string/length pair.
  
-=for apidoc Ama|SV*|newSVpvs_share|const char* s
-Like C<newSVpvn_share>, but takes a C<NUL>-terminated literal string instead of
+=for apidoc Ama|SV*|newSVpvs_share|"literal string" s
+Like C<newSVpvn_share>, but takes a literal string instead of
  a string/length pair and omits the hash parameter.
  
-=for apidoc Am|void|sv_catpvs_flags|SV* sv|const char* s|I32 flags
-Like C<sv_catpvn_flags>, but takes a C<NUL>-terminated literal string instead
+=for apidoc Am|void|sv_catpvs_flags|SV* sv|"literal string" s|I32 flags
+Like C<sv_catpvn_flags>, but takes a literal string instead
  of a string/length pair.
  
-=for apidoc Am|void|sv_catpvs_nomg|SV* sv|const char* s
-Like C<sv_catpvn_nomg>, but takes a C<NUL>-terminated literal string instead of
+=for apidoc Am|void|sv_catpvs_nomg|SV* sv|"literal string" s
+Like C<sv_catpvn_nomg>, but takes a literal string instead of
  a string/length pair.
  
-=for apidoc Am|void|sv_catpvs|SV* sv|const char* s
-Like C<sv_catpvn>, but takes a C<NUL>-terminated literal string instead of a
+=for apidoc Am|void|sv_catpvs|SV* sv|"literal string" s
+Like C<sv_catpvn>, but takes a literal string instead of a
  string/length pair.
  
-=for apidoc Am|void|sv_catpvs_mg|SV* sv|const char* s
-Like C<sv_catpvn_mg>, but takes a C<NUL>-terminated literal string instead of a
+=for apidoc Am|void|sv_catpvs_mg|SV* sv|"literal string" s
+Like C<sv_catpvn_mg>, but takes a literal string instead of a
  string/length pair.
  
-=for apidoc Am|void|sv_setpvs|SV* sv|const char* s
-Like C<sv_setpvn>, but takes a C<NUL>-terminated literal string instead of a
+=for apidoc Am|void|sv_setpvs|SV* sv|"literal string" s
+Like C<sv_setpvn>, but takes a literal string instead of a
  string/length pair.
  
-=for apidoc Am|void|sv_setpvs_mg|SV* sv|const char* s
-Like C<sv_setpvn_mg>, but takes a C<NUL>-terminated literal string instead of a
+=for apidoc Am|void|sv_setpvs_mg|SV* sv|"literal string" s
+Like C<sv_setpvn_mg>, but takes a literal string instead of a
  string/length pair.
  
-=for apidoc Am|SV *|sv_setref_pvs|const char* s
-Like C<sv_setref_pvn>, but takes a C<NUL>-terminated literal string instead of
+=for apidoc Am|SV *|sv_setref_pvs|"literal string" s
+Like C<sv_setref_pvn>, but takes a literal string instead of
  a string/length pair.
  
  =head1 Memory Management
  
-=for apidoc Ama|char*|savepvs|const char* s
-Like C<savepvn>, but takes a C<NUL>-terminated literal string instead of a
+=for apidoc Ama|char*|savepvs|"literal string" s
+Like C<savepvn>, but takes a literal string instead of a
  string/length pair.
  
-=for apidoc Ama|char*|savesharedpvs|const char* s
+=for apidoc Ama|char*|savesharedpvs|"literal string" s
  A version of C<savepvs()> which allocates the duplicate string in memory
  which is shared between threads.
  
  =head1 GV Functions
  
-=for apidoc Am|HV*|gv_stashpvs|const char* name|I32 create
-Like C<gv_stashpvn>, but takes a C<NUL>-terminated literal string instead of a
+=for apidoc Am|HV*|gv_stashpvs|"literal string" name|I32 create
+Like C<gv_stashpvn>, but takes a literal string instead of a
  string/length pair.
  
  =head1 Hash Manipulation Functions
  
-=for apidoc Am|SV**|hv_fetchs|HV* tb|const char* key|I32 lval
-Like C<hv_fetch>, but takes a C<NUL>-terminated literal string instead of a
+=for apidoc Am|SV**|hv_fetchs|HV* tb|"literal string" key|I32 lval
+Like C<hv_fetch>, but takes a literal string instead of a
  string/length pair.
  
-=for apidoc Am|SV**|hv_stores|HV* tb|const char* key|NULLOK SV* val
-Like C<hv_store>, but takes a C<NUL>-terminated literal string instead of a
+=for apidoc Am|SV**|hv_stores|HV* tb|"literal string" key|SV* val
+Like C<hv_store>, but takes a literal string instead of a
  string/length pair
  and omits the hash parameter.
  
  =head1 Lexer interface
  
-=for apidoc Amx|void|lex_stuff_pvs|const char *pv|U32 flags
+=for apidoc Amx|void|lex_stuff_pvs|"literal string" pv|U32 flags
  
-Like L</lex_stuff_pvn>, but takes a C<NUL>-terminated literal string instead of
+Like L</lex_stuff_pvn>, but takes a literal string instead of
  a string/length pair.
  
  =cut
@@ -463,40 +449,90 @@ are not equal.  The C<len> parameter indicates the number of bytes to compare.
  Returns zero if non-equal, or non-zero if equal.
  
  =cut
+
+New macros should use the following conventions for their names (which are
+based on the underlying C library functions):
+
+  (mem | str n? ) (EQ | NE | LT | GT | GE | (( BEGIN | END ) P? )) l? s?
+
+  Each has two main parameters, string-like operands that are compared
+  against each other, as specified by the macro name.  Some macros may
+  additionally have one or potentially even two length parameters.  If a length
+  parameter applies to both string parameters, it will be positioned third;
+  otherwise any length parameter immediately follows the string parameter it
+  applies to.
+
+  If the prefix to the name is 'str', the string parameter is a pointer to a C
+  language string.  Such a string does not contain embedded NUL bytes; its
+  length may be unknown, but can be calculated by C<strlen()>, since it is
+  terminated by a NUL, which isn't included in its length.
+
+  The optional 'n' following 'str' means that that there is a third parameter,
+  giving the maximum number of bytes to look at in each string.  Even if both
+  strings are longer than the length parameter, those extra bytes will be
+  unexamined.
+
+  The 's' suffix means that the 2nd byte string parameter is a literal C
+  double-quoted string.  Its length will automatically be calculated by the
+  macro, so no length parameter will ever be needed for it.
+
+  If the prefix is 'mem', the string parameters don't have to be C strings;
+  they may contain embedded NUL bytes, do not necessarily have a terminating
+  NUL, and their lengths can be known only through other means, which in
+  practice are additional parameter(s) passed to the function.  All 'mem'
+  functions have at least one length parameter.  Barring any 'l' or 's' suffix,
+  there is a single length parameter, in position 3, which applies to both
+  string parameters.  The 's' suffix means, as described above, that the 2nd
+  string is a literal double-quoted C string (hence its length is calculated by
+  the macro, and the length parameter to the function applies just to the first
+  string parameter, and hence is positioned just after it).  An 'l' suffix
+  means that the 2nd string parameter has its own length parameter, and the
+  signature will look like memFOOl(s1, l1, s2, l2).
+
+  BEGIN (and END) are for testing if the 2nd string is an initial (or final)
+  substring  of the 1st string.  'P' if present indicates that the substring
+  must be a "proper" one in tha mathematical sense that the first one must be
+  strictly larger than the 2nd.
+
  */
  
  
-#define strNE(s1,s2) (strcmp(s1,s2))
-#define strEQ(s1,s2) (!strcmp(s1,s2))
+#define strNE(s1,s2) (strcmp(s1,s2) != 0)
+#define strEQ(s1,s2) (strcmp(s1,s2) == 0)
  #define strLT(s1,s2) (strcmp(s1,s2) < 0)
  #define strLE(s1,s2) (strcmp(s1,s2) <= 0)
  #define strGT(s1,s2) (strcmp(s1,s2) > 0)
  #define strGE(s1,s2) (strcmp(s1,s2) >= 0)
  
-#define strnNE(s1,s2,l) (strncmp(s1,s2,l))
-#define strnEQ(s1,s2,l) (!strncmp(s1,s2,l))
+#define strnNE(s1,s2,l) (strncmp(s1,s2,l) != 0)
+#define strnEQ(s1,s2,l) (strncmp(s1,s2,l) == 0)
  
-/* These names are controversial, so guarding against their being used in more
- * places than they already are.  strBEGs and StrStartsWith are potential
- * candidates */
-#if defined(PERL_IN_DOIO_C) || defined(PERL_IN_GV_C) || defined(PERL_IN_HV_C) || defined(PERL_IN_LOCALE_C) || defined(PERL_IN_PERL_C) || defined(PERL_IN_TOKE_C) || defined(PERL_EXT)
-#define strNEs(s1,s2) (strncmp(s1,"" s2 "", sizeof(s2)-1))
-#define strEQs(s1,s2) (!strncmp(s1,"" s2 "", sizeof(s2)-1))
-#endif
-
-#define memNE(s1,s2,l) (memcmp(s1,s2,l))
-#define memEQ(s1,s2,l) (!memcmp(s1,s2,l))
+#define memEQ(s1,s2,l) (memcmp(((const void *) (s1)), ((const void *) (s2)), l) == 0)
+#define memNE(s1,s2,l) (! memEQ(s1,s2,l))
  
  /* memEQ and memNE where second comparand is a string constant */
  #define memEQs(s1, l, s2) \
          (((sizeof(s2)-1) == (l)) && memEQ((s1), ("" s2 ""), (sizeof(s2)-1)))
  #define memNEs(s1, l, s2) (! memEQs(s1, l, s2))
  
-/* memEQ and memNE where second comparand is a string constant
- * and we can assume the length of s1 is at least that of the string */
-#define _memEQs(s1, s2) \
-        (memEQ((s1), ("" s2 ""), (sizeof(s2)-1)))
-#define _memNEs(s1, s2) (memNE((s1),("" s2 ""),(sizeof(s2)-1)))
+/* Keep these private until we decide it was a good idea */
+#if defined(PERL_CORE) || defined(PERL_EXT) || defined(PERL_EXT_POSIX)
+
+#define strBEGINs(s1,s2) (strncmp(s1,"" s2 "", sizeof(s2)-1) == 0)
+
+#define memBEGINs(s1, l, s2)                                                \
+            (   (Ptrdiff_t) (l) >= (Ptrdiff_t) sizeof(s2) - 1               \
+             && memEQ(s1, "" s2 "", sizeof(s2)-1))
+#define memBEGINPs(s1, l, s2)                                               \
+            (   (Ptrdiff_t) (l) > (Ptrdiff_t) sizeof(s2) - 1                \
+             && memEQ(s1, "" s2 "", sizeof(s2)-1))
+#define memENDs(s1, l, s2)                                                  \
+            (   (Ptrdiff_t) (l) >= (Ptrdiff_t) sizeof(s2) - 1               \
+             && memEQ(s1 + (l) - (sizeof(s2) - 1), "" s2 "", sizeof(s2)-1))
+#define memENDPs(s1, l, s2)                                                 \
+            (   (Ptrdiff_t) (l) > (Ptrdiff_t) sizeof(s2)                    \
+             && memEQ(s1 + (l) - (sizeof(s2) - 1), "" s2 "", sizeof(s2)-1))
+#endif  /* End of making macros private */
  
  #define memLT(s1,s2,l) (memcmp(s1,s2,l) < 0)
  #define memLE(s1,s2,l) (memcmp(s1,s2,l) <= 0)
@@ -1031,11 +1067,9 @@ patched there.  The file as of this writing is cpan/Devel-PPPort/parts/inc/misc
  
  */
  
-/* Specify the widest unsigned type on the platform.  Use U64TYPE because U64
- * is known only in the perl core, and this macro can be called from outside
- * that */
-#ifdef HAS_QUAD
-#   define WIDEST_UTYPE U64TYPE
+/* Specify the widest unsigned type on the platform. */
+#ifdef QUADKIND
+#   define WIDEST_UTYPE U64
  #else
  #   define WIDEST_UTYPE U32
  #endif
@@ -1060,6 +1094,12 @@ patched there.  The file as of this writing is cpan/Devel-PPPort/parts/inc/misc
  #define FITS_IN_8_BITS(c) (1)
  #endif
  
+/* Returns true if c is in the range l..u
+ * Written with the cast so it only needs one conditional test
+ */
+#define inRANGE(c, l, u) (__ASSERT_((u) >= (l))                          \
+    ((WIDEST_UTYPE) (((c) - (l)) | 0) <= ((WIDEST_UTYPE) ((u) - (l)))))
+
  #ifdef EBCDIC
  #   ifndef _ALL_SOURCE
          /* The native libc isascii() et.al. functions return the wrong results
@@ -1099,7 +1139,13 @@ patched there.  The file as of this writing is cpan/Devel-PPPort/parts/inc/misc
   *
   * The first group of these is ordered in what I (khw) estimate to be the
   * frequency of their use.  This gives a slight edge to exiting a loop earlier
- * (in reginclass() in regexec.c) */
+ * (in reginclass() in regexec.c).  Except \v should be last, as it isn't a
+ * real Posix character class, and some (small) inefficiencies in regular
+ * expression handling would be introduced by putting it in the middle of those
+ * that are.  Also, cntrl and ascii come after the others as it may be useful
+ * to group these which have no members that match above Latin1, (or above
+ * ASCII in the latter case) */
+
  #  define _CC_WORDCHAR           0      /* \w and [:word:] */
  #  define _CC_DIGIT              1      /* \d and [:digit:] */
  #  define _CC_ALPHA              2      /* [:alpha:] */
@@ -1110,17 +1156,6 @@ patched there.  The file as of this writing is cpan/Devel-PPPort/parts/inc/misc
  #  define _CC_ALPHANUMERIC       7      /* [:alnum:] */
  #  define _CC_GRAPH              8      /* [:graph:] */
  #  define _CC_CASED              9      /* [:lower:] or [:upper:] under /i */
-
-#define _FIRST_NON_SWASH_CC     10
-/* The character classes above are implemented with swashes.  The second group
- * (just below) contains the ones implemented without.  These are also sorted
- * in rough order of the frequency of their use, except that \v should be last,
- * as it isn't a real Posix character class, and some (small) inefficiencies in
- * regular expression handling would be introduced by putting it in the middle
- * of those that are.  Also, cntrl and ascii come after the others as it may be
- * useful to group these which have no members that match above Latin1, (or
- * above ASCII in the latter case) */
-
  #  define _CC_SPACE             10      /* \s, [:space:] */
  #  define _CC_PSXSPC            _CC_SPACE   /* XXX Temporary, can be removed
                                                 when the deprecated isFOO_utf8()
@@ -1187,36 +1222,8 @@ typedef enum {
  } _char_class_number;
  #endif
  
-#define POSIX_SWASH_COUNT _FIRST_NON_SWASH_CC
  #define POSIX_CC_COUNT    (_HIGHEST_REGCOMP_DOT_H_SYNC + 1)
  
-#if defined(PERL_IN_UTF8_C)                         \
- || defined(PERL_IN_REGCOMP_C)                      \
- || defined(PERL_IN_REGEXEC_C)
-#   if _CC_WORDCHAR != 0 || _CC_DIGIT != 1 || _CC_ALPHA != 2 || _CC_LOWER != 3 \
-       || _CC_UPPER != 4 || _CC_PUNCT != 5 || _CC_PRINT != 6                   \
-       || _CC_ALPHANUMERIC != 7 || _CC_GRAPH != 8 || _CC_CASED != 9
-      #error Need to adjust order of swash_property_names[]
-#   endif
-
-/* This is declared static in each of the few files that this is #defined for
- * to keep them from being publicly accessible.  Hence there is a small amount
- * of wasted space */
-
-static const char* const swash_property_names[] = {
-    "XPosixWord",
-    "XPosixDigit",
-    "XPosixAlpha",
-    "XPosixLower",
-    "XPosixUpper",
-    "XPosixPunct",
-    "XPosixPrint",
-    "XPosixAlnum",
-    "XPosixGraph",
-    "Cased"
-};
-#endif
-
  START_EXTERN_C
  #  ifdef DOINIT
  EXTCONST  U32 PL_charclass[] = {
@@ -1247,17 +1254,28 @@ END_EXTERN_C
          && ((PL_charclass[(U8) (c)] & _CC_mask_A(classnum))     \
                                     == _CC_mask_A(classnum)))
  
-#   define isALPHA_A(c)  _generic_isCC_A(c, _CC_ALPHA)
+/* On ASCII platforms certain classes form a single range.  It's faster to
+ * special case these.  isDIGIT is a single range on all platforms */
+#   ifdef EBCDIC
+#     define isALPHA_A(c)  _generic_isCC_A(c, _CC_ALPHA)
+#     define isGRAPH_A(c)  _generic_isCC_A(c, _CC_GRAPH)
+#     define isLOWER_A(c)  _generic_isCC_A(c, _CC_LOWER)
+#     define isPRINT_A(c)  _generic_isCC_A(c, _CC_PRINT)
+#     define isUPPER_A(c)  _generic_isCC_A(c, _CC_UPPER)
+#   else
+      /* By folding the upper and lowercase, we can use a single range */
+#     define isALPHA_A(c)  inRANGE((~('A' ^ 'a') & (c)), 'A', 'Z')
+#     define isGRAPH_A(c)  inRANGE(c, ' ' + 1, 0x7e)
+#     define isLOWER_A(c)  inRANGE(c, 'a', 'z')
+#     define isPRINT_A(c)  inRANGE(c, ' ', 0x7e)
+#     define isUPPER_A(c)  inRANGE(c, 'A', 'Z')
+#   endif
  #   define isALPHANUMERIC_A(c) _generic_isCC_A(c, _CC_ALPHANUMERIC)
  #   define isBLANK_A(c)  _generic_isCC_A(c, _CC_BLANK)
  #   define isCNTRL_A(c)  _generic_isCC_A(c, _CC_CNTRL)
-#   define isDIGIT_A(c)  _generic_isCC(c, _CC_DIGIT) /* No non-ASCII digits */
-#   define isGRAPH_A(c)  _generic_isCC_A(c, _CC_GRAPH)
-#   define isLOWER_A(c)  _generic_isCC_A(c, _CC_LOWER)
-#   define isPRINT_A(c)  _generic_isCC_A(c, _CC_PRINT)
+#   define isDIGIT_A(c)  inRANGE(c, '0', '9')
  #   define isPUNCT_A(c)  _generic_isCC_A(c, _CC_PUNCT)
  #   define isSPACE_A(c)  _generic_isCC_A(c, _CC_SPACE)
-#   define isUPPER_A(c)  _generic_isCC_A(c, _CC_UPPER)
  #   define isWORDCHAR_A(c) _generic_isCC_A(c, _CC_WORDCHAR)
  #   define isXDIGIT_A(c)  _generic_isCC(c, _CC_XDIGIT) /* No non-ASCII xdigits
                                                          */
@@ -1470,13 +1488,18 @@ END_EXTERN_C
      #define toLOWER(c) (isASCII(c) ? toLOWER_LATIN1(c) : (c))
      #define toUPPER(c) (isASCII(c) ? toUPPER_LATIN1_MOD(c) : (c))
     which uses table lookup and mask instead of subtraction.  (This would
-   work because the _MOD does not apply in the ASCII range) */
+   work because the _MOD does not apply in the ASCII range).
+
+   These actually are UTF-8 invariant casing, not just ASCII, as any non-ASCII
+   UTF-8 invariants are neither upper nor lower.  (Only on EBCDIC platforms are
+   there non-ASCII invariants, and all of them are controls.) */
  #define toLOWER(c)  (isUPPER(c) ? (U8)((c) + ('a' - 'A')) : (c))
  #define toUPPER(c)  (isLOWER(c) ? (U8)((c) - ('a' - 'A')) : (c))
  
  /* In the ASCII range, these are equivalent to what they're here defined to be.
   * But by creating these definitions, other code doesn't have to be aware of
- * this detail */
+ * this detail.  Actually this works for all UTF-8 invariants, not just the
+ * ASCII range. (EBCDIC platforms can have non-ASCII invariants.) */
  #define toFOLD(c)    toLOWER(c)
  #define toTITLE(c)   toUPPER(c)
  
@@ -1534,18 +1557,21 @@ END_EXTERN_C
                                            || (char)(c) == '_'))
  
  /* These next three are also for internal core Perl use only: case-change
- * helper macros */
+ * helper macros.  The reason for using the PL_latin arrays is in case the
+ * system function is defective; it ensures uniform results that conform to the
+ * Unicod standard.   It does not handle the anomalies in UTF-8 Turkic locales */
  #define _generic_toLOWER_LC(c, function, cast)  (! FITS_IN_8_BITS(c)           \
                                                  ? (c)                          \
                                                  : (IN_UTF8_CTYPE_LOCALE)       \
                                                    ? PL_latin1_lc[ (U8) (c) ]   \
-                                                : (cast)function((cast)(c)))
+                                                  : (cast)function((cast)(c)))
  
  /* Note that the result can be larger than a byte in a UTF-8 locale.  It
   * returns a single value, so can't adequately return the upper case of LATIN
   * SMALL LETTER SHARP S in a UTF-8 locale (which should be a string of two
   * values "SS");  instead it asserts against that under DEBUGGING, and
- * otherwise returns its input */
+ * otherwise returns its input.  It does not handle the anomalies in UTF-8
+ * Turkic locales. */
  #define _generic_toUPPER_LC(c, function, cast)                                 \
                      (! FITS_IN_8_BITS(c)                                       \
                      ? (c)                                                      \
@@ -1563,7 +1589,8 @@ END_EXTERN_C
   * returns a single value, so can't adequately return the fold case of LATIN
   * SMALL LETTER SHARP S in a UTF-8 locale (which should be a string of two
   * values "ss"); instead it asserts against that under DEBUGGING, and
- * otherwise returns its input */
+ * otherwise returns its input.  It does not handle the anomalies in UTF-8
+ * Turkic locales */
  #define _generic_toFOLD_LC(c, function, cast)                                  \
                      ((UNLIKELY((c) == MICRO_SIGN) && IN_UTF8_CTYPE_LOCALE)     \
                        ? GREEK_SMALL_LETTER_MU                                  \
@@ -1858,13 +1885,6 @@ _generic_utf8_safe(classnum, p, e, _is_utf8_FOO_with_len(classnum, p, e))
               ? 0 /* Note that doesn't check validity for latin1 */          \
               : above_latin1)
  
-/* NOTE that some of these macros have very similar ones in regcharclass.h.
- * For example, there is (at the time of this writing) an 'is_SPACE_utf8()'
- * there, differing in name only by an underscore from the one here
- * 'isSPACE_utf8().  The difference is that the ones here are probably more
- * efficient and smaller, using an O(1) array lookup for Latin1-range code
- * points; the regcharclass.h ones are implemented as a series of
- * "if-else-if-else ..." */
  
  #define isALPHA_utf8(p)         _generic_utf8(ALPHA, p)
  #define isALPHANUMERIC_utf8(p)  _generic_utf8(ALPHANUMERIC, p)
@@ -2244,8 +2264,9 @@ PoisonWith(0xEF) for catching access to freed memory.
  #define NEWSV(x,len)   newSV(len)
  #endif
  
-#define MEM_SIZE_MAX ((MEM_SIZE)~0)
+#define MEM_SIZE_MAX ((MEM_SIZE)-1)
  
+#define _PERL_STRLEN_ROUNDUP_UNCHECKED(n) (((n) - 1 + PERL_STRLEN_ROUNDUP_QUANTUM) & ~((MEM_SIZE)PERL_STRLEN_ROUNDUP_QUANTUM - 1))
  
  #ifdef PERL_MALLOC_WRAP
  
@@ -2260,7 +2281,8 @@ PoisonWith(0xEF) for catching access to freed memory.
   */
  
  #  define _MEM_WRAP_NEEDS_RUNTIME_CHECK(n,t) \
-    (8 * sizeof(n) + sizeof(t) > sizeof(MEM_SIZE))
+    (  sizeof(MEM_SIZE) < sizeof(n) \
+    || sizeof(t) > ((MEM_SIZE)1 << 8*(sizeof(MEM_SIZE) - sizeof(n))))
  
  /* This is written in a slightly odd way to avoid various spurious
   * compiler warnings. We *want* to write the expression as
@@ -2291,17 +2313,22 @@ PoisonWith(0xEF) for catching access to freed memory.
         (void)(UNLIKELY(_MEM_WRAP_WILL_WRAP(n,t)) \
         && (Perl_croak_nocontext("%s",(a)),0))
  
+/* "a" arg must be a string literal */
+#  define MEM_WRAP_CHECK_s(n,t,a) \
+       (void)(UNLIKELY(_MEM_WRAP_WILL_WRAP(n,t)) \
+       && (Perl_croak_nocontext("" a ""),0))
+
  #define MEM_WRAP_CHECK_(n,t) MEM_WRAP_CHECK(n,t),
  
-#define PERL_STRLEN_ROUNDUP(n) ((void)(((n) > MEM_SIZE_MAX - 2 * PERL_STRLEN_ROUNDUP_QUANTUM) ? (croak_memory_wrap(),0):0),((n-1+PERL_STRLEN_ROUNDUP_QUANTUM)&~((MEM_SIZE)PERL_STRLEN_ROUNDUP_QUANTUM-1)))
+#define PERL_STRLEN_ROUNDUP(n) ((void)(((n) > MEM_SIZE_MAX - 2 * PERL_STRLEN_ROUNDUP_QUANTUM) ? (croak_memory_wrap(),0) : 0), _PERL_STRLEN_ROUNDUP_UNCHECKED(n))
  #else
  
  #define MEM_WRAP_CHECK(n,t)
  #define MEM_WRAP_CHECK_1(n,t,a)
-#define MEM_WRAP_CHECK_2(n,t,a,b)
+#define MEM_WRAP_CHECK_s(n,t,a)
  #define MEM_WRAP_CHECK_(n,t)
  
-#define PERL_STRLEN_ROUNDUP(n) (((n-1+PERL_STRLEN_ROUNDUP_QUANTUM)&~((MEM_SIZE)PERL_STRLEN_ROUNDUP_QUANTUM-1)))
+#define PERL_STRLEN_ROUNDUP(n) _PERL_STRLEN_ROUNDUP_UNCHECKED(n)
  
  #endif
  
@@ -2395,6 +2422,9 @@ void Perl_mem_log_del_sv(const SV *sv, const char *filename, const int linenumbe
  #define Safefree(d)    safefree(MEM_LOG_FREE((Malloc_t)(d)))
  #endif
  
+/* assert that a valid ptr has been supplied - use this instead of assert(ptr)  *
+ * as it handles cases like constant string arguments without throwing warnings *
+ * the cast is required, as is the inequality check, to avoid warnings          */
  #define perl_assert_ptr(p) assert( ((void*)(p)) != 0 )
  
  
@@ -2402,6 +2432,7 @@ void Perl_mem_log_del_sv(const SV *sv, const char *filename, const int linenumbe
  #define Copy(s,d,n,t)  (MEM_WRAP_CHECK_(n,t) perl_assert_ptr(d), perl_assert_ptr(s), (void)memcpy((char*)(d),(const char*)(s), (n) * sizeof(t)))
  #define Zero(d,n,t)    (MEM_WRAP_CHECK_(n,t) perl_assert_ptr(d), (void)memzero((char*)(d), (n) * sizeof(t)))
  
+/* Like above, but returns a pointer to 'd' */
  #define MoveD(s,d,n,t) (MEM_WRAP_CHECK_(n,t) perl_assert_ptr(d), perl_assert_ptr(s), memmove((char*)(d),(const char*)(s), (n) * sizeof(t)))
  #define CopyD(s,d,n,t) (MEM_WRAP_CHECK_(n,t) perl_assert_ptr(d), perl_assert_ptr(s), memcpy((char*)(d),(const char*)(s), (n) * sizeof(t)))
  #define ZeroD(d,n,t)   (MEM_WRAP_CHECK_(n,t) perl_assert_ptr(d), memzero((char*)(d), (n) * sizeof(t)))