handy.h: Simplify some macro definitions

[perl5.git] / handy.h
diff --git a/handy.h b/handy.h

index df28b92..64d8b3a 100644 (file)
--- a/handy.h
+++ b/handy.h
@@ -551,6 +551,12 @@ C<isASCII_LC_utf8>.  Note, however, that some platforms do not have the C
  library routine C<isascii()>.  In these cases, the variants whose names contain
  C<LC> are the same as the corresponding ones without.
  
+Also note that, because all ASCII characters are UTF-8 invariant (meaning they
+have the exact same representation (always a single byte) whether encoded in
+UTF-8 or not), C<isASCII> will give the correct results when called with any
+byte in any string encoded or not in UTF-8.  And similarly C<isASCII_utf8> will
+work properly on any string encoded or not in UTF-8.
+
  =for apidoc Am|bool|isBLANK|char ch
  Returns a boolean indicating whether the specified character is a
  character considered to be a blank, analogous to C<m/[[:blank:]]/>.
@@ -700,17 +706,125 @@ Behaviour is only well defined when isXDIGIT(*str) is true.
  
  =head1 Character case changing
  
-=for apidoc Am|char|toUPPER|char ch
-Converts the specified character to uppercase, if possible; otherwise returns
-the input character itself.
+=for apidoc Am|U8|toUPPER|U8 ch
+Converts the specified character to uppercase.  If the input is anything but an
+ASCII lowercase character, that input character itself is returned.  Variant
+C<toUPPER_A> is equivalent.
+
+=for apidoc Am|UV|toUPPER_uni|UV cp|U8* s|STRLEN* lenp
+Converts the Unicode code point C<cp> to its uppercase version, and
+stores that in UTF-8 in C<s>, and its length in bytes in C<lenp>.  Note
+that the buffer pointed to by C<s> needs to be at least C<UTF8_MAXBYTES_CASE+1>
+bytes since the uppercase version may be longer than the original character.
+
+The first code point of the uppercased version is returned
+(but note, as explained just above, that there may be more.)
+
+=for apidoc Am|UV|toUPPER_utf8|U8* p|U8* s|STRLEN* lenp
+Converts the UTF-8 encoded character at C<p> to its uppercase version, and
+stores that in UTF-8 in C<s>, and its length in bytes in C<lenp>.  Note
+that the buffer pointed to by C<s> needs to be at least C<UTF8_MAXBYTES_CASE+1>
+bytes since the uppercase version may be longer than the original character.
+
+The first code point of the uppercased version is returned
+(but note, as explained just above, that there may be more.)
+
+The input character at C<p> is assumed to be well-formed.
+
+=for apidoc Am|U8|toFOLD|U8 ch
+Converts the specified character to foldcase.  If the input is anything but an
+ASCII uppercase character, that input character itself is returned.  Variant
+C<toFOLD_A> is equivalent.  (There is no equivalent C<toFOLD_L1> for the full
+Latin1 range, as the full generality of L</toFOLD_uni> is needed there.)
+
+=for apidoc Am|UV|toFOLD_uni|UV cp|U8* s|STRLEN* lenp
+Converts the Unicode code point C<cp> to its foldcase version, and
+stores that in UTF-8 in C<s>, and its length in bytes in C<lenp>.  Note
+that the buffer pointed to by C<s> needs to be at least C<UTF8_MAXBYTES_CASE+1>
+bytes since the foldcase version may be longer than the original character.
+
+The first code point of the foldcased version is returned
+(but note, as explained just above, that there may be more.)
+
+=for apidoc Am|UV|toFOLD_utf8|U8* p|U8* s|STRLEN* lenp
+Converts the UTF-8 encoded character at C<p> to its foldcase version, and
+stores that in UTF-8 in C<s>, and its length in bytes in C<lenp>.  Note
+that the buffer pointed to by C<s> needs to be at least C<UTF8_MAXBYTES_CASE+1>
+bytes since the foldcase version may be longer than the original character.
+
+The first code point of the foldcased version is returned
+(but note, as explained just above, that there may be more.)
+
+The input character at C<p> is assumed to be well-formed.
+
+=for apidoc Am|U8|toLOWER|U8 ch
+Converts the specified character to lowercase.  If the input is anything but an
+ASCII uppercase character, that input character itself is returned.  Variant
+C<toLOWER_A> is equivalent.
+
+=for apidoc Am|U8|toLOWER_L1|U8 ch
+Converts the specified Latin1 character to lowercase.  The results are undefined if
+the input doesn't fit in a byte.
+
+=for apidoc Am|U8|toLOWER_LC|U8 ch
+Converts the specified character to lowercase using the current locale's rules,
+if possible; otherwise returns the input character itself.
+
+=for apidoc Am|UV|toLOWER_uni|UV cp|U8* s|STRLEN* lenp
+Converts the Unicode code point C<cp> to its lowercase version, and
+stores that in UTF-8 in C<s>, and its length in bytes in C<lenp>.  Note
+that the buffer pointed to by C<s> needs to be at least C<UTF8_MAXBYTES_CASE+1>
+bytes since the lowercase version may be longer than the original character.
+
+The first code point of the lowercased version is returned
+(but note, as explained just above, that there may be more.)
+
+=for apidoc Am|UV|toLOWER_utf8|U8* p|U8* s|STRLEN* lenp
+Converts the UTF-8 encoded character at C<p> to its lowercase version, and
+stores that in UTF-8 in C<s>, and its length in bytes in C<lenp>.  Note
+that the buffer pointed to by C<s> needs to be at least C<UTF8_MAXBYTES_CASE+1>
+bytes since the lowercase version may be longer than the original character.
+
+The first code point of the lowercased version is returned
+(but note, as explained just above, that there may be more.)
+
+The input character at C<p> is assumed to be well-formed.
+
+=for apidoc Am|U8|toLOWER_LC|U8 ch
+Converts the specified character to lowercase using the current locale's rules,
+if possible; otherwise returns the input character itself.
+
+=for apidoc Am|U8|toTITLE|U8 ch
+Converts the specified character to titlecase.  If the input is anything but an
+ASCII lowercase character, that input character itself is returned.  Variant
+C<toTITLE_A> is equivalent.  (There is no C<toTITLE_L1> for the full Latin1 range,
+as the full generality of L</toTITLE_uni> is needed there.  Titlecase is not a
+concept used in locale handling, so there is no functionality for that.)
+
+=for apidoc Am|UV|toTITLE_uni|UV cp|U8* s|STRLEN* lenp
+Converts the Unicode code point C<cp> to its titlecase version, and
+stores that in UTF-8 in C<s>, and its length in bytes in C<lenp>.  Note
+that the buffer pointed to by C<s> needs to be at least C<UTF8_MAXBYTES_CASE+1>
+bytes since the titlecase version may be longer than the original character.
+
+The first code point of the titlecased version is returned
+(but note, as explained just above, that there may be more.)
+
+=for apidoc Am|UV|toTITLE_utf8|U8* p|U8* s|STRLEN* lenp
+Converts the UTF-8 encoded character at C<p> to its titlecase version, and
+stores that in UTF-8 in C<s>, and its length in bytes in C<lenp>.  Note
+that the buffer pointed to by C<s> needs to be at least C<UTF8_MAXBYTES_CASE+1>
+bytes since the titlecase version may be longer than the original character.
+
+The first code point of the titlecased version is returned
+(but note, as explained just above, that there may be more.)
  
-=for apidoc Am|char|toLOWER|char ch
-Converts the specified character to lowercase, if possible; otherwise returns
-the input character itself.
+The input character at C<p> is assumed to be well-formed.
  
  =cut
  
-XXX Still undocumented isVERTWS_uni and _utf8, and the other toUPPER etc functions
+XXX Still undocumented isVERTWS_uni and _utf8; it's unclear what their names
+really should be.  Also toUPPER_LC and toFOLD_LC, which are subject to change.
  
  Note that these macros are repeated in Devel::PPPort, so should also be
  patched there.  The file as of this writing is cpan/Devel-PPPort/parts/inc/misc
@@ -736,7 +850,8 @@ patched there.  The file as of this writing is cpan/Devel-PPPort/parts/inc/misc
  #define FITS_IN_8_BITS(c) ((sizeof(c) == 1) || !(((WIDEST_UTYPE)(c)) & ~0xFF))
  
  #ifdef EBCDIC
-#   define isASCII(c)    (FITS_IN_8_BITS(c) && (NATIVE_TO_UNI((U8) (c)) < 128))
+#   define isASCII(c)    (FITS_IN_8_BITS(c)                      \
+                         && (NATIVE_TO_LATIN1((U8) (c)) < 128))
  #else
  #   define isASCII(c)    ((WIDEST_UTYPE)(c) < 128)
  #endif
@@ -875,7 +990,7 @@ EXTCONST U32 PL_charclass[];
      /* The 1U keeps Solaris from griping when shifting sets the uppermost bit */
  #   define _CC_mask(classnum) (1U << (classnum))
  #   define _generic_isCC(c, classnum) cBOOL(FITS_IN_8_BITS(c) \
-                && (PL_charclass[(U8) NATIVE_TO_UNI(c)] & _CC_mask(classnum)))
+                && (PL_charclass[(U8) (c)] & _CC_mask(classnum)))
  
      /* The mask for the _A versions of the macros; it just adds in the bit for
       * ASCII. */
@@ -884,7 +999,7 @@ EXTCONST U32 PL_charclass[];
      /* The _A version makes sure that both the desired bit and the ASCII bit
       * are present */
  #   define _generic_isCC_A(c, classnum) (FITS_IN_8_BITS(c) \
-        && ((PL_charclass[(U8) NATIVE_TO_UNI(c)] & _CC_mask_A(classnum)) \
+        && ((PL_charclass[(U8) (c)] & _CC_mask_A(classnum)) \
                                  == _CC_mask_A(classnum)))
  
  #   define isALPHA_A(c)  _generic_isCC_A(c, _CC_ALPHA)
@@ -905,7 +1020,7 @@ EXTCONST U32 PL_charclass[];
  
      /* Either participates in a fold with a character above 255, or is a
       * multi-char fold */
-#   define _HAS_NONLATIN1_FOLD_CLOSURE_ONLY_FOR_USE_BY_REGCOMP_DOT_C_AND_REGEXEC_DOT_C(c) ((! cBOOL(FITS_IN_8_BITS(c))) || (PL_charclass[(U8) NATIVE_TO_UNI(c)] & _CC_mask(_CC_NONLATIN1_FOLD)))
+#   define _HAS_NONLATIN1_FOLD_CLOSURE_ONLY_FOR_USE_BY_REGCOMP_DOT_C_AND_REGEXEC_DOT_C(c) ((! cBOOL(FITS_IN_8_BITS(c))) || (PL_charclass[(U8) (c)] & _CC_mask(_CC_NONLATIN1_FOLD)))
  
  #   define _isQUOTEMETA(c) _generic_isCC(c, _CC_QUOTEMETA)
  #   define _IS_NON_FINAL_FOLD_ONLY_FOR_USE_BY_REGCOMP_DOT_C(c) \
@@ -913,44 +1028,37 @@ EXTCONST U32 PL_charclass[];
  #   define _IS_IN_SOME_FOLD_ONLY_FOR_USE_BY_REGCOMP_DOT_C(c) \
                                              _generic_isCC(c, _CC_IS_IN_SOME_FOLD)
  #else   /* No perl.h. */
+#   define isBLANK_A(c)  ((c) == ' ' || (c) == '\t')
+#   define isIDFIRST_A(c) (isALPHA_A(c) || (c) == '_')
+#   define isWORDCHAR_A(c) (isALPHANUMERIC_A(c) || (c) == '_')
+#   define isPSXSPC_A(c) (isSPACE_A(c) || (c) == '\v')
  #   ifdef EBCDIC
  #       define isALPHA_A(c)    (isASCII(c) && isALPHA(c))
  #       define isALPHANUMERIC_A(c) (isASCII(c) && isALPHANUMERIC(c))
-#       define isBLANK_A(c)    (isASCII(c) && isBLANK(c))
  #       define isCNTRL_A(c)    (isASCII(c) && isCNTRL(c))
  #       define isDIGIT_A(c)    (isASCII(c) && isDIGIT(c))
  #       define isGRAPH_A(c)    (isASCII(c) && isGRAPH(c))
-#       define isIDFIRST_A(c)  (isASCII(c) && isIDFIRST(c))
  #       define isLOWER_A(c)    (isASCII(c) && isLOWER(c))
  #       define isPRINT_A(c)    (isASCII(c) && isPRINT(c))
-#       define isPSXSPC_A(c)   (isASCII(c) && isPSXSPC(c))
  #       define isPUNCT_A(c)    (isASCII(c) && isPUNCT(c))
  #       define isSPACE_A(c)    (isASCII(c) && isSPACE(c))
  #       define isUPPER_A(c)    (isASCII(c) && isUPPER(c))
-#       define isWORDCHAR_A(c) (isASCII(c) && isWORDCHAR(c))
  #       define isXDIGIT_A(c)   (isASCII(c) && isXDIGIT(c))
  #   else   /* ASCII platform, no perl.h */
  #       define isALPHA_A(c)  (isUPPER_A(c) || isLOWER_A(c))
  #       define isALPHANUMERIC_A(c) (isALPHA_A(c) || isDIGIT_A(c))
-#       define isBLANK_A(c)  ((c) == ' ' || (c) == '\t')
-#       define isCNTRL_A(c) (FITS_IN_8_BITS(c) && ((U8) (c) < ' ' || (c) == 127))
+#       define isCNTRL_A(c)  (isASCII(c) && ! isPRINT_A(c))
  #       define isDIGIT_A(c)  ((c) <= '9' && (c) >= '0')
-#       define isGRAPH_A(c)  (isWORDCHAR_A(c) || isPUNCT_A(c))
-#       define isIDFIRST_A(c) (isALPHA_A(c) || (c) == '_')
+#       define isGRAPH_A(c)  (isPRINT_A(c) && (c) != ' ') /* excludes ' ' */
  #       define isLOWER_A(c)  ((c) >= 'a' && (c) <= 'z')
  #       define isPRINT_A(c)  (((c) >= 32 && (c) < 127))
-#       define isPSXSPC_A(c) (isSPACE_A(c) || (c) == '\v')
-#       define isPUNCT_A(c)  (((c) >= 33 && (c) <= 47)              \
-                              || ((c) >= 58 && (c) <= 64)           \
-                              || ((c) >= 91 && (c) <= 96)           \
-                              || ((c) >= 123 && (c) <= 126))
+#       define isPUNCT_A(c)  (isGRAPH_A(c) && ! isALPHANUMERIC_A(c))
  #       define isSPACE_A(c)  ((c) == ' '                            \
                                || (c) == '\t'                        \
                                || (c) == '\n'                        \
                                || (c) =='\r'                         \
                                || (c) == '\f')
  #       define isUPPER_A(c)  ((c) <= 'Z' && (c) >= 'A')
-#       define isWORDCHAR_A(c) (isALPHA_A(c) || isDIGIT_A(c) || (c) == '_')
  #       define isXDIGIT_A(c)   (isDIGIT_A(c)                        \
                                  || ((c) >= 'a' && (c) <= 'f')       \
                                  || ((c) <= 'F' && (c) >= 'A'))
@@ -980,19 +1088,19 @@ EXTCONST U32 PL_charclass[];
         * for backwards compatibility */
      /* ALPHAU includes Unicode semantics for latin1 characters.  It has an extra
       * >= AA test to speed up ASCII-only tests at the expense of the others */
-#   define isALPHA_L1(c) (isALPHA(c) || (NATIVE_TO_UNI((U8) c) >= 0xAA \
-       && ((NATIVE_TO_UNI((U8) c) >= 0xC0 \
-             && NATIVE_TO_UNI((U8) c) != 0xD7 && NATIVE_TO_UNI((U8) c) != 0xF7) \
-           || NATIVE_TO_UNI((U8) c) == 0xAA \
-           || NATIVE_TO_UNI((U8) c) == 0xB5 \
-           || NATIVE_TO_UNI((U8) c) == 0xBA)))
+#   define isALPHA_L1(c) (isALPHA(c) || (NATIVE_TO_LATIN1((U8) (c)) >= 0xAA \
+       && ((NATIVE_TO_LATIN1((U8) (c)) >= 0xC0 \
+             && NATIVE_TO_LATIN1((U8) (c)) != 0xD7 && NATIVE_TO_LATIN1((U8) (c)) != 0xF7) \
+           || NATIVE_TO_LATIN1((U8) (c)) == 0xAA \
+           || NATIVE_TO_LATIN1((U8) (c)) == 0xB5 \
+           || NATIVE_TO_LATIN1((U8) (c)) == 0xBA)))
  #   define isCHARNAME_CONT(c) (isWORDCHAR_L1(c)                         \
                                 || (c) == ' '                            \
                                 || (c) == '-'                            \
                                 || (c) == '('                            \
                                 || (c) == ')'                            \
                                 || (c) == ':'                            \
-                               || NATIVE_TO_UNI((U8) c) == 0xA0)
+                               || NATIVE_TO_LATIN1((U8) c) == 0xA0)
  #endif
  
  /* Macros that differ between EBCDIC and ASCII.  Where C89 defines a function,
@@ -1004,7 +1112,7 @@ EXTCONST U32 PL_charclass[];
  #ifdef EBCDIC
  #   define isALPHA(c)  isalpha(c)
  #   define isALPHANUMERIC(c)   isalnum(c)
-#   define isBLANK(c)  ((c) == ' ' || (c) == '\t' || NATIVE_TO_UNI(c) == 0xA0)
+#   define isBLANK(c)  ((c) == ' ' || (c) == '\t' || NATIVE_TO_LATIN1(c) == 0xA0)
  #   define isCNTRL(c)  iscntrl(c)
  #   define isDIGIT(c)  isdigit(c)
  #   define isGRAPH(c)  isgraph(c)
@@ -1046,21 +1154,31 @@ EXTCONST U32 PL_charclass[];
  #   define toUPPER(c)  (isLOWER(c) ? (c) - ('a' - 'A') : (c))
  #endif
  
+/* In the ASCII range, these are equivalent to what they're here defined to be.
+ * But by creating these definitions, other code doesn't have to be aware of
+ * this detail */
+#define toFOLD(c)    toLOWER(c)
+#define toFOLD_LC(c) toLOWER_LC(c)
+#define toTITLE(c)   toUPPER(c)
+
+#define toLOWER_A(c) toLOWER(c)
+#define toUPPER_A(c) toUPPER(c)
+#define toFOLD_A(c)  toFOLD(c)
+#define toTITLE_A(c) toTITLE(c)
  
  /* Use table lookup for speed; return the input itself if it is
   * out-of-range */
-#define toLOWER_LATIN1(c)    (FITS_IN_8_BITS(c)                            \
-                             ? UNI_TO_NATIVE(PL_latin1_lc[                 \
-                                               NATIVE_TO_UNI( (U8) (c)) ]) \
-                             : UNICODE_REPLACEMENT)
+#define toLOWER_LATIN1(c)    ((! FITS_IN_8_BITS(c))                        \
+                             ? (c)                                         \
+                             : PL_latin1_lc[ (U8) (c) ])
+#define toLOWER_L1(c)    toLOWER_LATIN1(c)  /* Synonym for consistency */
+
  /* Modified uc.  Is correct uc except for three non-ascii chars which are
   * all mapped to one of them, and these need special handling; the input
   * itself is returned if it is out-of-range */
-#define toUPPER_LATIN1_MOD(c) (FITS_IN_8_BITS(c)                           \
-                              ? UNI_TO_NATIVE(PL_mod_latin1_uc[            \
-                                               NATIVE_TO_UNI( (U8) (c)) ]) \
-                              : UNICODE_REPLACEMENT)
-
+#define toUPPER_LATIN1_MOD(c) ((! FITS_IN_8_BITS(c))                       \
+                               ? (c)                                       \
+                               : PL_mod_latin1_uc[ (U8) (c) ])
  #ifdef USE_NEXT_CTYPE
  
  #  define isALPHANUMERIC_LC(c) NXIsAlNum((unsigned int)(c))
@@ -1234,9 +1352,9 @@ EXTCONST U32 PL_charclass[];
                                           ? _generic_isCC(*(p), classnum)       \
                                           : (UTF8_IS_DOWNGRADEABLE_START(*(p))) \
                                             ? _generic_isCC(                    \
-                                                   TWO_BYTE_UTF8_TO_UNI(*(p),  \
+                                                TWO_BYTE_UTF8_TO_NATIVE(*(p),  \
                                                                     *((p)+1 )), \
-                                                   classnum)                   \
+                                                classnum)                      \
                                             : utf8)
  /* Like the above, but calls 'above_latin1(p)' to get the utf8 value.  'above_latin1'
   * can be a macro */
@@ -1301,6 +1419,7 @@ EXTCONST U32 PL_charclass[];
  #define isXDIGIT_utf8(p)        _generic_utf8_no_upper_latin1(_CC_XDIGIT, p,   \
                                                            is_XDIGIT_high(p))
  
+#define toFOLD_utf8(p,s,l)     to_utf8_fold(p,s,l)
  #define toLOWER_utf8(p,s,l)    to_utf8_lower(p,s,l)
  #define toTITLE_utf8(p,s,l)    to_utf8_title(p,s,l)
  #define toUPPER_utf8(p,s,l)    to_utf8_upper(p,s,l)
@@ -1310,11 +1429,11 @@ EXTCONST U32 PL_charclass[];
   * use the value given by the 'utf8' parameter.  This relies on the fact that
   * ASCII characters have the same representation whether utf8 or not.  Note
   * that it assumes that the utf8 has been validated, and ignores 'use bytes' */
-#define _generic_LC_utf8(macro, p, utf8)                                   \
-                         (UTF8_IS_INVARIANT(*(p))                          \
-                         ? macro(*(p))                                     \
-                         : (UTF8_IS_DOWNGRADEABLE_START(*(p)))             \
-                           ? macro(TWO_BYTE_UTF8_TO_UNI(*(p), *((p)+1)))   \
+#define _generic_LC_utf8(macro, p, utf8)                                    \
+                         (UTF8_IS_INVARIANT(*(p))                           \
+                         ? macro(*(p))                                      \
+                         : (UTF8_IS_DOWNGRADEABLE_START(*(p)))              \
+                           ? macro(TWO_BYTE_UTF8_TO_NATIVE(*(p), *((p)+1))) \
                             : utf8)
  
  #define _generic_LC_swash_utf8(macro, classnum, p)                         \
@@ -1370,7 +1489,7 @@ EXTCONST U32 PL_charclass[];
   * CTRL-@ is 0, CTRL-A is 1, etc, just like on ASCII, except that they don't
   * necessarily mean the same characters, e.g. CTRL-D is 4 on both systems, but
   * that is EOT on ASCII;  ST on EBCDIC */
-#  define toCTRL(c)    (toUPPER(NATIVE_TO_UNI(c)) ^ 64)
+#  define toCTRL(c)    (toUPPER(NATIVE_TO_LATIN1(c)) ^ 64)
  
  /* Line numbers are unsigned, 32 bits. */
  typedef U32 line_t;
@@ -1484,12 +1603,12 @@ PoisonWith(0xEF) for catching access to freed memory.
   * (U16)n > (size_t)~0/sizeof(U16) always being false. */
  #ifdef PERL_MALLOC_WRAP
  #define MEM_WRAP_CHECK(n,t) \
-       (void)(sizeof(t) > 1 && ((MEM_SIZE)(n)+0.0) > MEM_SIZE_MAX/sizeof(t) && (Perl_croak_memory_wrap(),0))
+       (void)(sizeof(t) > 1 && ((MEM_SIZE)(n)+0.0) > MEM_SIZE_MAX/sizeof(t) && (croak_memory_wrap(),0))
  #define MEM_WRAP_CHECK_1(n,t,a) \
         (void)(sizeof(t) > 1 && ((MEM_SIZE)(n)+0.0) > MEM_SIZE_MAX/sizeof(t) && (Perl_croak_nocontext("%s",(a)),0))
  #define MEM_WRAP_CHECK_(n,t) MEM_WRAP_CHECK(n,t),
  
-#define PERL_STRLEN_ROUNDUP(n) ((void)(((n) > MEM_SIZE_MAX - 2 * PERL_STRLEN_ROUNDUP_QUANTUM) ?  (Perl_croak_memory_wrap(),0):0),((n-1+PERL_STRLEN_ROUNDUP_QUANTUM)&~((MEM_SIZE)PERL_STRLEN_ROUNDUP_QUANTUM-1)))
+#define PERL_STRLEN_ROUNDUP(n) ((void)(((n) > MEM_SIZE_MAX - 2 * PERL_STRLEN_ROUNDUP_QUANTUM) ? (croak_memory_wrap(),0):0),((n-1+PERL_STRLEN_ROUNDUP_QUANTUM)&~((MEM_SIZE)PERL_STRLEN_ROUNDUP_QUANTUM-1)))
  #else
  
  #define MEM_WRAP_CHECK(n,t)
@@ -1660,6 +1779,37 @@ void Perl_mem_log_del_sv(const SV *sv, const char *filename, const int linenumbe
  #  define deprecate(s) Perl_ck_warner_d(aTHX_ packWARN(WARN_DEPRECATED), "Use of " s " is deprecated")
  #endif
  
+/* Internal macros to deal with gids and uids */
+#ifdef PERL_CORE
+
+#  if Uid_t_size > IVSIZE
+#    define sv_setuid(sv, uid)       sv_setnv((sv), (NV)(uid))
+#    define SvUID(sv)                SvNV(sv)
+#  else
+#    if Uid_t_sign <= 0
+#      define sv_setuid(sv, uid)       sv_setiv((sv), (IV)(uid))
+#      define SvUID(sv)                SvIV(sv)
+#    else
+#      define sv_setuid(sv, uid)       sv_setuv((sv), (UV)(uid))
+#      define SvUID(sv)                SvUV(sv)
+#    endif
+#  endif /* Uid_t_size */
+
+#  if Gid_t_size > IVSIZE
+#    define sv_setgid(sv, gid)       sv_setnv((sv), (NV)(gid))
+#    define SvGID(sv)                SvNV(sv)
+#  else
+#    if Gid_t_sign <= 0
+#      define sv_setgid(sv, gid)       sv_setiv((sv), (IV)(gid))
+#      define SvGID(sv)                SvIV(sv)
+#    else
+#      define sv_setgid(sv, gid)       sv_setuv((sv), (UV)(gid))
+#      define SvGID(sv)                SvUV(sv)
+#    endif
+#  endif /* Gid_t_size */
+
+#endif
+
  #endif  /* HANDY_H */
  
  /*