From: Karl Williamson Date: Wed, 26 Aug 2015 21:35:05 +0000 (-0600) Subject: utf8.h, utfebcdic.h: Add some assertions X-Git-Tag: if-0.0605~67 X-Git-Url: https://perl5.git.perl.org/perl5.git/commitdiff_plain/e9b19ab7df3480e8f710ca6faad519f6fccdb081 utf8.h, utfebcdic.h: Add some assertions These will detect an array bounds error that occurs on EBCDIC machines, and by including the assert on non-EBCDIC, we verify that the code wouldn't fail when built on EBCDIC. --- diff --git a/utf8.h b/utf8.h index 668626f..17f0e82 100644 --- a/utf8.h +++ b/utf8.h @@ -127,8 +127,8 @@ END_EXTERN_C /* Native character to/from iso-8859-1. Are the identity functions on ASCII * platforms */ -#define NATIVE_TO_LATIN1(ch) (ch) -#define LATIN1_TO_NATIVE(ch) (ch) +#define NATIVE_TO_LATIN1(ch) (__ASSERT_(FITS_IN_8_BITS(ch)) (ch)) +#define LATIN1_TO_NATIVE(ch) (__ASSERT_(FITS_IN_8_BITS(ch)) (ch)) /* I8 is an intermediate version of UTF-8 used only in UTF-EBCDIC. We thus * consider it to be identical to UTF-8 on ASCII platforms. Strictly speaking @@ -136,8 +136,8 @@ END_EXTERN_C * because they are 8-bit encodings that serve the same purpose in Perl, and * rarely do we need to distinguish them. 
The term "NATIVE_UTF8" applies to * whichever one is applicable on the current platform */ -#define NATIVE_UTF8_TO_I8(ch) (ch) -#define I8_TO_NATIVE_UTF8(ch) (ch) +#define NATIVE_UTF8_TO_I8(ch) (__ASSERT_(FITS_IN_8_BITS(ch)) (ch)) +#define I8_TO_NATIVE_UTF8(ch) (__ASSERT_(FITS_IN_8_BITS(ch)) (ch)) /* Transforms in wide UV chars */ #define UNI_TO_NATIVE(ch) (ch) diff --git a/utfebcdic.h b/utfebcdic.h index f490b58..51e6e77 100644 --- a/utfebcdic.h +++ b/utfebcdic.h @@ -133,11 +133,13 @@ END_EXTERN_C /* EBCDIC-happy ways of converting native code to UTF-8 */ -#define NATIVE_TO_LATIN1(ch) PL_e2a[(U8)(ch)] -#define LATIN1_TO_NATIVE(ch) PL_a2e[(U8)(ch)] +/* Use these when ch is known to be < 256 */ +#define NATIVE_TO_LATIN1(ch) (__ASSERT_(FITS_IN_8_BITS(ch)) PL_e2a[(U8)(ch)]) +#define LATIN1_TO_NATIVE(ch) (__ASSERT_(FITS_IN_8_BITS(ch)) PL_a2e[(U8)(ch)]) -#define NATIVE_UTF8_TO_I8(ch) PL_e2utf[(U8)(ch)] -#define I8_TO_NATIVE_UTF8(ch) PL_utf2e[(U8)(ch)] +/* Use these on bytes */ +#define NATIVE_UTF8_TO_I8(b) (__ASSERT_(FITS_IN_8_BITS(b)) PL_e2utf[(U8)(b)]) +#define I8_TO_NATIVE_UTF8(b) (__ASSERT_(FITS_IN_8_BITS(b)) PL_utf2e[(U8)(b)]) /* Transforms in wide UV chars */ #define NATIVE_TO_UNI(ch) (((ch) > 255) ? (ch) : NATIVE_TO_LATIN1(ch))