This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
utf8.h, utfebcdic.h: Add some assertions
author Karl Williamson <khw@cpan.org>
Wed, 26 Aug 2015 21:35:05 +0000 (15:35 -0600)
committer Karl Williamson <khw@cpan.org>
Fri, 4 Sep 2015 16:49:22 +0000 (10:49 -0600)
These will detect an array bounds error that occurs on EBCDIC machines,
and by including the assert on non-EBCDIC, we verify that the code
wouldn't fail when built on EBCDIC.

utf8.h
utfebcdic.h

diff --git a/utf8.h b/utf8.h
index 668626f..17f0e82 100644 (file)
--- a/utf8.h
+++ b/utf8.h
@@ -127,8 +127,8 @@ END_EXTERN_C
 
 /* Native character to/from iso-8859-1.  Are the identity functions on ASCII
  * platforms */
-#define NATIVE_TO_LATIN1(ch)     (ch)
-#define LATIN1_TO_NATIVE(ch)     (ch)
+#define NATIVE_TO_LATIN1(ch)     (__ASSERT_(FITS_IN_8_BITS(ch)) (ch))
+#define LATIN1_TO_NATIVE(ch)     (__ASSERT_(FITS_IN_8_BITS(ch)) (ch))
 
 /* I8 is an intermediate version of UTF-8 used only in UTF-EBCDIC.  We thus
  * consider it to be identical to UTF-8 on ASCII platforms.  Strictly speaking
@@ -136,8 +136,8 @@ END_EXTERN_C
  * because they are 8-bit encodings that serve the same purpose in Perl, and
  * rarely do we need to distinguish them.  The term "NATIVE_UTF8" applies to
  * whichever one is applicable on the current platform */
-#define NATIVE_UTF8_TO_I8(ch) (ch)
-#define I8_TO_NATIVE_UTF8(ch) (ch)
+#define NATIVE_UTF8_TO_I8(ch) (__ASSERT_(FITS_IN_8_BITS(ch)) (ch))
+#define I8_TO_NATIVE_UTF8(ch) (__ASSERT_(FITS_IN_8_BITS(ch)) (ch))
 
 /* Transforms in wide UV chars */
 #define UNI_TO_NATIVE(ch)        (ch)
diff --git a/utfebcdic.h b/utfebcdic.h
index f490b58..51e6e77 100644 (file)
--- a/utfebcdic.h
+++ b/utfebcdic.h
@@ -133,11 +133,13 @@ END_EXTERN_C
 
 /* EBCDIC-happy ways of converting native code to UTF-8 */
 
-#define NATIVE_TO_LATIN1(ch)            PL_e2a[(U8)(ch)]
-#define LATIN1_TO_NATIVE(ch)            PL_a2e[(U8)(ch)]
+/* Use these when ch is known to be < 256 */
+#define NATIVE_TO_LATIN1(ch)            (__ASSERT_(FITS_IN_8_BITS(ch)) PL_e2a[(U8)(ch)])
+#define LATIN1_TO_NATIVE(ch)            (__ASSERT_(FITS_IN_8_BITS(ch)) PL_a2e[(U8)(ch)])
 
-#define NATIVE_UTF8_TO_I8(ch)           PL_e2utf[(U8)(ch)]
-#define I8_TO_NATIVE_UTF8(ch)           PL_utf2e[(U8)(ch)]
+/* Use these on bytes */
+#define NATIVE_UTF8_TO_I8(b)           (__ASSERT_(FITS_IN_8_BITS(b)) PL_e2utf[(U8)(b)])
+#define I8_TO_NATIVE_UTF8(b)           (__ASSERT_(FITS_IN_8_BITS(b)) PL_utf2e[(U8)(b)])
 
 /* Transforms in wide UV chars */
 #define NATIVE_TO_UNI(ch)        (((ch) > 255) ? (ch) : NATIVE_TO_LATIN1(ch))