This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
utf8.h: Combine ASCII and EBCDIC defines into one
author: Karl Williamson <khw@cpan.org>
Fri, 6 Nov 2015 19:54:55 +0000 (12:54 -0700)
committer: Karl Williamson <khw@cpan.org>
Sun, 6 Dec 2015 05:06:49 +0000 (22:06 -0700)
By deriving UTF_CONTINUATION_MASK from a more fundamental value,
UTF_ACCUMULATION_SHIFT, the ASCII and EBCDIC definitions of the macro
become identical, so only one definition, common to both platforms, is
needed.

utf8.h
utfebcdic.h

diff --git a/utf8.h b/utf8.h
index 77b6a6e..277ed52 100644 (file)
--- a/utf8.h
+++ b/utf8.h
@@ -241,9 +241,6 @@ Perl's extended UTF-8 means we can have start bytes up to FF.
  * real information */
 #define UTF_ACCUMULATION_SHIFT         6
 
-/* 2**UTF_ACCUMULATION_SHIFT - 1 */
-#define UTF_CONTINUATION_MASK          ((U8)0x3f)
-
 #if UVSIZE >= 8
 #  define UTF8_QUAD_MAX UINT64_C(0x1000000000)
 
@@ -290,6 +287,9 @@ encoded as UTF-8.  C<cp> is a native (ASCII or EBCDIC) code point if less than
 
 #endif /* EBCDIC vs ASCII */
 
+/* 2**UTF_ACCUMULATION_SHIFT - 1 */
+#define UTF_CONTINUATION_MASK  ((U8) ((1U << UTF_ACCUMULATION_SHIFT) - 1))
+
 /* The maximum number of UTF-8 bytes a single Unicode character can
  * uppercase/lowercase/fold into.  Unicode guarantees that the maximum
  * expansion is UTF8_MAX_FOLD_CHAR_EXPAND characters, but any above-Unicode
diff --git a/utfebcdic.h b/utfebcdic.h
index ccb1fa1..09defa9 100644 (file)
--- a/utfebcdic.h
+++ b/utfebcdic.h
@@ -233,7 +233,6 @@ above what a 64 bit word can hold */
                 _generic_isCC(c, _CC_UTF8_START_BYTE_IS_FOR_AT_LEAST_SURROGATE)
 
 #define UTF_CONTINUATION_MARK          0xA0
-#define UTF_CONTINUATION_MASK          ((U8)0x1f)
 #define UTF_ACCUMULATION_SHIFT         5
 
 /* ^? is defined to be APC on EBCDIC systems.  See the definition of toCTRL()