From: Karl Williamson
Date: Wed, 2 Oct 2019 22:48:38 +0000 (-0600)
Subject: Make defn of UTF_IS_CONTINUATION_MASK common
X-Git-Tag: v5.31.5~83^2~8
X-Git-Url: https://perl5.git.perl.org/perl5.git/commitdiff_plain/9f3cfb7a26dab519dbc83ef02bd3fbf084cb6fc3

Make defn of UTF_IS_CONTINUATION_MASK common

This variable can be defined from the same base in both UTF-8 and
UTF-EBCDIC, and doing so eliminates an EBCDIC dependency.
---

diff --git a/utf8.h b/utf8.h
index 472527c..356c3e4 100644
--- a/utf8.h
+++ b/utf8.h
@@ -311,9 +311,6 @@ C is Unicode if above 255; otherwise is platform-native.
 #define UTF8_IS_START(c) (__ASSERT_(FITS_IN_8_BITS(c)) \
                           ((U8)((c) | 0)) >= 0xc2)
 
-/* For use in UTF8_IS_CONTINUATION() below */
-#define UTF_IS_CONTINUATION_MASK 0xC0
-
 /* Is the byte 'c' part of a multi-byte UTF8-8 encoded sequence, and not the
  * first byte thereof? The |0 makes sure this isn't mistakenly called with a
  * ptr argument */
@@ -360,6 +357,10 @@ C is Unicode if above 255; otherwise is platform-native.
  * UTF-8, 0x1F in UTF-EBCDIC. */
 #define UTF_CONTINUATION_MASK ((U8) ((1U << UTF_ACCUMULATION_SHIFT) - 1))
 
+/* For use in UTF8_IS_CONTINUATION(). This turns out to be 0xC0 in UTF-8,
+ * E0 in UTF-EBCDIC */
+#define UTF_IS_CONTINUATION_MASK ((U8) (0xFF << UTF_ACCUMULATION_SHIFT))
+
 /* Internal macro to be used only in this file to aid in constructing other
  * publicly accessible macros.
  * The number of bytes required to express this uv in UTF-8, for just those
diff --git a/utfebcdic.h b/utfebcdic.h
index d8278a1..f13f555 100644
--- a/utfebcdic.h
+++ b/utfebcdic.h
@@ -220,8 +220,6 @@ explicitly forbidden, and the shortest possible encoding should always be used
 
 #define UTF8_IS_START(c) _generic_isCC(c, _CC_UTF8_IS_START)
 
-#define UTF_IS_CONTINUATION_MASK 0xE0
-
 #define UTF8_IS_CONTINUATION(c) _generic_isCC(c, _CC_UTF8_IS_CONTINUATION)
 
 /* The above instead could be written as this: