This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
utf8.h, utfebcdic.h: Add #define
author Karl Williamson <khw@cpan.org>
Mon, 9 Nov 2015 05:13:44 +0000 (22:13 -0700)
committer Karl Williamson <khw@cpan.org>
Thu, 10 Dec 2015 00:11:54 +0000 (17:11 -0700)
for future use

utf8.h
utfebcdic.h

diff --git a/utf8.h b/utf8.h
index aefbe37..c57576b 100644 (file)
--- a/utf8.h
+++ b/utf8.h
@@ -221,9 +221,13 @@ Perl's extended UTF-8 means we can have start bytes up to FF.
  * illegal overlong sequences that begin with C0 and C1. */
 #define UTF8_IS_START(c)            (((U8)c) >= 0xc2)
 
+/* For use in UTF8_IS_CONTINUATION() below */
+#define UTF_IS_CONTINUATION_MASK    0xC0
+
 /* Is the byte 'c' part of a multi-byte UTF-8 encoded sequence, and not the
  * first byte thereof?  */
-#define UTF8_IS_CONTINUATION(c)     ((((U8)c) & 0xC0) == UTF_CONTINUATION_MARK)
+#define UTF8_IS_CONTINUATION(c)                                             \
+            ((((U8)c) & UTF_IS_CONTINUATION_MASK) == UTF_CONTINUATION_MARK)
 
 /* Is the UTF8-encoded byte 'c' the first byte of a two byte sequence?  Use
  * UTF8_IS_NEXT_CHAR_DOWNGRADEABLE() instead if the input isn't known to
diff --git a/utfebcdic.h b/utfebcdic.h
index 3a4fcc2..10b666a 100644 (file)
--- a/utfebcdic.h
+++ b/utfebcdic.h
@@ -205,8 +205,17 @@ above what a 64 bit word can hold */
  * definitions. */
 
 #define UTF8_IS_START(c)               _generic_isCC(c, _CC_UTF8_IS_START)
+
+#define UTF_IS_CONTINUATION_MASK    0xE0
+
 #define UTF8_IS_CONTINUATION(c)                _generic_isCC(c, _CC_UTF8_IS_CONTINUATION)
 
+/* The above instead could be written as this:
+#define UTF8_IS_CONTINUATION(c)                                                 \
+            ((NATIVE_UTF8_TO_I8(c) & UTF_IS_CONTINUATION_MASK)                  \
+                                                == UTF_CONTINUATION_MARK)
+ */
+
 /* Equivalent to ! UVCHR_IS_INVARIANT(c) */
 #define UTF8_IS_CONTINUED(c)           cBOOL(FITS_IN_8_BITS(c)                 \
    && ! (PL_charclass[(U8) (c)] & (_CC_mask(_CC_ASCII) | _CC_mask(_CC_CNTRL))))