From 858cd8abc358948a5494c38ce06177d2cfb7607e Mon Sep 17 00:00:00 2001 From: Karl Williamson Date: Sun, 8 Nov 2015 22:13:44 -0700 Subject: [PATCH] utf8.h, utfebcdic.h: Add #define for future use --- utf8.h | 6 +++++- utfebcdic.h | 9 +++++++++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/utf8.h b/utf8.h index aefbe37..c57576b 100644 --- a/utf8.h +++ b/utf8.h @@ -221,9 +221,13 @@ Perl's extended UTF-8 means we can have start bytes up to FF. * illegal overlong sequences that begin with C0 and C1. */ #define UTF8_IS_START(c) (((U8)c) >= 0xc2) +/* For use in UTF8_IS_CONTINUATION() below */ +#define UTF_IS_CONTINUATION_MASK 0xC0 + /* Is the byte 'c' part of a multi-byte UTF8-8 encoded sequence, and not the * first byte thereof? */ -#define UTF8_IS_CONTINUATION(c) ((((U8)c) & 0xC0) == UTF_CONTINUATION_MARK) +#define UTF8_IS_CONTINUATION(c) \ + ((((U8)c) & UTF_IS_CONTINUATION_MASK) == UTF_CONTINUATION_MARK) /* Is the UTF8-encoded byte 'c' the first byte of a two byte sequence? Use * UTF8_IS_NEXT_CHAR_DOWNGRADEABLE() instead if the input isn't known to diff --git a/utfebcdic.h b/utfebcdic.h index 3a4fcc2..10b666a 100644 --- a/utfebcdic.h +++ b/utfebcdic.h @@ -205,8 +205,17 @@ above what a 64 bit word can hold */ * definitions. */ #define UTF8_IS_START(c) _generic_isCC(c, _CC_UTF8_IS_START) + +#define UTF_IS_CONTINUATION_MASK 0xE0 + #define UTF8_IS_CONTINUATION(c) _generic_isCC(c, _CC_UTF8_IS_CONTINUATION) +/* The above instead could be written as this: +#define UTF8_IS_CONTINUATION(c) \ + ((NATIVE_UTF8_TO_I8(c) & UTF_IS_CONTINUATION_MASK) \ + == UTF_CONTINUATION_MARK) + */ + /* Equivalent to ! UVCHR_IS_INVARIANT(c) */ #define UTF8_IS_CONTINUED(c) cBOOL(FITS_IN_8_BITS(c) \ && ! (PL_charclass[(U8) (c)] & (_CC_mask(_CC_ASCII) | _CC_mask(_CC_CNTRL)))) -- 1.8.3.1