From 93e6dbd62d3750e59ca281bcd629990cc1627122 Mon Sep 17 00:00:00 2001 From: Karl Williamson Date: Wed, 25 Nov 2015 22:35:53 -0700 Subject: [PATCH] utf8.h: Remove use of redundant flags The code points covered by the ABOVE_31_BIT flags are a proper subset of those covered by the SUPER flags, so if the latter are set, we don't have to bother setting the former. On the other hand, there is no harm in doing so; these changes are all resolved at compile time. The reason I'm changing them is that it is easier to explain in the pod what is happening, in the next commit. --- utf8.c | 23 ++++++++++++++++------- utf8.h | 11 ++++++----- 2 files changed, 22 insertions(+), 12 deletions(-) diff --git a/utf8.c b/utf8.c index 7faecad..696686f 100644 --- a/utf8.c +++ b/utf8.c @@ -292,15 +292,18 @@ The C<UTF8_DISALLOW_NONCHAR> and C<UTF8_WARN_NONCHAR> flags affect how the function handles a Unicode non-character. And likewise, the C<UTF8_DISALLOW_SUPER> and C<UTF8_WARN_SUPER> flags affect the handling of code points that are -above the Unicode maximum of 0x10FFFF. Code points above 0x7FFF_FFFF (which are +above the Unicode maximum of 0x10FFFF. + + +The flag C<UTF8_WARN_ILLEGAL_INTERCHANGE> selects all three of +the above WARN flags; and C<UTF8_DISALLOW_ILLEGAL_INTERCHANGE> selects all +three DISALLOW flags. + +Code points above 0x7FFF_FFFF (which are even less portable) can be warned and/or disallowed even if other above-Unicode code points are accepted, by the C<UTF8_DISALLOW_ABOVE_31_BIT> and C<UTF8_WARN_ABOVE_31_BIT> flags. -And finally, the flag C<UTF8_WARN_ILLEGAL_INTERCHANGE> selects all four of -the above WARN flags; and C<UTF8_DISALLOW_ILLEGAL_INTERCHANGE> selects all -four DISALLOW flags. - =cut */ @@ -682,8 +685,14 @@ Perl_utf8n_to_uvchr(pTHX_ const U8 *s, STRLEN curlen, STRLEN *retlen, U32 flags) /* Here, the input is considered to be well-formed, but it still could be a * problematic code point that is not allowed by the input parameters.
*/ if (uv >= UNICODE_SURROGATE_FIRST /* isn't problematic if < this */ - && (flags & (UTF8_DISALLOW_ILLEGAL_INTERCHANGE - |UTF8_WARN_ILLEGAL_INTERCHANGE))) + && (flags & ( UTF8_DISALLOW_NONCHAR + |UTF8_DISALLOW_SURROGATE + |UTF8_DISALLOW_SUPER + |UTF8_DISALLOW_ABOVE_31_BIT + |UTF8_WARN_NONCHAR + |UTF8_WARN_SURROGATE + |UTF8_WARN_SUPER + |UTF8_WARN_ABOVE_31_BIT))) { if (UNICODE_IS_SURROGATE(uv)) { diff --git a/utf8.h b/utf8.h index 36c3852..84766b7 100644 --- a/utf8.h +++ b/utf8.h @@ -558,12 +558,13 @@ case any call to string overloading updates the internal UTF-8 encoding flag. #define UTF8_ALLOW_SURROGATE 0 #define UTF8_DISALLOW_ILLEGAL_INTERCHANGE \ - (UTF8_DISALLOW_SUPER|UTF8_DISALLOW_NONCHAR \ - |UTF8_DISALLOW_SURROGATE|UTF8_DISALLOW_ABOVE_31_BIT) + ( UTF8_DISALLOW_SUPER|UTF8_DISALLOW_NONCHAR \ + |UTF8_DISALLOW_SURROGATE) #define UTF8_WARN_ILLEGAL_INTERCHANGE \ - (UTF8_WARN_SUPER|UTF8_WARN_NONCHAR|UTF8_WARN_SURROGATE|UTF8_WARN_ABOVE_31_BIT) -#define UTF8_ALLOW_ANY \ - (~(UTF8_DISALLOW_ILLEGAL_INTERCHANGE|UTF8_WARN_ILLEGAL_INTERCHANGE)) + (UTF8_WARN_SUPER|UTF8_WARN_NONCHAR|UTF8_WARN_SURROGATE) +#define UTF8_ALLOW_ANY \ + (~( UTF8_DISALLOW_ILLEGAL_INTERCHANGE|UTF8_DISALLOW_ABOVE_31_BIT \ + |UTF8_WARN_ILLEGAL_INTERCHANGE|UTF8_WARN_ABOVE_31_BIT)) #define UTF8_ALLOW_ANYUV \ (UTF8_ALLOW_EMPTY \ & ~(UTF8_DISALLOW_ILLEGAL_INTERCHANGE|UTF8_WARN_ILLEGAL_INTERCHANGE)) -- 1.8.3.1