This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Add is_utf8_valid_partial_char_flags()
authorKarl Williamson <khw@cpan.org>
Mon, 12 Sep 2016 04:18:57 +0000 (22:18 -0600)
committerKarl Williamson <khw@cpan.org>
Sun, 18 Sep 2016 03:10:50 +0000 (21:10 -0600)
This is a generalization of is_utf8_valid_partial_char to allow the
caller to automatically exclude things such as surrogates.

embed.fnc
embed.h
inline.h
proto.h

index 450a486..3bdc426 100644 (file)
--- a/embed.fnc
+++ b/embed.fnc
@@ -744,7 +744,10 @@ Abmnpd     |STRLEN |is_utf8_char_buf|NN const U8 *buf|NN const U8 *buf_end
 AnipdP |bool   |is_utf8_string |NN const U8 *s|STRLEN len
 Anpdmb |bool   |is_utf8_string_loc|NN const U8 *s|STRLEN len|NN const U8 **ep
 Anipd  |bool   |is_utf8_string_loclen|NN const U8 *s|STRLEN len|NULLOK const U8 **ep|NULLOK STRLEN *el
-AnidP  |bool   |is_utf8_valid_partial_char|NN const U8 * const s|NN const U8 * const e
+AmndP  |bool   |is_utf8_valid_partial_char                                 \
+               |NN const U8 * const s|NN const U8 * const e
+AnidP  |bool   |is_utf8_valid_partial_char_flags                           \
+               |NN const U8 * const s|NN const U8 * const e|const U32 flags
 AMpR   |bool   |_is_uni_FOO|const U8 classnum|const UV c
 AMpR   |bool   |_is_utf8_FOO|const U8 classnum|NN const U8 *p
 ADMpR  |bool   |is_utf8_alnum  |NN const U8 *p
diff --git a/embed.h b/embed.h
index 8ff1c93..50a19a4 100644 (file)
--- a/embed.h
+++ b/embed.h
 #define is_utf8_string         Perl_is_utf8_string
 #define is_utf8_string_loclen  Perl_is_utf8_string_loclen
 #define is_utf8_upper(a)       Perl_is_utf8_upper(aTHX_ a)
-#define is_utf8_valid_partial_char     S_is_utf8_valid_partial_char
+#define is_utf8_valid_partial_char_flags       S_is_utf8_valid_partial_char_flags
 #define is_utf8_xdigit(a)      Perl_is_utf8_xdigit(aTHX_ a)
 #define is_utf8_xidcont(a)     Perl_is_utf8_xidcont(aTHX_ a)
 #define is_utf8_xidfirst(a)    Perl_is_utf8_xidfirst(aTHX_ a)
index 41d0a9c..44fb484 100644 (file)
--- a/inline.h
+++ b/inline.h
@@ -526,17 +526,42 @@ failure can be signalled without having to wait for the next read.
 
 =cut
 */
+#define is_utf8_valid_partial_char(s, e) is_utf8_valid_partial_char_flags(s, e, 0)
+
+/*
+
+=for apidoc is_utf8_valid_partial_char_flags
+
+Like C<L</is_utf8_valid_partial_char>>, it returns a boolean giving whether
+or not the input is a valid UTF-8 encoded partial character, but it takes an
+extra parameter, C<flags>, which can further restrict which code points are
+considered valid.
+
+If C<flags> is 0, this behaves identically to
+C<L</is_utf8_valid_partial_char>>.  Otherwise C<flags> can be any combination
+of the C<UTF8_DISALLOW_I<foo>> flags accepted by C<L</utf8n_to_uvchr>>.  If
+there is any sequence of bytes that can complete the input partial character in
+such a way that a non-prohibited character is formed, the function returns
+TRUE; otherwise FALSE.  Non characters cannot be determined based on partial
+character input.  But many  of the other possible excluded types can be
+determined from just the first one or two bytes.
+
+=cut
+ */
+
 PERL_STATIC_INLINE bool
-S_is_utf8_valid_partial_char(const U8 * const s, const U8 * const e)
+S_is_utf8_valid_partial_char_flags(const U8 * const s, const U8 * const e, const U32 flags)
 {
+    PERL_ARGS_ASSERT_IS_UTF8_VALID_PARTIAL_CHAR_FLAGS;
 
-    PERL_ARGS_ASSERT_IS_UTF8_VALID_PARTIAL_CHAR;
+    assert(0 == (flags & ~(UTF8_DISALLOW_ILLEGAL_INTERCHANGE
+                          |UTF8_DISALLOW_ABOVE_31_BIT)));
 
     if (s >= e || s + UTF8SKIP(s) <= e) {
         return FALSE;
     }
 
-    return cBOOL(_is_utf8_char_helper(s, e, 0));
+    return cBOOL(_is_utf8_char_helper(s, e, flags));
 }
 
 /* ------------------------------- perl.h ----------------------------- */
diff --git a/proto.h b/proto.h
index c20986b..b13b42e 100644 (file)
--- a/proto.h
+++ b/proto.h
@@ -1628,9 +1628,12 @@ PERL_CALLCONV bool       Perl_is_utf8_upper(pTHX_ const U8 *p)
 #define PERL_ARGS_ASSERT_IS_UTF8_UPPER \
        assert(p)
 
-PERL_STATIC_INLINE bool        S_is_utf8_valid_partial_char(const U8 * const s, const U8 * const e)
+/* PERL_CALLCONV bool  is_utf8_valid_partial_char(const U8 * const s, const U8 * const e)
+                       __attribute__pure__; */
+
+PERL_STATIC_INLINE bool        S_is_utf8_valid_partial_char_flags(const U8 * const s, const U8 * const e, const U32 flags)
                        __attribute__pure__;
-#define PERL_ARGS_ASSERT_IS_UTF8_VALID_PARTIAL_CHAR    \
+#define PERL_ARGS_ASSERT_IS_UTF8_VALID_PARTIAL_CHAR_FLAGS      \
        assert(s); assert(e)
 
 PERL_CALLCONV bool     Perl_is_utf8_xdigit(pTHX_ const U8 *p)