utf8.h: Add dummy param for when macros placed in API blead
author Karl Williamson <khw@cpan.org>
Fri, 15 May 2015 19:49:31 +0000 (13:49 -0600)
committer Karl Williamson <khw@cpan.org>
Sat, 1 Aug 2015 16:34:50 +0000 (10:34 -0600)
These macros are not in the public API, but they might be someday.  We
may want to check for valid UTF-8 at some point.  Add a parameter so
that this is possible then without having to change the API.

This also changes utf8.c to use the short name of one macro
(UTF8_IS_NONCHAR) instead of its long-named equivalent.

utf8.c
utf8.h

index 695daac..794649e 100644 (file)
--- a/utf8.c
+++ b/utf8.c
@@ -3842,7 +3842,7 @@ Perl_check_utf8_print(pTHX_ const U8* s, const STRLEN len)
        }
        if (UNLIKELY(*s >= UTF8_FIRST_PROBLEMATIC_CODE_POINT_FIRST_BYTE)) {
            STRLEN char_len;
-           if (UTF8_IS_SUPER(s)) {
+           if (UTF8_IS_SUPER(s, e)) {
                if (ckWARN_d(WARN_NON_UNICODE)) {
                    UV uv = utf8_to_uvchr_buf(s, e, &char_len);
                    Perl_warner(aTHX_ packWARN(WARN_NON_UNICODE),
@@ -3850,7 +3850,7 @@ Perl_check_utf8_print(pTHX_ const U8* s, const STRLEN len)
                    ok = FALSE;
                }
            }
-           else if (UTF8_IS_SURROGATE(s)) {
+           else if (UTF8_IS_SURROGATE(s, e)) {
                if (ckWARN_d(WARN_SURROGATE)) {
                    UV uv = utf8_to_uvchr_buf(s, e, &char_len);
                    Perl_warner(aTHX_ packWARN(WARN_SURROGATE),
@@ -3858,10 +3858,7 @@ Perl_check_utf8_print(pTHX_ const U8* s, const STRLEN len)
                    ok = FALSE;
                }
            }
-           else if
-               ((UTF8_IS_NONCHAR_GIVEN_THAT_NON_SUPER_AND_GE_PROBLEMATIC(s))
-                && (ckWARN_d(WARN_NONCHAR)))
-           {
+           else if ((UTF8_IS_NONCHAR(s, e)) && (ckWARN_d(WARN_NONCHAR))) {
                UV uv = utf8_to_uvchr_buf(s, e, &char_len);
                Perl_warner(aTHX_ packWARN(WARN_NONCHAR),
                    "Unicode non-character U+%04"UVXf" is not recommended for open interchange", uv);
diff --git a/utf8.h b/utf8.h
index d4c8e55..722cbb1 100644 (file)
--- a/utf8.h
+++ b/utf8.h
@@ -535,7 +535,11 @@ case any call to string overloading updates the internal UTF-8 encoding flag.
 #define UTF8_FIRST_PROBLEMATIC_CODE_POINT_FIRST_BYTE \
                                     FIRST_SURROGATE_UTF8_FIRST_BYTE
 
-#define UTF8_IS_SURROGATE(s) cBOOL(is_SURROGATE_utf8(s))
+/* Several of the macros below have a second parameter that is currently
+ * unused; but could be used in the future to make sure that the input is
+ * well-formed. */
+
+#define UTF8_IS_SURROGATE(s, e) cBOOL(is_SURROGATE_utf8(s))
 #define UTF8_IS_REPLACEMENT(s, send) cBOOL(is_REPLACEMENT_utf8_safe(s,send))
 
 /*               ASCII              EBCDIC I8
@@ -546,20 +550,20 @@ case any call to string overloading updates the internal UTF-8 encoding flag.
  * BE AWARE that this test doesn't rule out malformed code points, in
  * particular overlongs */
 #ifdef EBCDIC /* Both versions assume well-formed UTF8 */
-#   define UTF8_IS_SUPER(s) (NATIVE_UTF8_TO_I8(* (U8*) (s)) >= 0xF9             \
+#   define UTF8_IS_SUPER(s, e) (NATIVE_UTF8_TO_I8(* (U8*) (s)) >= 0xF9          \
                          && (NATIVE_UTF8_TO_I8(* (U8*) (s)) > 0xF9              \
                              || (NATIVE_UTF8_TO_I8(* ((U8*) (s) + 1)) >= 0xA2)))
 #else
-#   define UTF8_IS_SUPER(s) (*(U8*) (s) >= 0xF4                                 \
-                            && (*(U8*) (s) > 0xF4 || (*((U8*) (s) + 1) >= 0x90)))
+#   define UTF8_IS_SUPER(s, e) (*(U8*) (s) >= 0xF4                              \
+                           && (*(U8*) (s) > 0xF4 || (*((U8*) (s) + 1) >= 0x90)))
 #endif
 
 /* These are now machine generated, and the 'given' clause is no longer
  * applicable */
-#define UTF8_IS_NONCHAR_GIVEN_THAT_NON_SUPER_AND_GE_PROBLEMATIC(s)             \
+#define UTF8_IS_NONCHAR_GIVEN_THAT_NON_SUPER_AND_GE_PROBLEMATIC(s, e)          \
                                                     cBOOL(is_NONCHAR_utf8(s))
-#define UTF8_IS_NONCHAR(s)                                                     \
-                    UTF8_IS_NONCHAR_GIVEN_THAT_NON_SUPER_AND_GE_PROBLEMATIC(s)
+#define UTF8_IS_NONCHAR(s, e)                                                  \
+                UTF8_IS_NONCHAR_GIVEN_THAT_NON_SUPER_AND_GE_PROBLEMATIC(s, e)
 
 #define UNICODE_SURROGATE_FIRST                0xD800
 #define UNICODE_SURROGATE_LAST         0xDFFF