This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Remove "Warning: perl appears in your path"
[perl5.git] / utf8.h
diff --git a/utf8.h b/utf8.h
index 7cd163a..a93519a 100644 (file)
--- a/utf8.h
+++ b/utf8.h
@@ -72,6 +72,8 @@ the string is invariant.
 #define utf8_to_uvchr_buf(s, e, lenp)                                          \
                      utf8n_to_uvchr(s, (U8*)(e) - (U8*)(s), lenp,              \
                                     ckWARN_d(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY)
+#define utf8n_to_uvchr(s, len, lenp, flags)                                    \
+                                utf8n_to_uvchr_error(s, len, lenp, flags, 0)
 
 #define to_uni_fold(c, p, lenp) _to_uni_fold_flags(c, p, lenp, FOLD_FLAGS_FULL)
 #define to_utf8_fold(c, p, lenp) _to_utf8_fold_flags(c, p, lenp, FOLD_FLAGS_FULL)
@@ -526,13 +528,6 @@ encoded as UTF-8.  C<cp> is a native (ASCII or EBCDIC) code point if less than
                                    | ((NATIVE_UTF8_TO_I8((U8)new))             \
                                        & UTF_CONTINUATION_MASK))
 
-/* If a value is anded with this, and the result is non-zero, then using the
- * original value in UTF8_ACCUMULATE will overflow, shifting bits off the left
- * */
-#define UTF_ACCUMULATION_OVERFLOW_MASK                                 \
-    (((UV) UTF_CONTINUATION_MASK) << ((sizeof(UV) * CHARBITS)           \
-           - UTF_ACCUMULATION_SHIFT))
-
 /* This works in the face of malformed UTF-8. */
 #define UTF8_IS_NEXT_CHAR_DOWNGRADEABLE(s, e) (UTF8_IS_DOWNGRADEABLE_START(*s) \
                                                && ( (e) - (s) > 1)             \
@@ -718,26 +713,37 @@ case any call to string overloading updates the internal UTF-8 encoding flag.
 
 
 #define UTF8_ALLOW_EMPTY               0x0001  /* Allow a zero length string */
+#define UTF8_GOT_EMPTY                  UTF8_ALLOW_EMPTY
 
 /* Allow first byte to be a continuation byte */
 #define UTF8_ALLOW_CONTINUATION                0x0002
+#define UTF8_GOT_CONTINUATION          UTF8_ALLOW_CONTINUATION
 
-/* Allow second... bytes to be non-continuation bytes */
+/* Unexpected non-continuation byte */
 #define UTF8_ALLOW_NON_CONTINUATION    0x0004
+#define UTF8_GOT_NON_CONTINUATION      UTF8_ALLOW_NON_CONTINUATION
 
 /* expecting more bytes than were available in the string */
 #define UTF8_ALLOW_SHORT               0x0008
+#define UTF8_GOT_SHORT                 UTF8_ALLOW_SHORT
 
 /* Overlong sequence; i.e., the code point can be specified in fewer bytes. */
 #define UTF8_ALLOW_LONG                 0x0010
+#define UTF8_GOT_LONG                   UTF8_ALLOW_LONG
+
+/* Currently no way to allow overflow */
+#define UTF8_GOT_OVERFLOW               0x0020
 
 #define UTF8_DISALLOW_SURROGATE                0x0040  /* Unicode surrogates */
+#define UTF8_GOT_SURROGATE             UTF8_DISALLOW_SURROGATE
 #define UTF8_WARN_SURROGATE            0x0080
 
 #define UTF8_DISALLOW_NONCHAR           0x0100 /* Unicode non-character */
+#define UTF8_GOT_NONCHAR                UTF8_DISALLOW_NONCHAR
 #define UTF8_WARN_NONCHAR               0x0200 /*  code points */
 
 #define UTF8_DISALLOW_SUPER            0x0400  /* Super-set of Unicode: code */
+#define UTF8_GOT_SUPER                 UTF8_DISALLOW_SUPER
 #define UTF8_WARN_SUPER                        0x0800  /* points above the legal max */
 
 /* Code points which never were part of the original UTF-8 standard, which only
@@ -745,6 +751,7 @@ case any call to string overloading updates the internal UTF-8 encoding flag.
  * The first byte of these code points is FE or FF on ASCII platforms.  If the
  * first byte is FF, it will overflow a 32-bit word. */
 #define UTF8_DISALLOW_ABOVE_31_BIT      0x1000
+#define UTF8_GOT_ABOVE_31_BIT           UTF8_DISALLOW_ABOVE_31_BIT
 #define UTF8_WARN_ABOVE_31_BIT          0x2000
 
 /* For back compat, these old names are misleading for UTF_EBCDIC */
@@ -772,9 +779,7 @@ case any call to string overloading updates the internal UTF-8 encoding flag.
 #define UTF8_ALLOW_ANY                                                          \
            (~( UTF8_DISALLOW_ILLEGAL_INTERCHANGE|UTF8_DISALLOW_ABOVE_31_BIT    \
                |UTF8_WARN_ILLEGAL_INTERCHANGE|UTF8_WARN_ABOVE_31_BIT))
-#define UTF8_ALLOW_ANYUV                                                        \
-         (UTF8_ALLOW_EMPTY                                                      \
-         & ~(UTF8_DISALLOW_ILLEGAL_INTERCHANGE|UTF8_WARN_ILLEGAL_INTERCHANGE))
+#define UTF8_ALLOW_ANYUV  UTF8_ALLOW_EMPTY
 #define UTF8_ALLOW_DEFAULT             (ckWARN(WARN_UTF8) ? 0 : \
                                         UTF8_ALLOW_ANYUV)