utf8.h: Add clearer #define synonyms

author Karl Williamson <khw@cpan.org>

Thu, 26 Nov 2015 03:41:39 +0000 (20:41 -0700)

committer Karl Williamson <khw@cpan.org>

Sun, 29 Nov 2015 00:19:26 +0000 (17:19 -0700)
author Karl Williamson <khw@cpan.org>
Thu, 26 Nov 2015 03:41:39 +0000 (20:41 -0700)
committer Karl Williamson <khw@cpan.org>
Sun, 29 Nov 2015 00:19:26 +0000 (17:19 -0700)
diff --git a/utf8.c b/utf8.c

index 52b6b98..7faecad 100644 (file)
--- a/utf8.c
+++ b/utf8.c
@@ -124,18 +124,18 @@ Perl_uvoffuni_to_utf8_flags(pTHX_ U8 *d, UV uv, UV flags)
         }
         else if (UNICODE_IS_SUPER(uv)) {
             if (   (flags & UNICODE_WARN_SUPER)
-               || (UNICODE_IS_FE_FF(uv) && (flags & UNICODE_WARN_FE_FF)))
+               || (UNICODE_IS_ABOVE_31_BIT(uv) && (flags & UNICODE_WARN_ABOVE_31_BIT)))
              {
                  Perl_ck_warner_d(aTHX_ packWARN(WARN_NON_UNICODE),
  
                    /* Choose the more dire applicable warning */
-                  (UNICODE_IS_FE_FF(uv))
+                  (UNICODE_IS_ABOVE_31_BIT(uv))
                    ? "Code point 0x%"UVXf" is not Unicode, and not portable"
                    : "Code point 0x%"UVXf" is not Unicode, may not be portable",
                   uv);
             }
             if (flags & UNICODE_DISALLOW_SUPER
-               || (UNICODE_IS_FE_FF(uv) && (flags & UNICODE_DISALLOW_FE_FF)))
+               || (UNICODE_IS_ABOVE_31_BIT(uv) && (flags & UNICODE_DISALLOW_ABOVE_31_BIT)))
             {
                 return NULL;
             }
@@ -294,8 +294,8 @@ C<UNICODE_WARN_SUPER> and C<UNICODE_DISALLOW_SUPER> flags affect the handling of
  code points that are
  above the Unicode maximum of 0x10FFFF.  Code points above 0x7FFF_FFFF (which are
  even less portable) can be warned and/or disallowed even if other above-Unicode
-code points are accepted, by the C<UNICODE_WARN_FE_FF> and
-C<UNICODE_DISALLOW_FE_FF> flags.
+code points are accepted, by the C<UNICODE_WARN_ABOVE_31_BIT> and
+C<UNICODE_DISALLOW_ABOVE_31_BIT> flags.
  
  And finally, the flag C<UNICODE_WARN_ILLEGAL_INTERCHANGE> selects all four of
  the above WARN flags; and C<UNICODE_DISALLOW_ILLEGAL_INTERCHANGE> selects all
@@ -463,11 +463,12 @@ imposed later).  (The smaller ones, those that fit into 32 bits, are
  representable by a UV on ASCII platforms, but not by an IV, which means that
  the number of operations that can be performed on them is quite restricted.)
  The UTF-8 encoding on ASCII platforms for these large code points begins with a
-byte containing 0xFE or 0xFF.  The C<UTF8_DISALLOW_FE_FF> flag will cause them to
-be treated as malformations, while allowing smaller above-Unicode code points.
+byte containing 0xFE or 0xFF.  The C<UTF8_DISALLOW_ABOVE_31_BIT> flag will
+cause them to be treated as malformations, while allowing smaller above-Unicode
+code points.
  (Of course C<UTF8_DISALLOW_SUPER> will treat all above-Unicode code points,
  including these, as malformations.)
-Similarly, C<UTF8_WARN_FE_FF> acts just like
+Similarly, C<UTF8_WARN_ABOVE_31_BIT> acts just like
  the other WARN flags, but applies just to these code points.
  
  All other code points corresponding to Unicode characters, including private
@@ -713,10 +714,8 @@ Perl_utf8n_to_uvchr(pTHX_ const U8 *s, STRLEN curlen, STRLEN *retlen, U32 flags)
               * very well may not be understood by other applications (including
               * earlier perl versions on EBCDIC platforms).  On ASCII platforms,
               * these code points are indicated by the first UTF-8 byte being
-             * 0xFE or 0xFF, hence names like 'UTF8_WARN_FE_FF'.  These names
-             * are ASCII-centric, because the criteria is different On EBCDIC
-             * platforms.  We test for these after the regular SUPER ones, and
-             * before possibly bailing out, so that the slightly more dire
+             * 0xFE or 0xFF.  We test for these after the regular SUPER ones,
+             * and before possibly bailing out, so that the slightly more dire
               * warning will override the regular one. */
              if (
  #ifndef EBCDIC
@@ -740,10 +739,11 @@ Perl_utf8n_to_uvchr(pTHX_ const U8 *s, STRLEN curlen, STRLEN *retlen, U32 flags)
                                                   || s0[6] > 0x41
                                                   || s0[7] > 0x42)
  #endif
-                && (flags & (UTF8_WARN_FE_FF|UTF8_WARN_SUPER|UTF8_DISALLOW_FE_FF)))
+                && (flags & (UTF8_WARN_ABOVE_31_BIT|UTF8_WARN_SUPER
+                            |UTF8_DISALLOW_ABOVE_31_BIT)))
              {
                  if (  ! (flags & UTF8_CHECK_ONLY)
-                    &&  (flags & (UTF8_WARN_FE_FF|UTF8_WARN_SUPER))
+                    &&  (flags & (UTF8_WARN_ABOVE_31_BIT|UTF8_WARN_SUPER))
                      &&  ckWARN_d(WARN_UTF8))
                  {
                      sv = sv_2mortal(Perl_newSVpvf(aTHX_
@@ -751,7 +751,7 @@ Perl_utf8n_to_uvchr(pTHX_ const U8 *s, STRLEN curlen, STRLEN *retlen, U32 flags)
                          uv));
                      pack_warn = packWARN(WARN_UTF8);
                  }
-                if (flags & UTF8_DISALLOW_FE_FF) {
+                if (flags & UTF8_DISALLOW_ABOVE_31_BIT) {
                      goto disallowed;
                  }
              }
diff --git a/utf8.h b/utf8.h

index c3704de..36c3852 100644 (file)
--- a/utf8.h
+++ b/utf8.h
@@ -538,12 +538,16 @@ case any call to string overloading updates the internal UTF-8 encoding flag.
  #define UTF8_DISALLOW_SUPER            0x0200  /* Super-set of Unicode: code */
  #define UTF8_WARN_SUPER                        0x0400  /* points above the legal max */
  
-/* Code points which never were part of the original UTF-8 standard, the first
- * byte of which is a FE or FF on ASCII platforms. If the first byte is FF, it
- * will overflow a 32-bit word.  If the first byte is FE, it will overflow a
- * signed 32-bit word. */
-#define UTF8_DISALLOW_FE_FF            0x0800
-#define UTF8_WARN_FE_FF                        0x1000
+/* Code points which never were part of the original UTF-8 standard, which only
+ * went up to 2 ** 31 - 1.  Note that these all overflow a signed 32-bit word.
+ * The first byte of these code points is FE or FF on ASCII platforms.  If the
+ * first byte is FF, it will overflow a 32-bit word. */
+#define UTF8_DISALLOW_ABOVE_31_BIT      0x0800
+#define UTF8_WARN_ABOVE_31_BIT          0x1000
+
+/* Kept for back compat; these old names are misleading for UTF-EBCDIC */
+#define UTF8_DISALLOW_FE_FF             UTF8_DISALLOW_ABOVE_31_BIT
+#define UTF8_WARN_FE_FF                 UTF8_WARN_ABOVE_31_BIT
  
  #define UTF8_CHECK_ONLY                        0x2000
  
@@ -553,11 +557,11 @@ case any call to string overloading updates the internal UTF-8 encoding flag.
  #define UTF8_ALLOW_FFFF 0
  #define UTF8_ALLOW_SURROGATE 0
  
-#define UTF8_DISALLOW_ILLEGAL_INTERCHANGE                                      \
-                                (UTF8_DISALLOW_SUPER|UTF8_DISALLOW_NONCHAR     \
-                                 |UTF8_DISALLOW_SURROGATE|UTF8_DISALLOW_FE_FF)
+#define UTF8_DISALLOW_ILLEGAL_INTERCHANGE                                       \
+                       (UTF8_DISALLOW_SUPER|UTF8_DISALLOW_NONCHAR               \
+                        |UTF8_DISALLOW_SURROGATE|UTF8_DISALLOW_ABOVE_31_BIT)
  #define UTF8_WARN_ILLEGAL_INTERCHANGE \
-       (UTF8_WARN_SUPER|UTF8_WARN_NONCHAR|UTF8_WARN_SURROGATE|UTF8_WARN_FE_FF)
+  (UTF8_WARN_SUPER|UTF8_WARN_NONCHAR|UTF8_WARN_SURROGATE|UTF8_WARN_ABOVE_31_BIT)
  #define UTF8_ALLOW_ANY \
             (~(UTF8_DISALLOW_ILLEGAL_INTERCHANGE|UTF8_WARN_ILLEGAL_INTERCHANGE))
  #define UTF8_ALLOW_ANYUV                                                        \
@@ -605,14 +609,14 @@ case any call to string overloading updates the internal UTF-8 encoding flag.
   * let's be conservative and do as Unicode says. */
  #define PERL_UNICODE_MAX       0x10FFFF
  
-#define UNICODE_WARN_SURROGATE     0x0001      /* UTF-16 surrogates */
-#define UNICODE_WARN_NONCHAR       0x0002      /* Non-char code points */
-#define UNICODE_WARN_SUPER         0x0004      /* Above 0x10FFFF */
-#define UNICODE_WARN_FE_FF         0x0008      /* Above 0x10FFFF */
-#define UNICODE_DISALLOW_SURROGATE 0x0010
-#define UNICODE_DISALLOW_NONCHAR   0x0020
-#define UNICODE_DISALLOW_SUPER     0x0040
-#define UNICODE_DISALLOW_FE_FF     0x0080
+#define UNICODE_WARN_SURROGATE        0x0001   /* UTF-16 surrogates */
+#define UNICODE_WARN_NONCHAR          0x0002   /* Non-char code points */
+#define UNICODE_WARN_SUPER            0x0004   /* Above 0x10FFFF */
+#define UNICODE_WARN_ABOVE_31_BIT     0x0008   /* Above 0x7FFF_FFFF */
+#define UNICODE_DISALLOW_SURROGATE    0x0010
+#define UNICODE_DISALLOW_NONCHAR      0x0020
+#define UNICODE_DISALLOW_SUPER        0x0040
+#define UNICODE_DISALLOW_ABOVE_31_BIT 0x0080
  #define UNICODE_WARN_ILLEGAL_INTERCHANGE                                      \
              (UNICODE_WARN_SURROGATE|UNICODE_WARN_NONCHAR|UNICODE_WARN_SUPER)
  #define UNICODE_DISALLOW_ILLEGAL_INTERCHANGE                                  \
@@ -635,7 +639,7 @@ case any call to string overloading updates the internal UTF-8 encoding flag.
                          * characters at all */                                \
                         || ((((c & 0xFFFE) == 0xFFFE)) && ! UNICODE_IS_SUPER(c)))
  #define UNICODE_IS_SUPER(c)            ((c) > PERL_UNICODE_MAX)
-#define UNICODE_IS_FE_FF(c)            ((c) > 0x7FFFFFFF)
+#define UNICODE_IS_ABOVE_31_BIT(uv)    ((UV) (uv) > 0x7FFFFFFF)
  
  #define LATIN_SMALL_LETTER_SHARP_S      LATIN_SMALL_LETTER_SHARP_S_NATIVE
  #define LATIN_SMALL_LETTER_Y_WITH_DIAERESIS                                  \
author	Karl Williamson <khw@cpan.org>
	Thu, 26 Nov 2015 03:41:39 +0000 (20:41 -0700)
committer	Karl Williamson <khw@cpan.org>
	Sun, 29 Nov 2015 00:19:26 +0000 (17:19 -0700)
utf8.c		patch \| blob \| blame \| history
utf8.h		patch \| blob \| blame \| history