Data::Dumper: Generalize for EBCDIC platforms

[perl5.git] / utf8.h
diff --git a/utf8.h b/utf8.h

index 3d29706..8418055 100644 (file)
--- a/utf8.h
+++ b/utf8.h
@@ -39,6 +39,20 @@
  #define _CORE_SWASH_INIT_RETURN_IF_UNDEF       0x2
  #define _CORE_SWASH_INIT_ACCEPT_INVLIST        0x4
  
+/*
+=head1 Unicode Support
+
+=for apidoc is_ascii_string
+
+This is a misleadingly-named synonym for L</is_invariant_string>.
+On ASCII-ish platforms, the name isn't misleading: the ASCII-range characters
+are exactly the UTF-8 invariants.  But EBCDIC machines have more invariants
+than just the ASCII characters, so C<is_invariant_string> is preferred.
+
+=cut
+*/
+#define is_ascii_string(s, len)     is_invariant_string(s, len)
+
  #define uvchr_to_utf8(a,b)          uvchr_to_utf8_flags(a,b,0)
  #define uvchr_to_utf8_flags(d,uv,flags)                                        \
                              uvoffuni_to_utf8_flags(d,NATIVE_TO_UNI(uv),flags)
@@ -61,6 +75,8 @@
  #define FOLDEQ_LOCALE             (1 << 1)
  #define FOLDEQ_S1_ALREADY_FOLDED  (1 << 2)
  #define FOLDEQ_S2_ALREADY_FOLDED  (1 << 3)
+#define FOLDEQ_S1_FOLDS_SANE      (1 << 4)
+#define FOLDEQ_S2_FOLDS_SANE      (1 << 5)
  
  #define ibcmp_utf8(s1, pe1, l1, u1, s2, pe2, l2, u2) \
                     cBOOL(! foldEQ_utf8(s1, pe1, l1, u1, s2, pe2, l2, u2))
@@ -377,15 +393,15 @@ Perl's extended UTF-8 means we can have start bytes up to FF.
   * code point whose UTF-8 is known to occupy 2 bytes; they are less efficient
   * than the EIGHT_BIT versions on EBCDIC platforms.  We use the bitwise '~'
   * operator instead of "<=" to avoid getting compiler warnings.
- * MAX_PORTABLE_UTF8_TWO_BYTE should be exactly all one bits in the lower few
+ * MAX_UTF8_TWO_BYTE should be exactly all one bits in the lower few
   * places, so the ~ works */
  #define UTF8_TWO_BYTE_HI(c)                                                    \
         (__ASSERT_((sizeof(c) ==  1)                                            \
-                  || !(((WIDEST_UTYPE)(c)) & ~MAX_PORTABLE_UTF8_TWO_BYTE))     \
+                  || !(((WIDEST_UTYPE)(c)) & ~MAX_UTF8_TWO_BYTE))              \
          ((U8) __BASE_TWO_BYTE_HI(c, NATIVE_TO_UNI)))
  #define UTF8_TWO_BYTE_LO(c)                                                    \
         (__ASSERT_((sizeof(c) ==  1)                                            \
-                  || !(((WIDEST_UTYPE)(c)) & ~MAX_PORTABLE_UTF8_TWO_BYTE))     \
+                  || !(((WIDEST_UTYPE)(c)) & ~MAX_UTF8_TWO_BYTE))              \
          ((U8) __BASE_TWO_BYTE_LO(c, NATIVE_TO_UNI)))
  
  /* This is illegal in any well-formed UTF-8 in both EBCDIC and ASCII
@@ -625,7 +641,6 @@ Perl's extended UTF-8 means we can have start bytes up to FF.
  #endif
  
  /*
-=head1 Unicode Support
  
  =for apidoc Am|STRLEN|isUTF8_CHAR|const U8 *s|const U8 *e