Update Socket version in Maintainers.pl

[perl5.git] / utf8.h
diff --git a/utf8.h b/utf8.h

index 74e7d48..613389c 100644 (file)
--- a/utf8.h
+++ b/utf8.h
@@ -62,13 +62,6 @@
  #define FOLDEQ_S1_ALREADY_FOLDED  (1 << 2)
  #define FOLDEQ_S2_ALREADY_FOLDED  (1 << 3)
  
-/*
-=for apidoc ibcmp_utf8
-
-This is a synonym for (! foldEQ_utf8())
-
-=cut
-*/
  #define ibcmp_utf8(s1, pe1, l1, u1, s2, pe2, l2, u2) \
                     cBOOL(! foldEQ_utf8(s1, pe1, l1, u1, s2, pe2, l2, u2))
  
@@ -270,6 +263,10 @@ Perl's extended UTF-8 means we can have start bytes up to FF.
  #error UTF8_MAXBYTES must be at least 12
  #endif
  
+/* ^? is defined to be DEL on ASCII systems.  See the definition of toCTRL()
+ * for more */
+#define QUESTION_MARK_CTRL  DEL_NATIVE
+
  #define MAX_UTF8_TWO_BYTE 0x7FF
  
  #define UTF8_MAXBYTES_CASE     UTF8_MAXBYTES
@@ -418,8 +415,11 @@ Perl's extended UTF-8 means we can have start bytes up to FF.
  #define IN_BYTES (CopHINTS_get(PL_curcop) & HINT_BYTES)
  #define DO_UTF8(sv) (SvUTF8(sv) && !IN_BYTES)
  #define IN_UNI_8_BIT \
-           (CopHINTS_get(PL_curcop) & (HINT_UNI_8_BIT|HINT_LOCALE_NOT_CHARS) \
-            && ! IN_LOCALE_RUNTIME && ! IN_BYTES)
+           (((CopHINTS_get(PL_curcop) & (HINT_UNI_8_BIT))                       \
+               || (CopHINTS_get(PL_curcop) & HINT_LOCALE_PARTIAL                 \
+                   /* -1 below is for :not_characters */                         \
+                   && _is_in_locale_category(FALSE, -1)))                        \
+              && ! IN_BYTES)
  
  
  #define UTF8_ALLOW_EMPTY               0x0001  /* Allow a zero length string */
@@ -485,7 +485,9 @@ Perl's extended UTF-8 means we can have start bytes up to FF.
   * U+10FFFF: \xF4\x8F\xBF\xBF  \xF9\xA1\xBF\xBF\xBF    max legal Unicode
   * U+110000: \xF4\x90\x80\x80  \xF9\xA2\xA0\xA0\xA0
   * U+110001: \xF4\x90\x80\x81  \xF9\xA2\xA0\xA0\xA1
- */
+ *
+ * BE AWARE that this test doesn't rule out malformed code points, in
+ * particular overlongs */
  #ifdef EBCDIC /* Both versions assume well-formed UTF8 */
  #   define UTF8_IS_SUPER(s) (NATIVE_UTF8_TO_I8(* (U8*) (s)) >= 0xF9             \
                           && (NATIVE_UTF8_TO_I8(* (U8*) (s)) > 0xF9              \
@@ -639,13 +641,13 @@ machines) is a valid UTF-8 character.
  =cut
  */
  
-#define isUTF8_CHAR(s, e)   (((e) <= (s))                                   \
+#define isUTF8_CHAR(s, e)   (UNLIKELY((e) <= (s))                           \
                               ? 0                                            \
                               : (UTF8_IS_INVARIANT(*s))                      \
                                 ? 1                                          \
-                               : (((e) - (s)) < UTF8SKIP(s))                \
+                               : UNLIKELY(((e) - (s)) < UTF8SKIP(s))        \
                                   ? 0                                        \
-                                 : (IS_UTF8_CHAR_FAST(UTF8SKIP(s)))         \
+                                 : LIKELY(IS_UTF8_CHAR_FAST(UTF8SKIP(s)))   \
                                     ? is_UTF8_CHAR_utf8_no_length_checks(s)  \
                                     : _is_utf8_char_slow(s, e))