This is a live mirror of the Perl 5 development repository, currently hosted at https://github.com/perl/perl5
Since HP cc is strict c89, use -std=c89 with gcc.
[perl5.git] / utf8.h
diff --git a/utf8.h b/utf8.h
index 2357fb0..3d29706 100644 (file)
--- a/utf8.h
+++ b/utf8.h
@@ -382,11 +382,11 @@ Perl's extended UTF-8 means we can have start bytes up to FF.
 #define UTF8_TWO_BYTE_HI(c)                                                    \
        (__ASSERT_((sizeof(c) ==  1)                                            \
                   || !(((WIDEST_UTYPE)(c)) & ~MAX_PORTABLE_UTF8_TWO_BYTE))     \
-        ((U8) __BASE_TWO_BYTE_HI(c, NATIVE_TO_LATIN1)))
+        ((U8) __BASE_TWO_BYTE_HI(c, NATIVE_TO_UNI)))
 #define UTF8_TWO_BYTE_LO(c)                                                    \
        (__ASSERT_((sizeof(c) ==  1)                                            \
                   || !(((WIDEST_UTYPE)(c)) & ~MAX_PORTABLE_UTF8_TWO_BYTE))     \
-        ((U8) __BASE_TWO_BYTE_LO(c, NATIVE_TO_LATIN1)))
+        ((U8) __BASE_TWO_BYTE_LO(c, NATIVE_TO_UNI)))
 
 /* This is illegal in any well-formed UTF-8 in both EBCDIC and ASCII
  * as it is only in overlongs. */
@@ -485,7 +485,9 @@ Perl's extended UTF-8 means we can have start bytes up to FF.
  * U+10FFFF: \xF4\x8F\xBF\xBF  \xF9\xA1\xBF\xBF\xBF    max legal Unicode
  * U+110000: \xF4\x90\x80\x80  \xF9\xA2\xA0\xA0\xA0
  * U+110001: \xF4\x90\x80\x81  \xF9\xA2\xA0\xA0\xA1
- */
+ *
+ * BE AWARE that this test doesn't rule out malformed code points, in
+ * particular overlongs */
 #ifdef EBCDIC /* Both versions assume well-formed UTF8 */
 #   define UTF8_IS_SUPER(s) (NATIVE_UTF8_TO_I8(* (U8*) (s)) >= 0xF9             \
                          && (NATIVE_UTF8_TO_I8(* (U8*) (s)) > 0xF9              \
@@ -575,8 +577,7 @@ Perl's extended UTF-8 means we can have start bytes up to FF.
         (ANYOF_NONBITMAP(node)) && \
         (ANYOF_FLAGS(node) & ANYOF_LOC_NONBITMAP_FOLD) && \
         ((end) > (input) + 1) && \
-        toFOLD((input)[0]) == 's' && \
-        toFOLD((input)[1]) == 's')
+        isALPHA_FOLD_EQ((input)[0], 's'))
 
 #define SHARP_S_SKIP 2