This is a live mirror of the Perl 5 development repository, currently hosted at https://github.com/perl/perl5
The choice of 7 or 13 byte extended UTF-8 should be based on UVSIZE.
author: Nicholas Clark <nick@ccl4.org>
Wed, 11 Sep 2013 11:18:43 +0000 (12:18 +0100)
committer: Nicholas Clark <nick@ccl4.org>
Tue, 17 Sep 2013 11:57:53 +0000 (13:57 +0200)
Previously it was based on HAS_QUAD, which is not (as) correct.

utf8.c
utf8.h

diff --git a/utf8.c b/utf8.c
index 2e157df..f07e8ec 100644 (file)
--- a/utf8.c
+++ b/utf8.c
@@ -190,7 +190,7 @@ Perl_uvoffuni_to_utf8_flags(pTHX_ U8 *d, UV uv, UV flags)
        *d++ = (U8)(( uv        & 0x3f) | 0x80);
        return d;
     }
-#ifdef HAS_QUAD
+#ifdef UTF8_QUAD_MAX
     if (uv < UTF8_QUAD_MAX)
 #endif
     {
@@ -203,7 +203,7 @@ Perl_uvoffuni_to_utf8_flags(pTHX_ U8 *d, UV uv, UV flags)
        *d++ = (U8)(( uv        & 0x3f) | 0x80);
        return d;
     }
-#ifdef HAS_QUAD
+#ifdef UTF8_QUAD_MAX
     {
        *d++ =                            0xff;         /* Can't match U+FFFE! */
        *d++ =                            0x80;         /* 6 Reserved bits */
diff --git a/utf8.h b/utf8.h
index 5880aa3..76b89a4 100644 (file)
--- a/utf8.h
+++ b/utf8.h
@@ -233,7 +233,9 @@ Perl's extended UTF-8 means we can have start bytes up to FF.
     (((UV) UTF_CONTINUATION_MASK) << ((sizeof(UV) * CHARBITS)           \
            - UTF_ACCUMULATION_SHIFT))
 
-#ifdef HAS_QUAD
+#if UVSIZE >= 8
+#  define UTF8_QUAD_MAX UINT64_C(0x1000000000)
+
 /* Input is a true Unicode (not-native) code point */
 #define OFFUNISKIP(uv) ( (uv) < 0x80        ? 1 : \
                      (uv) < 0x800          ? 2 : \
@@ -521,10 +523,6 @@ Perl's extended UTF-8 means we can have start bytes up to FF.
 #define UNICODE_IS_SUPER(c)            ((c) > PERL_UNICODE_MAX)
 #define UNICODE_IS_FE_FF(c)            ((c) > 0x7FFFFFFF)
 
-#ifdef HAS_QUAD
-#    define UTF8_QUAD_MAX      UINT64_C(0x1000000000)
-#endif
-
 #define LATIN_SMALL_LETTER_SHARP_S      LATIN_SMALL_LETTER_SHARP_S_NATIVE
 #define LATIN_SMALL_LETTER_Y_WITH_DIAERESIS                                  \
                                 LATIN_SMALL_LETTER_Y_WITH_DIAERESIS_NATIVE