This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
utf8.h: Add synonyms for flag names
author Karl Williamson <khw@cpan.org>
Mon, 26 Jun 2017 17:43:21 +0000 (11:43 -0600)
committer Karl Williamson <khw@cpan.org>
Thu, 13 Jul 2017 03:14:25 +0000 (21:14 -0600)
The next commit will make the detection of the use of Perl's extended
UTF-8 more accurate.  The current names of various flags in the API are
somewhat misleading.  What we really want to know is whether extended
UTF-8 was used, not the value of the resultant code point.

This commit basically does

    s/ABOVE_31_BIT/PERL_EXTENDED/g

It similarly renames a hash key in ext/XS-APItest/t/utf8.t.

This intermediary step makes the next commit easier to read.

ext/XS-APItest/t/utf8.t
ext/XS-APItest/t/utf8_setup.pl
ext/XS-APItest/t/utf8_warn_base.pl
inline.h
utf8.c
utf8.h

index b057837..37c65aa 100644 (file)
@@ -29,8 +29,8 @@ my $look_for_everything_utf8n_to
                        | $::UTF8_WARN_NONCHAR
                        | $::UTF8_DISALLOW_SUPER
                        | $::UTF8_WARN_SUPER
-                       | $::UTF8_DISALLOW_ABOVE_31_BIT
-                       | $::UTF8_WARN_ABOVE_31_BIT;
+                       | $::UTF8_DISALLOW_PERL_EXTENDED
+                       | $::UTF8_WARN_PERL_EXTENDED;
 my $look_for_everything_uvchr_to
                         = $::UNICODE_DISALLOW_SURROGATE
                        | $::UNICODE_WARN_SURROGATE
@@ -38,8 +38,8 @@ my $look_for_everything_uvchr_to
                        | $::UNICODE_WARN_NONCHAR
                        | $::UNICODE_DISALLOW_SUPER
                        | $::UNICODE_WARN_SUPER
-                       | $::UNICODE_DISALLOW_ABOVE_31_BIT
-                       | $::UNICODE_WARN_ABOVE_31_BIT;
+                       | $::UNICODE_DISALLOW_PERL_EXTENDED
+                       | $::UNICODE_WARN_PERL_EXTENDED;
 
 foreach ([0, '', '', 'empty'],
         [0, 'N', 'N', '1 char'],
@@ -620,15 +620,15 @@ for my $u (sort { utf8::unicode_to_native($a) <=> utf8::unicode_to_native($b) }
 
     my $valid_under_strict = 1;
     my $valid_under_c9strict = 1;
-    my $valid_for_fits_in_31_bits = 1;
+    my $valid_for_not_extended_utf8 = 1;
     if ($n > 0x10FFFF) {
         $this_utf8_flags &= ~($::UTF8_DISALLOW_SUPER|$::UTF8_WARN_SUPER);
         $valid_under_strict = 0;
         $valid_under_c9strict = 0;
         if ($n > 2 ** 31 - 1) {
             $this_utf8_flags &=
-                        ~($::UTF8_DISALLOW_ABOVE_31_BIT|$::UTF8_WARN_ABOVE_31_BIT);
-            $valid_for_fits_in_31_bits = 0;
+                ~($::UTF8_DISALLOW_PERL_EXTENDED|$::UTF8_WARN_PERL_EXTENDED);
+            $valid_for_not_extended_utf8 = 0;
         }
     }
     elsif (($n >= 0xFDD0 && $n <= 0xFDEF) || ($n & 0xFFFE) == 0xFFFE) {
@@ -784,17 +784,18 @@ for my $u (sort { utf8::unicode_to_native($a) <=> utf8::unicode_to_native($b) }
     my $this_uvchr_flags = $look_for_everything_uvchr_to;
     if ($n > 2 ** 31 - 1) {
         $this_uvchr_flags &=
-                ~($::UNICODE_DISALLOW_ABOVE_31_BIT|$::UNICODE_WARN_ABOVE_31_BIT);
+            ~($::UNICODE_DISALLOW_PERL_EXTENDED|$::UNICODE_WARN_PERL_EXTENDED);
     }
     if ($n > 0x10FFFF) {
         $this_uvchr_flags &= ~($::UNICODE_DISALLOW_SUPER|$::UNICODE_WARN_SUPER);
     }
     elsif (($n >= 0xFDD0 && $n <= 0xFDEF) || ($n & 0xFFFE) == 0xFFFE) {
-        $this_uvchr_flags &= ~($::UNICODE_DISALLOW_NONCHAR|$::UNICODE_WARN_NONCHAR);
+        $this_uvchr_flags
+                     &= ~($::UNICODE_DISALLOW_NONCHAR|$::UNICODE_WARN_NONCHAR);
     }
     elsif ($n >= 0xD800 && $n <= 0xDFFF) {
         $this_uvchr_flags
-                     &= ~($::UNICODE_DISALLOW_SURROGATE|$::UNICODE_WARN_SURROGATE);
+                &= ~($::UNICODE_DISALLOW_SURROGATE|$::UNICODE_WARN_SURROGATE);
     }
     $display_flags = sprintf "0x%x", $this_uvchr_flags;
 
@@ -844,17 +845,17 @@ for my $u (sort { utf8::unicode_to_native($a) <=> utf8::unicode_to_native($b) }
                                 = $restriction_types{"strict"}{'valid_counts'};
     }
 
-    if ($valid_for_fits_in_31_bits) {
-        $restriction_types{"fits_in_31_bits"}{'valid_strings'} .= $bytes;
-        $restriction_types{"fits_in_31_bits"}{'valid_counts'}++;
+    if ($valid_for_not_extended_utf8) {
+        $restriction_types{"not_extended_utf8"}{'valid_strings'} .= $bytes;
+        $restriction_types{"not_extended_utf8"}{'valid_counts'}++;
     }
     elsif (! exists
-                $restriction_types{"fits_in_31_bits"}{'first_invalid_offset'})
+                $restriction_types{"not_extended_utf8"}{'first_invalid_offset'})
     {
-        $restriction_types{"fits_in_31_bits"}{'first_invalid_offset'}
-                = length $restriction_types{"fits_in_31_bits"}{'valid_strings'};
-        $restriction_types{"fits_in_31_bits"}{'first_invalid_count'}
-                        = $restriction_types{"fits_in_31_bits"}{'valid_counts'};
+        $restriction_types{"not_extended_utf8"}{'first_invalid_offset'}
+                = length $restriction_types{"not_extended_utf8"}{'valid_strings'};
+        $restriction_types{"not_extended_utf8"}{'first_invalid_count'}
+                        = $restriction_types{"not_extended_utf8"}{'valid_counts'};
     }
 }
 
@@ -874,7 +875,7 @@ for my $restriction (sort keys %restriction_types) {
         # and the specially named foo function.  But not if there isn't such a
         # specially named function.  Currently, this is the only tested
         # restriction that doesn't have a specially named function
-        next if $use_flags eq "" && $restriction eq "fits_in_31_bits";
+        next if $use_flags eq "" && $restriction eq "not_extended_utf8";
 
         # Start building up the name of the function we will test.
         my $base_name = "is_";
@@ -994,8 +995,8 @@ for my $restriction (sort keys %restriction_types) {
                             elsif ($restriction eq "strict") {
                                 $test .= ", $::UTF8_DISALLOW_ILLEGAL_INTERCHANGE";
                             }
-                            elsif ($restriction eq "fits_in_31_bits") {
-                                $test .= ", $::UTF8_DISALLOW_ABOVE_31_BIT";
+                            elsif ($restriction eq "not_extended_utf8") {
+                                $test .= ", $::UTF8_DISALLOW_PERL_EXTENDED";
                             }
                             else {
                                 fail("Internal test error: Unknown restriction "
index 62b0649..ec7a5ce 100644 (file)
@@ -82,9 +82,9 @@ $::UTF8_WARN_NONCHAR           = 0x0800;
 $::UTF8_DISALLOW_SUPER         = 0x1000;
 $::UTF8_GOT_SUPER              = $UTF8_DISALLOW_SUPER;
 $::UTF8_WARN_SUPER             = 0x2000;
-$::UTF8_DISALLOW_ABOVE_31_BIT  = 0x4000;
-$::UTF8_GOT_ABOVE_31_BIT       = $UTF8_DISALLOW_ABOVE_31_BIT;
-$::UTF8_WARN_ABOVE_31_BIT      = 0x8000;
+$::UTF8_DISALLOW_PERL_EXTENDED  = 0x4000;
+$::UTF8_GOT_PERL_EXTENDED       = $UTF8_DISALLOW_PERL_EXTENDED;
+$::UTF8_WARN_PERL_EXTENDED      = 0x8000;
 $::UTF8_CHECK_ONLY             = 0x10000;
 $::UTF8_NO_CONFIDENCE_IN_CURLEN_ = 0x20000;
 
@@ -101,8 +101,8 @@ $::UTF8_WARN_ILLEGAL_INTERCHANGE
 $::UNICODE_WARN_SURROGATE        = 0x0001;
 $::UNICODE_WARN_NONCHAR          = 0x0002;
 $::UNICODE_WARN_SUPER            = 0x0004;
-$::UNICODE_WARN_ABOVE_31_BIT     = 0x0008;
+$::UNICODE_WARN_PERL_EXTENDED     = 0x0008;
 $::UNICODE_DISALLOW_SURROGATE    = 0x0010;
 $::UNICODE_DISALLOW_NONCHAR      = 0x0020;
 $::UNICODE_DISALLOW_SUPER        = 0x0040;
-$::UNICODE_DISALLOW_ABOVE_31_BIT = 0x0080;
+$::UNICODE_DISALLOW_PERL_EXTENDED = 0x0080;
index 619a554..94df88e 100644 (file)
@@ -437,8 +437,8 @@ my @utf8n_flags_to_text =  ( qw(
         W_NONCHAR
         D_SUPER
         W_SUPER
-        D_ABOVE_31_BIT
-        W_ABOVE_31_BIT
+        D_PERL_EXTENDED
+        W_PERL_EXTENDED
         CHECK_ONLY
         NO_CONFIDENCE_IN_CURLEN_
     ) );
@@ -475,11 +475,11 @@ sub uvchr_display_call($)
             W_SURROGATE
             W_NONCHAR
             W_SUPER
-            W_ABOVE_31_BIT
+            W_PERL_EXTENDED
             D_SURROGATE
             D_NONCHAR
             D_SUPER
-            D_ABOVE_31_BIT
+            D_PERL_EXTENDED
        ) );
 
     $_[0] =~ / ^ ( [^(]* \( ) ( \d+ ) , \s* ( \d+ ) \) $ /x;
@@ -594,15 +594,15 @@ foreach my $test (@tests) {
 
     if ($will_overflow || $allowed_uv > 0x10FFFF) {
 
-        # Set the SUPER flags; later, we test for ABOVE_31_BIT as well.
+        # Set the SUPER flags; later, we test for PERL_EXTENDED as well.
         $utf8n_flag_to_warn     = $::UTF8_WARN_SUPER;
         $utf8n_flag_to_disallow = $::UTF8_DISALLOW_SUPER;
         $uvchr_flag_to_warn     = $::UNICODE_WARN_SUPER;
         $uvchr_flag_to_disallow = $::UNICODE_DISALLOW_SUPER;;
 
-        # Below, we add the flags for non-above-31 bit to the code points that
-        # don't fit that category.  Special tests are done for this category
-        # in the inner loop.
+        # Below, we add the flags for non-perl_extended to the code points
+        # that don't fit that category.  Special tests are done for this
+        # category in the inner loop.
         $utf8n_flag_to_warn_complement     = $::UTF8_WARN_NONCHAR
                                             |$::UTF8_WARN_SURROGATE;
         $utf8n_flag_to_disallow_complement = $::UTF8_DISALLOW_NONCHAR
@@ -628,11 +628,12 @@ foreach my $test (@tests) {
                                 \Q may not be portable\E/x;
             $non_cp_trailing_text = "is for a non-Unicode code point, may not"
                                 . " be portable";
-            $utf8n_flag_to_warn_complement     |= $::UTF8_WARN_ABOVE_31_BIT;
-            $utf8n_flag_to_disallow_complement |= $::UTF8_DISALLOW_ABOVE_31_BIT;
-            $uvchr_flag_to_warn_complement     |= $::UNICODE_WARN_ABOVE_31_BIT;
+            $utf8n_flag_to_warn_complement     |= $::UTF8_WARN_PERL_EXTENDED;
+            $utf8n_flag_to_disallow_complement
+                                           |= $::UTF8_DISALLOW_PERL_EXTENDED;
+            $uvchr_flag_to_warn_complement |= $::UNICODE_WARN_PERL_EXTENDED;
             $uvchr_flag_to_disallow_complement
-                                            |= $::UNICODE_DISALLOW_ABOVE_31_BIT;
+                                        |= $::UNICODE_DISALLOW_PERL_EXTENDED;
         }
     }
     elsif ($allowed_uv >= 0xD800 && $allowed_uv <= 0xDFFF) {
@@ -648,16 +649,16 @@ foreach my $test (@tests) {
 
         $utf8n_flag_to_warn_complement     = $::UTF8_WARN_NONCHAR
                                             |$::UTF8_WARN_SUPER
-                                            |$::UTF8_WARN_ABOVE_31_BIT;
+                                            |$::UTF8_WARN_PERL_EXTENDED;
         $utf8n_flag_to_disallow_complement = $::UTF8_DISALLOW_NONCHAR
                                             |$::UTF8_DISALLOW_SUPER
-                                            |$::UTF8_DISALLOW_ABOVE_31_BIT;
+                                            |$::UTF8_DISALLOW_PERL_EXTENDED;
         $uvchr_flag_to_warn_complement     = $::UNICODE_WARN_NONCHAR
                                             |$::UNICODE_WARN_SUPER
-                                            |$::UNICODE_WARN_ABOVE_31_BIT;
+                                            |$::UNICODE_WARN_PERL_EXTENDED;
         $uvchr_flag_to_disallow_complement = $::UNICODE_DISALLOW_NONCHAR
                                             |$::UNICODE_DISALLOW_SUPER
-                                            |$::UNICODE_DISALLOW_ABOVE_31_BIT;
+                                            |$::UNICODE_DISALLOW_PERL_EXTENDED;
         $controlling_warning_category = 'surrogate';
     }
     elsif (   ($allowed_uv >= 0xFDD0 && $allowed_uv <= 0xFDEF)
@@ -680,16 +681,16 @@ foreach my $test (@tests) {
 
         $utf8n_flag_to_warn_complement     = $::UTF8_WARN_SURROGATE
                                             |$::UTF8_WARN_SUPER
-                                            |$::UTF8_WARN_ABOVE_31_BIT;
+                                            |$::UTF8_WARN_PERL_EXTENDED;
         $utf8n_flag_to_disallow_complement = $::UTF8_DISALLOW_SURROGATE
                                             |$::UTF8_DISALLOW_SUPER
-                                            |$::UTF8_DISALLOW_ABOVE_31_BIT;
+                                            |$::UTF8_DISALLOW_PERL_EXTENDED;
         $uvchr_flag_to_warn_complement     = $::UNICODE_WARN_SURROGATE
                                             |$::UNICODE_WARN_SUPER
-                                            |$::UNICODE_WARN_ABOVE_31_BIT;
+                                            |$::UNICODE_WARN_PERL_EXTENDED;
         $uvchr_flag_to_disallow_complement = $::UNICODE_DISALLOW_SURROGATE
                                             |$::UNICODE_DISALLOW_SUPER
-                                            |$::UNICODE_DISALLOW_ABOVE_31_BIT;
+                                            |$::UNICODE_DISALLOW_PERL_EXTENDED;
 
         $controlling_warning_category = 'nonchar';
     }
@@ -770,7 +771,8 @@ foreach my $test (@tests) {
         foreach my $disallow_type (0..2) {
             # 0 is don't disallow this type of code point
             # 1 is do disallow
-            # 2 is do disallow, but only for above 31 bit
+            # 2 is do disallow, but only code points requiring
+            #   perl-extended-UTF8
 
             my $disallow_flags;
             my $expected_ret;
@@ -790,7 +792,7 @@ foreach my $test (@tests) {
             }
             elsif ($disallow_type == 2) {
                 next if ! requires_extended_utf8($allowed_uv);
-                $disallow_flags = $::UTF8_DISALLOW_ABOVE_31_BIT;
+                $disallow_flags = $::UTF8_DISALLOW_PERL_EXTENDED;
                 $expected_ret = 0;
             }
             else {  # type is 0
@@ -1106,26 +1108,27 @@ foreach my $test (@tests) {
                         $expect_warnings_for_malformed = 0;
                     }
                     elsif ($warning_type == 4) {  # Like type 3, but uses the
-                                                  # above-31-bit flags
+                                                  # PERL_EXTENDED flags
                         # The complement flags were set up so that the
-                        # above-31-bit flags have been tested that they don't
+                        # PERL_EXTENDED flags have been tested that they don't
                         # trigger wrongly for too small code points.  And the
                         # flags have been set up so that those small code
                         # points are tested for being above Unicode.  What's
                         # left to test is that the large code points do
-                        # trigger the above-31-bit flags.
+                        # trigger the PERL_EXTENDED flags.
                         next if ! requires_extended_utf8($allowed_uv);
                         next if $controlling_warning_category ne 'non_unicode';
                         $eval_warn = "no warnings; use warnings 'non_unicode'";
                         $expect_regular_warnings = 1;
                         $expect_warnings_for_overflow = 1;
                         $expect_warnings_for_malformed = 0;
-                        $this_utf8n_flag_to_warn   = $::UTF8_WARN_ABOVE_31_BIT;
+                        $this_utf8n_flag_to_warn = $::UTF8_WARN_PERL_EXTENDED;
                         $this_utf8n_flag_to_disallow
-                                                = $::UTF8_DISALLOW_ABOVE_31_BIT;
-                        $this_uvchr_flag_to_warn = $::UNICODE_WARN_ABOVE_31_BIT;
+                                             = $::UTF8_DISALLOW_PERL_EXTENDED;
+                        $this_uvchr_flag_to_warn
+                                              = $::UNICODE_WARN_PERL_EXTENDED;
                         $this_uvchr_flag_to_disallow
-                                             = $::UNICODE_DISALLOW_ABOVE_31_BIT;
+                                          = $::UNICODE_DISALLOW_PERL_EXTENDED;
                     }
                     else {
                        die "Unexpected warning type '$warning_type'";
@@ -1180,14 +1183,14 @@ foreach my $test (@tests) {
                         # should emit a message or not.  It's tentative
                         # because, even if we ordinarily would output it, we
                         # don't if malformations are allowed -- except an
-                        # overflow is also a SUPER and ABOVE_31_BIT, and if
+                        # overflow is also a SUPER and PERL_EXTENDED, and if
                         # warnings for those are enabled, the overflow
                         # warning does get raised.
                         if (   $expect_warnings_for_overflow
                             && (    $malformed_allow_type == 0
                                 ||   (   $this_warning_flags
                                       & ($::UTF8_WARN_SUPER
-                                        |$::UTF8_WARN_ABOVE_31_BIT))))
+                                        |$::UTF8_WARN_PERL_EXTENDED))))
                         {
                             push @expected_warnings, $overflow_msg_pattern;
                         }
@@ -1298,10 +1301,10 @@ foreach my $test (@tests) {
                     for (my $i = @expected_return_flags - 1; $i >= 0; $i--) {
                         if ($expected_return_flags[$i] & $returned_flags) {
                             if ($expected_return_flags[$i]
-                                            == $::UTF8_DISALLOW_ABOVE_31_BIT)
+                                                == $::UTF8_GOT_PERL_EXTENDED)
                             {
                                 pass("    Expected and got return flag for"
-                                   . " above_31_bit");
+                                   . " PERL_EXTENDED");
                             }
                                    # The first entries in this are
                                    # malformations
index d840d3d..dc74d1d 100644 (file)
--- a/inline.h
+++ b/inline.h
@@ -609,19 +609,19 @@ S_is_utf8_string_flags(const U8 *s, const STRLEN len, const U32 flags)
 
     PERL_ARGS_ASSERT_IS_UTF8_STRING_FLAGS;
     assert(0 == (flags & ~(UTF8_DISALLOW_ILLEGAL_INTERCHANGE
-                          |UTF8_DISALLOW_ABOVE_31_BIT)));
+                          |UTF8_DISALLOW_PERL_EXTENDED)));
 
     if (flags == 0) {
         return is_utf8_string(s, len);
     }
 
-    if ((flags & ~UTF8_DISALLOW_ABOVE_31_BIT)
+    if ((flags & ~UTF8_DISALLOW_PERL_EXTENDED)
                                         == UTF8_DISALLOW_ILLEGAL_INTERCHANGE)
     {
         return is_strict_utf8_string(s, len);
     }
 
-    if ((flags & ~UTF8_DISALLOW_ABOVE_31_BIT)
+    if ((flags & ~UTF8_DISALLOW_PERL_EXTENDED)
                                        == UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE)
     {
         return is_c9strict_utf8_string(s, len);
@@ -855,19 +855,19 @@ S_is_utf8_string_loclen_flags(const U8 *s, const STRLEN len, const U8 **ep, STRL
 
     PERL_ARGS_ASSERT_IS_UTF8_STRING_LOCLEN_FLAGS;
     assert(0 == (flags & ~(UTF8_DISALLOW_ILLEGAL_INTERCHANGE
-                          |UTF8_DISALLOW_ABOVE_31_BIT)));
+                          |UTF8_DISALLOW_PERL_EXTENDED)));
 
     if (flags == 0) {
         return is_utf8_string_loclen(s, len, ep, el);
     }
 
-    if ((flags & ~UTF8_DISALLOW_ABOVE_31_BIT)
+    if ((flags & ~UTF8_DISALLOW_PERL_EXTENDED)
                                         == UTF8_DISALLOW_ILLEGAL_INTERCHANGE)
     {
         return is_strict_utf8_string_loclen(s, len, ep, el);
     }
 
-    if ((flags & ~UTF8_DISALLOW_ABOVE_31_BIT)
+    if ((flags & ~UTF8_DISALLOW_PERL_EXTENDED)
                                     == UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE)
     {
         return is_c9strict_utf8_string_loclen(s, len, ep, el);
@@ -1123,7 +1123,7 @@ S_is_utf8_valid_partial_char_flags(const U8 * const s, const U8 * const e, const
     PERL_ARGS_ASSERT_IS_UTF8_VALID_PARTIAL_CHAR_FLAGS;
 
     assert(0 == (flags & ~(UTF8_DISALLOW_ILLEGAL_INTERCHANGE
-                          |UTF8_DISALLOW_ABOVE_31_BIT)));
+                          |UTF8_DISALLOW_PERL_EXTENDED)));
 
     if (s >= e || s + UTF8SKIP(s) <= e) {
         return FALSE;
diff --git a/utf8.c b/utf8.c
index 67580ff..88c2b32 100644 (file)
--- a/utf8.c
+++ b/utf8.c
@@ -213,20 +213,20 @@ Perl_uvoffuni_to_utf8_flags(pTHX_ U8 *d, UV uv, const UV flags)
                         cp_above_legal_max, uv, MAX_NON_DEPRECATED_CP);
         }
         if (       (flags & UNICODE_WARN_SUPER)
-            || (   UNICODE_IS_ABOVE_31_BIT(uv)
-                && (flags & UNICODE_WARN_ABOVE_31_BIT)))
+            || (   UNICODE_IS_PERL_EXTENDED(uv)
+                && (flags & UNICODE_WARN_PERL_EXTENDED)))
         {
             Perl_ck_warner_d(aTHX_ packWARN(WARN_NON_UNICODE),
 
               /* Choose the more dire applicable warning */
-              (UNICODE_IS_ABOVE_31_BIT(uv))
+              (UNICODE_IS_PERL_EXTENDED(uv))
               ? above_31_bit_cp_format
               : super_cp_format,
              uv);
         }
         if (       (flags & UNICODE_DISALLOW_SUPER)
-            || (   UNICODE_IS_ABOVE_31_BIT(uv)
-                && (flags & UNICODE_DISALLOW_ABOVE_31_BIT)))
+            || (   UNICODE_IS_PERL_EXTENDED(uv)
+                && (flags & UNICODE_DISALLOW_PERL_EXTENDED)))
         {
             return NULL;
         }
@@ -680,7 +680,7 @@ Perl__is_utf8_char_helper(const U8 * const s, const U8 * e, const U32 flags)
     PERL_ARGS_ASSERT__IS_UTF8_CHAR_HELPER;
 
     assert(0 == (flags & ~(UTF8_DISALLOW_ILLEGAL_INTERCHANGE
-                          |UTF8_DISALLOW_ABOVE_31_BIT)));
+                          |UTF8_DISALLOW_PERL_EXTENDED)));
     assert(! UTF8_IS_INVARIANT(*s));
 
     /* A variant char must begin with a start byte */
@@ -742,7 +742,7 @@ Perl__is_utf8_char_helper(const U8 * const s, const U8 * e, const U32 flags)
             return 0;           /* Above Unicode */
         }
 
-        if (   (flags & UTF8_DISALLOW_ABOVE_31_BIT)
+        if (   (flags & UTF8_DISALLOW_PERL_EXTENDED)
             &&  UNLIKELY(is_utf8_cp_above_31_bits(s, e)))
         {
             return 0;           /* Above 31 bits */
@@ -1320,11 +1320,11 @@ Perl_utf8n_to_uvchr_error(pTHX_ const U8 *s,
        && ((flags & ( UTF8_DISALLOW_NONCHAR
                       |UTF8_DISALLOW_SURROGATE
                       |UTF8_DISALLOW_SUPER
-                      |UTF8_DISALLOW_ABOVE_31_BIT
+                      |UTF8_DISALLOW_PERL_EXTENDED
                      |UTF8_WARN_NONCHAR
                       |UTF8_WARN_SURROGATE
                       |UTF8_WARN_SUPER
-                      |UTF8_WARN_ABOVE_31_BIT))
+                      |UTF8_WARN_PERL_EXTENDED))
                    /* In case of a malformation, 'uv' is not valid, and has
                     * been changed to something in the Unicode range.
                     * Currently we don't output a deprecation message if there
@@ -1423,7 +1423,7 @@ Perl_utf8n_to_uvchr_error(pTHX_ const U8 *s,
                 /* Overflow means also got a super and are using Perl's
                  * extended UTF-8, but we handle all three cases here */
                 possible_problems
-                  &= ~(UTF8_GOT_OVERFLOW|UTF8_GOT_SUPER|UTF8_GOT_ABOVE_31_BIT);
+                  &= ~(UTF8_GOT_OVERFLOW|UTF8_GOT_SUPER|UTF8_GOT_PERL_EXTENDED);
                 *errors |= UTF8_GOT_OVERFLOW;
 
                 /* But the API says we flag all errors found */
@@ -1431,15 +1431,15 @@ Perl_utf8n_to_uvchr_error(pTHX_ const U8 *s,
                     *errors |= UTF8_GOT_SUPER;
                 }
                 if (flags
-                        & (UTF8_WARN_ABOVE_31_BIT|UTF8_DISALLOW_ABOVE_31_BIT))
+                        & (UTF8_WARN_PERL_EXTENDED|UTF8_DISALLOW_PERL_EXTENDED))
                 {
-                    *errors |= UTF8_GOT_ABOVE_31_BIT;
+                    *errors |= UTF8_GOT_PERL_EXTENDED;
                 }
 
                 /* Disallow if any of the three categories say to */
                 if ( ! (flags &   UTF8_ALLOW_OVERFLOW)
                     || (flags & ( UTF8_DISALLOW_SUPER
-                                 |UTF8_DISALLOW_ABOVE_31_BIT)))
+                                 |UTF8_DISALLOW_PERL_EXTENDED)))
                 {
                     disallowed = TRUE;
                 }
@@ -1448,7 +1448,7 @@ Perl_utf8n_to_uvchr_error(pTHX_ const U8 *s,
                  * are on, because this code point is above IV_MAX */
                 if (      ckWARN_d(WARN_DEPRECATED)
                     || ! (flags & UTF8_ALLOW_OVERFLOW)
-                    ||   (flags & (UTF8_WARN_SUPER|UTF8_WARN_ABOVE_31_BIT)))
+                    ||   (flags & (UTF8_WARN_SUPER|UTF8_WARN_PERL_EXTENDED)))
                 {
 
                     /* The warnings code explicitly says it doesn't handle the
@@ -1610,18 +1610,18 @@ Perl_utf8n_to_uvchr_error(pTHX_ const U8 *s,
                  * test for these after the regular SUPER ones, and before
                  * possibly bailing out, so that the slightly more dire warning
                  * will override the regular one. */
-                if (   (flags & (UTF8_WARN_ABOVE_31_BIT
+                if (   (flags & (UTF8_WARN_PERL_EXTENDED
                                 |UTF8_WARN_SUPER
-                                |UTF8_DISALLOW_ABOVE_31_BIT))
+                                |UTF8_DISALLOW_PERL_EXTENDED))
                     && (   (   UNLIKELY(orig_problems & UTF8_GOT_TOO_SHORT)
                             && UNLIKELY(is_utf8_cp_above_31_bits(
                                                 adjusted_s0,
                                                 adjusted_send)))
                         || (   LIKELY(! (orig_problems & UTF8_GOT_TOO_SHORT))
-                            && UNLIKELY(UNICODE_IS_ABOVE_31_BIT(uv)))))
+                            && UNLIKELY(UNICODE_IS_PERL_EXTENDED(uv)))))
                 {
                     if (  ! (flags & UTF8_CHECK_ONLY)
-                        &&  (flags & (UTF8_WARN_ABOVE_31_BIT|UTF8_WARN_SUPER))
+                        &&  (flags & (UTF8_WARN_PERL_EXTENDED|UTF8_WARN_SUPER))
                         &&  ckWARN_d(WARN_NON_UNICODE))
                     {
                         pack_warn = packWARN(WARN_NON_UNICODE);
@@ -1639,12 +1639,12 @@ Perl_utf8n_to_uvchr_error(pTHX_ const U8 *s,
                         }
                     }
 
-                    if (flags & ( UTF8_WARN_ABOVE_31_BIT
-                                 |UTF8_DISALLOW_ABOVE_31_BIT))
+                    if (flags & ( UTF8_WARN_PERL_EXTENDED
+                                 |UTF8_DISALLOW_PERL_EXTENDED))
                     {
-                        *errors |= UTF8_GOT_ABOVE_31_BIT;
+                        *errors |= UTF8_GOT_PERL_EXTENDED;
 
-                        if (flags & UTF8_DISALLOW_ABOVE_31_BIT) {
+                        if (flags & UTF8_DISALLOW_PERL_EXTENDED) {
                             disallowed = TRUE;
                         }
                     }
diff --git a/utf8.h b/utf8.h
index e26f3cc..c880375 100644 (file)
--- a/utf8.h
+++ b/utf8.h
@@ -777,13 +777,16 @@ case any call to string overloading updates the internal UTF-8 encoding flag.
  * went up to 2 ** 31 - 1.  Note that these all overflow a signed 32-bit word,
  * The first byte of these code points is FE or FF on ASCII platforms.  If the
  * first byte is FF, it will overflow a 32-bit word. */
-#define UTF8_DISALLOW_ABOVE_31_BIT      0x4000
-#define UTF8_GOT_ABOVE_31_BIT           UTF8_DISALLOW_ABOVE_31_BIT
-#define UTF8_WARN_ABOVE_31_BIT          0x8000
+#define UTF8_DISALLOW_PERL_EXTENDED     0x4000
+#define UTF8_GOT_PERL_EXTENDED          UTF8_DISALLOW_PERL_EXTENDED
+#define UTF8_WARN_PERL_EXTENDED         0x8000
 
 /* For back compat, these old names are misleading for UTF_EBCDIC */
-#define UTF8_DISALLOW_FE_FF             UTF8_DISALLOW_ABOVE_31_BIT
-#define UTF8_WARN_FE_FF                 UTF8_WARN_ABOVE_31_BIT
+#define UTF8_DISALLOW_ABOVE_31_BIT      UTF8_DISALLOW_PERL_EXTENDED
+#define UTF8_GOT_ABOVE_31_BIT           UTF8_GOT_PERL_EXTENDED
+#define UTF8_WARN_ABOVE_31_BIT          UTF8_WARN_PERL_EXTENDED
+#define UTF8_DISALLOW_FE_FF             UTF8_DISALLOW_PERL_EXTENDED
+#define UTF8_WARN_FE_FF                 UTF8_WARN_PERL_EXTENDED
 
 #define UTF8_CHECK_ONLY                        0x10000
 #define _UTF8_NO_CONFIDENCE_IN_CURLEN   0x20000  /* Internal core use only */
@@ -907,14 +910,16 @@ point's representation.
  * let's be conservative and do as Unicode says. */
 #define PERL_UNICODE_MAX       0x10FFFF
 
-#define UNICODE_WARN_SURROGATE        0x0001   /* UTF-16 surrogates */
-#define UNICODE_WARN_NONCHAR          0x0002   /* Non-char code points */
-#define UNICODE_WARN_SUPER            0x0004   /* Above 0x10FFFF */
-#define UNICODE_WARN_ABOVE_31_BIT     0x0008   /* Above 0x7FFF_FFFF */
-#define UNICODE_DISALLOW_SURROGATE    0x0010
-#define UNICODE_DISALLOW_NONCHAR      0x0020
-#define UNICODE_DISALLOW_SUPER        0x0040
-#define UNICODE_DISALLOW_ABOVE_31_BIT 0x0080
+#define UNICODE_WARN_SURROGATE         0x0001  /* UTF-16 surrogates */
+#define UNICODE_WARN_NONCHAR           0x0002  /* Non-char code points */
+#define UNICODE_WARN_SUPER             0x0004  /* Above 0x10FFFF */
+#define UNICODE_WARN_PERL_EXTENDED     0x0008  /* Above 0x7FFF_FFFF */
+#define UNICODE_WARN_ABOVE_31_BIT      UNICODE_WARN_PERL_EXTENDED
+#define UNICODE_DISALLOW_SURROGATE     0x0010
+#define UNICODE_DISALLOW_NONCHAR       0x0020
+#define UNICODE_DISALLOW_SUPER         0x0040
+#define UNICODE_DISALLOW_PERL_EXTENDED 0x0080
+#define UNICODE_DISALLOW_ABOVE_31_BIT  UNICODE_DISALLOW_PERL_EXTENDED
 #define UNICODE_WARN_ILLEGAL_C9_INTERCHANGE                                   \
                                   (UNICODE_WARN_SURROGATE|UNICODE_WARN_SUPER)
 #define UNICODE_WARN_ILLEGAL_INTERCHANGE                                      \
@@ -953,7 +958,7 @@ point's representation.
          && UNICODE_IS_END_PLANE_NONCHAR_GIVEN_NOT_SUPER(uv)))
 
 #define UNICODE_IS_SUPER(uv)    ((UV) (uv) > PERL_UNICODE_MAX)
-#define UNICODE_IS_ABOVE_31_BIT(uv)    ((UV) (uv) > 0x7FFFFFFF)
+#define UNICODE_IS_PERL_EXTENDED(uv)    ((UV) (uv) > 0x7FFFFFFF)
 
 #define LATIN_SMALL_LETTER_SHARP_S      LATIN_SMALL_LETTER_SHARP_S_NATIVE
 #define LATIN_SMALL_LETTER_Y_WITH_DIAERESIS                                  \