utf8.h: Add synonyms for flag names

author Karl Williamson <khw@cpan.org>

Mon, 26 Jun 2017 17:43:21 +0000 (11:43 -0600)

committer Karl Williamson <khw@cpan.org>

Thu, 13 Jul 2017 03:14:25 +0000 (21:14 -0600)
author Karl Williamson <khw@cpan.org>
Mon, 26 Jun 2017 17:43:21 +0000 (11:43 -0600)
committer Karl Williamson <khw@cpan.org>
Thu, 13 Jul 2017 03:14:25 +0000 (21:14 -0600)
diff --git a/ext/XS-APItest/t/utf8.t b/ext/XS-APItest/t/utf8.t

index b057837..37c65aa 100644 (file)
--- a/ext/XS-APItest/t/utf8.t
+++ b/ext/XS-APItest/t/utf8.t
@@ -29,8 +29,8 @@ my $look_for_everything_utf8n_to
                         | $::UTF8_WARN_NONCHAR
                         | $::UTF8_DISALLOW_SUPER
                         | $::UTF8_WARN_SUPER
-                       | $::UTF8_DISALLOW_ABOVE_31_BIT
-                       | $::UTF8_WARN_ABOVE_31_BIT;
+                       | $::UTF8_DISALLOW_PERL_EXTENDED
+                       | $::UTF8_WARN_PERL_EXTENDED;
  my $look_for_everything_uvchr_to
                          = $::UNICODE_DISALLOW_SURROGATE
                         | $::UNICODE_WARN_SURROGATE
@@ -38,8 +38,8 @@ my $look_for_everything_uvchr_to
                         | $::UNICODE_WARN_NONCHAR
                         | $::UNICODE_DISALLOW_SUPER
                         | $::UNICODE_WARN_SUPER
-                       | $::UNICODE_DISALLOW_ABOVE_31_BIT
-                       | $::UNICODE_WARN_ABOVE_31_BIT;
+                       | $::UNICODE_DISALLOW_PERL_EXTENDED
+                       | $::UNICODE_WARN_PERL_EXTENDED;
  
  foreach ([0, '', '', 'empty'],
          [0, 'N', 'N', '1 char'],
@@ -620,15 +620,15 @@ for my $u (sort { utf8::unicode_to_native($a) <=> utf8::unicode_to_native($b) }
  
      my $valid_under_strict = 1;
      my $valid_under_c9strict = 1;
-    my $valid_for_fits_in_31_bits = 1;
+    my $valid_for_not_extended_utf8 = 1;
      if ($n > 0x10FFFF) {
          $this_utf8_flags &= ~($::UTF8_DISALLOW_SUPER|$::UTF8_WARN_SUPER);
          $valid_under_strict = 0;
          $valid_under_c9strict = 0;
          if ($n > 2 ** 31 - 1) {
              $this_utf8_flags &=
-                        ~($::UTF8_DISALLOW_ABOVE_31_BIT|$::UTF8_WARN_ABOVE_31_BIT);
-            $valid_for_fits_in_31_bits = 0;
+                ~($::UTF8_DISALLOW_PERL_EXTENDED|$::UTF8_WARN_PERL_EXTENDED);
+            $valid_for_not_extended_utf8 = 0;
          }
      }
      elsif (($n >= 0xFDD0 && $n <= 0xFDEF) || ($n & 0xFFFE) == 0xFFFE) {
@@ -784,17 +784,18 @@ for my $u (sort { utf8::unicode_to_native($a) <=> utf8::unicode_to_native($b) }
      my $this_uvchr_flags = $look_for_everything_uvchr_to;
      if ($n > 2 ** 31 - 1) {
          $this_uvchr_flags &=
-                ~($::UNICODE_DISALLOW_ABOVE_31_BIT|$::UNICODE_WARN_ABOVE_31_BIT);
+            ~($::UNICODE_DISALLOW_PERL_EXTENDED|$::UNICODE_WARN_PERL_EXTENDED);
      }
      if ($n > 0x10FFFF) {
          $this_uvchr_flags &= ~($::UNICODE_DISALLOW_SUPER|$::UNICODE_WARN_SUPER);
      }
      elsif (($n >= 0xFDD0 && $n <= 0xFDEF) || ($n & 0xFFFE) == 0xFFFE) {
-        $this_uvchr_flags &= ~($::UNICODE_DISALLOW_NONCHAR|$::UNICODE_WARN_NONCHAR);
+        $this_uvchr_flags
+                     &= ~($::UNICODE_DISALLOW_NONCHAR|$::UNICODE_WARN_NONCHAR);
      }
      elsif ($n >= 0xD800 && $n <= 0xDFFF) {
          $this_uvchr_flags
-                     &= ~($::UNICODE_DISALLOW_SURROGATE|$::UNICODE_WARN_SURROGATE);
+                &= ~($::UNICODE_DISALLOW_SURROGATE|$::UNICODE_WARN_SURROGATE);
      }
      $display_flags = sprintf "0x%x", $this_uvchr_flags;
  
@@ -844,17 +845,17 @@ for my $u (sort { utf8::unicode_to_native($a) <=> utf8::unicode_to_native($b) }
                                  = $restriction_types{"strict"}{'valid_counts'};
      }
  
-    if ($valid_for_fits_in_31_bits) {
-        $restriction_types{"fits_in_31_bits"}{'valid_strings'} .= $bytes;
-        $restriction_types{"fits_in_31_bits"}{'valid_counts'}++;
+    if ($valid_for_not_extended_utf8) {
+        $restriction_types{"not_extended_utf8"}{'valid_strings'} .= $bytes;
+        $restriction_types{"not_extended_utf8"}{'valid_counts'}++;
      }
      elsif (! exists
-                $restriction_types{"fits_in_31_bits"}{'first_invalid_offset'})
+                $restriction_types{"not_extended_utf8"}{'first_invalid_offset'})
      {
-        $restriction_types{"fits_in_31_bits"}{'first_invalid_offset'}
-                = length $restriction_types{"fits_in_31_bits"}{'valid_strings'};
-        $restriction_types{"fits_in_31_bits"}{'first_invalid_count'}
-                        = $restriction_types{"fits_in_31_bits"}{'valid_counts'};
+        $restriction_types{"not_extended_utf8"}{'first_invalid_offset'}
+                = length $restriction_types{"not_extended_utf8"}{'valid_strings'};
+        $restriction_types{"not_extended_utf8"}{'first_invalid_count'}
+                        = $restriction_types{"not_extended_utf8"}{'valid_counts'};
      }
  }
  
@@ -874,7 +875,7 @@ for my $restriction (sort keys %restriction_types) {
          # and the specially named foo function.  But not if there isn't such a
          # specially named function.  Currently, this is the only tested
          # restriction that doesn't have a specially named function
-        next if $use_flags eq "" && $restriction eq "fits_in_31_bits";
+        next if $use_flags eq "" && $restriction eq "not_extended_utf8";
  
          # Start building up the name of the function we will test.
          my $base_name = "is_";
@@ -994,8 +995,8 @@ for my $restriction (sort keys %restriction_types) {
                              elsif ($restriction eq "strict") {
                                  $test .= ", $::UTF8_DISALLOW_ILLEGAL_INTERCHANGE";
                              }
-                            elsif ($restriction eq "fits_in_31_bits") {
-                                $test .= ", $::UTF8_DISALLOW_ABOVE_31_BIT";
+                            elsif ($restriction eq "not_extended_utf8") {
+                                $test .= ", $::UTF8_DISALLOW_PERL_EXTENDED";
                              }
                              else {
                                  fail("Internal test error: Unknown restriction "
diff --git a/ext/XS-APItest/t/utf8_setup.pl b/ext/XS-APItest/t/utf8_setup.pl

index 62b0649..ec7a5ce 100644 (file)
--- a/ext/XS-APItest/t/utf8_setup.pl
+++ b/ext/XS-APItest/t/utf8_setup.pl
@@ -82,9 +82,9 @@ $::UTF8_WARN_NONCHAR           = 0x0800;
  $::UTF8_DISALLOW_SUPER         = 0x1000;
  $::UTF8_GOT_SUPER              = $UTF8_DISALLOW_SUPER;
  $::UTF8_WARN_SUPER             = 0x2000;
-$::UTF8_DISALLOW_ABOVE_31_BIT  = 0x4000;
-$::UTF8_GOT_ABOVE_31_BIT       = $UTF8_DISALLOW_ABOVE_31_BIT;
-$::UTF8_WARN_ABOVE_31_BIT      = 0x8000;
+$::UTF8_DISALLOW_PERL_EXTENDED  = 0x4000;
+$::UTF8_GOT_PERL_EXTENDED       = $UTF8_DISALLOW_PERL_EXTENDED;
+$::UTF8_WARN_PERL_EXTENDED      = 0x8000;
  $::UTF8_CHECK_ONLY             = 0x10000;
  $::UTF8_NO_CONFIDENCE_IN_CURLEN_ = 0x20000;
  
@@ -101,8 +101,8 @@ $::UTF8_WARN_ILLEGAL_INTERCHANGE
  $::UNICODE_WARN_SURROGATE        = 0x0001;
  $::UNICODE_WARN_NONCHAR          = 0x0002;
  $::UNICODE_WARN_SUPER            = 0x0004;
-$::UNICODE_WARN_ABOVE_31_BIT     = 0x0008;
+$::UNICODE_WARN_PERL_EXTENDED     = 0x0008;
  $::UNICODE_DISALLOW_SURROGATE    = 0x0010;
  $::UNICODE_DISALLOW_NONCHAR      = 0x0020;
  $::UNICODE_DISALLOW_SUPER        = 0x0040;
-$::UNICODE_DISALLOW_ABOVE_31_BIT = 0x0080;
+$::UNICODE_DISALLOW_PERL_EXTENDED = 0x0080;
diff --git a/ext/XS-APItest/t/utf8_warn_base.pl b/ext/XS-APItest/t/utf8_warn_base.pl

index 619a554..94df88e 100644 (file)
--- a/ext/XS-APItest/t/utf8_warn_base.pl
+++ b/ext/XS-APItest/t/utf8_warn_base.pl
@@ -437,8 +437,8 @@ my @utf8n_flags_to_text =  ( qw(
          W_NONCHAR
          D_SUPER
          W_SUPER
-        D_ABOVE_31_BIT
-        W_ABOVE_31_BIT
+        D_PERL_EXTENDED
+        W_PERL_EXTENDED
          CHECK_ONLY
          NO_CONFIDENCE_IN_CURLEN_
      ) );
@@ -475,11 +475,11 @@ sub uvchr_display_call($)
              W_SURROGATE
              W_NONCHAR
              W_SUPER
-            W_ABOVE_31_BIT
+            W_PERL_EXTENDED
              D_SURROGATE
              D_NONCHAR
              D_SUPER
-            D_ABOVE_31_BIT
+            D_PERL_EXTENDED
         ) );
  
      $_[0] =~ / ^ ( [^(]* \( ) ( \d+ ) , \s* ( \d+ ) \) $ /x;
@@ -594,15 +594,15 @@ foreach my $test (@tests) {
  
      if ($will_overflow || $allowed_uv > 0x10FFFF) {
  
-        # Set the SUPER flags; later, we test for ABOVE_31_BIT as well.
+        # Set the SUPER flags; later, we test for PERL_EXTENDED as well.
          $utf8n_flag_to_warn     = $::UTF8_WARN_SUPER;
          $utf8n_flag_to_disallow = $::UTF8_DISALLOW_SUPER;
          $uvchr_flag_to_warn     = $::UNICODE_WARN_SUPER;
          $uvchr_flag_to_disallow = $::UNICODE_DISALLOW_SUPER;;
  
-        # Below, we add the flags for non-above-31 bit to the code points that
-        # don't fit that category.  Special tests are done for this category
-        # in the inner loop.
+        # Below, we add the flags for non-perl_extended to the code points
+        # that don't fit that category.  Special tests are done for this
+        # category in the inner loop.
          $utf8n_flag_to_warn_complement     = $::UTF8_WARN_NONCHAR
                                              |$::UTF8_WARN_SURROGATE;
          $utf8n_flag_to_disallow_complement = $::UTF8_DISALLOW_NONCHAR
@@ -628,11 +628,12 @@ foreach my $test (@tests) {
                                  \Q may not be portable\E/x;
              $non_cp_trailing_text = "is for a non-Unicode code point, may not"
                                  . " be portable";
-            $utf8n_flag_to_warn_complement     |= $::UTF8_WARN_ABOVE_31_BIT;
-            $utf8n_flag_to_disallow_complement |= $::UTF8_DISALLOW_ABOVE_31_BIT;
-            $uvchr_flag_to_warn_complement     |= $::UNICODE_WARN_ABOVE_31_BIT;
+            $utf8n_flag_to_warn_complement     |= $::UTF8_WARN_PERL_EXTENDED;
+            $utf8n_flag_to_disallow_complement
+                                           |= $::UTF8_DISALLOW_PERL_EXTENDED;
+            $uvchr_flag_to_warn_complement |= $::UNICODE_WARN_PERL_EXTENDED;
              $uvchr_flag_to_disallow_complement
-                                            |= $::UNICODE_DISALLOW_ABOVE_31_BIT;
+                                        |= $::UNICODE_DISALLOW_PERL_EXTENDED;
          }
      }
      elsif ($allowed_uv >= 0xD800 && $allowed_uv <= 0xDFFF) {
@@ -648,16 +649,16 @@ foreach my $test (@tests) {
  
          $utf8n_flag_to_warn_complement     = $::UTF8_WARN_NONCHAR
                                              |$::UTF8_WARN_SUPER
-                                            |$::UTF8_WARN_ABOVE_31_BIT;
+                                            |$::UTF8_WARN_PERL_EXTENDED;
          $utf8n_flag_to_disallow_complement = $::UTF8_DISALLOW_NONCHAR
                                              |$::UTF8_DISALLOW_SUPER
-                                            |$::UTF8_DISALLOW_ABOVE_31_BIT;
+                                            |$::UTF8_DISALLOW_PERL_EXTENDED;
          $uvchr_flag_to_warn_complement     = $::UNICODE_WARN_NONCHAR
                                              |$::UNICODE_WARN_SUPER
-                                            |$::UNICODE_WARN_ABOVE_31_BIT;
+                                            |$::UNICODE_WARN_PERL_EXTENDED;
          $uvchr_flag_to_disallow_complement = $::UNICODE_DISALLOW_NONCHAR
                                              |$::UNICODE_DISALLOW_SUPER
-                                            |$::UNICODE_DISALLOW_ABOVE_31_BIT;
+                                            |$::UNICODE_DISALLOW_PERL_EXTENDED;
          $controlling_warning_category = 'surrogate';
      }
      elsif (   ($allowed_uv >= 0xFDD0 && $allowed_uv <= 0xFDEF)
@@ -680,16 +681,16 @@ foreach my $test (@tests) {
  
          $utf8n_flag_to_warn_complement     = $::UTF8_WARN_SURROGATE
                                              |$::UTF8_WARN_SUPER
-                                            |$::UTF8_WARN_ABOVE_31_BIT;
+                                            |$::UTF8_WARN_PERL_EXTENDED;
          $utf8n_flag_to_disallow_complement = $::UTF8_DISALLOW_SURROGATE
                                              |$::UTF8_DISALLOW_SUPER
-                                            |$::UTF8_DISALLOW_ABOVE_31_BIT;
+                                            |$::UTF8_DISALLOW_PERL_EXTENDED;
          $uvchr_flag_to_warn_complement     = $::UNICODE_WARN_SURROGATE
                                              |$::UNICODE_WARN_SUPER
-                                            |$::UNICODE_WARN_ABOVE_31_BIT;
+                                            |$::UNICODE_WARN_PERL_EXTENDED;
          $uvchr_flag_to_disallow_complement = $::UNICODE_DISALLOW_SURROGATE
                                              |$::UNICODE_DISALLOW_SUPER
-                                            |$::UNICODE_DISALLOW_ABOVE_31_BIT;
+                                            |$::UNICODE_DISALLOW_PERL_EXTENDED;
  
          $controlling_warning_category = 'nonchar';
      }
@@ -770,7 +771,8 @@ foreach my $test (@tests) {
          foreach my $disallow_type (0..2) {
              # 0 is don't disallow this type of code point
              # 1 is do disallow
-            # 2 is do disallow, but only for above 31 bit
+            # 2 is do disallow, but only code points requiring
+            #   perl-extended-UTF8
  
              my $disallow_flags;
              my $expected_ret;
@@ -790,7 +792,7 @@ foreach my $test (@tests) {
              }
              elsif ($disallow_type == 2) {
                  next if ! requires_extended_utf8($allowed_uv);
-                $disallow_flags = $::UTF8_DISALLOW_ABOVE_31_BIT;
+                $disallow_flags = $::UTF8_DISALLOW_PERL_EXTENDED;
                  $expected_ret = 0;
              }
              else {  # type is 0
@@ -1106,26 +1108,27 @@ foreach my $test (@tests) {
                          $expect_warnings_for_malformed = 0;
                      }
                      elsif ($warning_type == 4) {  # Like type 3, but uses the
-                                                  # above-31-bit flags
+                                                  # PERL_EXTENDED flags
                          # The complement flags were set up so that the
-                        # above-31-bit flags have been tested that they don't
+                        # PERL_EXTENDED flags have been tested that they don't
                          # trigger wrongly for too small code points.  And the
                          # flags have been set up so that those small code
                          # points are tested for being above Unicode.  What's
                          # left to test is that the large code points do
-                        # trigger the above-31-bit flags.
+                        # trigger the PERL_EXTENDED flags.
                          next if ! requires_extended_utf8($allowed_uv);
                          next if $controlling_warning_category ne 'non_unicode';
                          $eval_warn = "no warnings; use warnings 'non_unicode'";
                          $expect_regular_warnings = 1;
                          $expect_warnings_for_overflow = 1;
                          $expect_warnings_for_malformed = 0;
-                        $this_utf8n_flag_to_warn   = $::UTF8_WARN_ABOVE_31_BIT;
+                        $this_utf8n_flag_to_warn = $::UTF8_WARN_PERL_EXTENDED;
                          $this_utf8n_flag_to_disallow
-                                                = $::UTF8_DISALLOW_ABOVE_31_BIT;
-                        $this_uvchr_flag_to_warn = $::UNICODE_WARN_ABOVE_31_BIT;
+                                             = $::UTF8_DISALLOW_PERL_EXTENDED;
+                        $this_uvchr_flag_to_warn
+                                              = $::UNICODE_WARN_PERL_EXTENDED;
                          $this_uvchr_flag_to_disallow
-                                             = $::UNICODE_DISALLOW_ABOVE_31_BIT;
+                                          = $::UNICODE_DISALLOW_PERL_EXTENDED;
                      }
                      else {
                         die "Unexpected warning type '$warning_type'";
@@ -1180,14 +1183,14 @@ foreach my $test (@tests) {
                          # should emit a message or not.  It's tentative
                          # because, even if we ordinarily would output it, we
                          # don't if malformations are allowed -- except an
-                        # overflow is also a SUPER and ABOVE_31_BIT, and if
+                        # overflow is also a SUPER and PERL_EXTENDED, and if
                          # warnings for those are enabled, the overflow
                          # warning does get raised.
                          if (   $expect_warnings_for_overflow
                              && (    $malformed_allow_type == 0
                                  ||   (   $this_warning_flags
                                        & ($::UTF8_WARN_SUPER
-                                        |$::UTF8_WARN_ABOVE_31_BIT))))
+                                        |$::UTF8_WARN_PERL_EXTENDED))))
                          {
                              push @expected_warnings, $overflow_msg_pattern;
                          }
@@ -1298,10 +1301,10 @@ foreach my $test (@tests) {
                      for (my $i = @expected_return_flags - 1; $i >= 0; $i--) {
                          if ($expected_return_flags[$i] & $returned_flags) {
                              if ($expected_return_flags[$i]
-                                            == $::UTF8_DISALLOW_ABOVE_31_BIT)
+                                                == $::UTF8_GOT_PERL_EXTENDED)
                              {
                                  pass("    Expected and got return flag for"
-                                   . " above_31_bit");
+                                   . " PERL_EXTENDED");
                              }
                                     # The first entries in this are
                                     # malformations
diff --git a/inline.h b/inline.h

index d840d3d..dc74d1d 100644 (file)
--- a/inline.h
+++ b/inline.h
@@ -609,19 +609,19 @@ S_is_utf8_string_flags(const U8 *s, const STRLEN len, const U32 flags)
  
      PERL_ARGS_ASSERT_IS_UTF8_STRING_FLAGS;
      assert(0 == (flags & ~(UTF8_DISALLOW_ILLEGAL_INTERCHANGE
-                          |UTF8_DISALLOW_ABOVE_31_BIT)));
+                          |UTF8_DISALLOW_PERL_EXTENDED)));
  
      if (flags == 0) {
          return is_utf8_string(s, len);
      }
  
-    if ((flags & ~UTF8_DISALLOW_ABOVE_31_BIT)
+    if ((flags & ~UTF8_DISALLOW_PERL_EXTENDED)
                                          == UTF8_DISALLOW_ILLEGAL_INTERCHANGE)
      {
          return is_strict_utf8_string(s, len);
      }
  
-    if ((flags & ~UTF8_DISALLOW_ABOVE_31_BIT)
+    if ((flags & ~UTF8_DISALLOW_PERL_EXTENDED)
                                         == UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE)
      {
          return is_c9strict_utf8_string(s, len);
@@ -855,19 +855,19 @@ S_is_utf8_string_loclen_flags(const U8 *s, const STRLEN len, const U8 **ep, STRL
  
      PERL_ARGS_ASSERT_IS_UTF8_STRING_LOCLEN_FLAGS;
      assert(0 == (flags & ~(UTF8_DISALLOW_ILLEGAL_INTERCHANGE
-                          |UTF8_DISALLOW_ABOVE_31_BIT)));
+                          |UTF8_DISALLOW_PERL_EXTENDED)));
  
      if (flags == 0) {
          return is_utf8_string_loclen(s, len, ep, el);
      }
  
-    if ((flags & ~UTF8_DISALLOW_ABOVE_31_BIT)
+    if ((flags & ~UTF8_DISALLOW_PERL_EXTENDED)
                                          == UTF8_DISALLOW_ILLEGAL_INTERCHANGE)
      {
          return is_strict_utf8_string_loclen(s, len, ep, el);
      }
  
-    if ((flags & ~UTF8_DISALLOW_ABOVE_31_BIT)
+    if ((flags & ~UTF8_DISALLOW_PERL_EXTENDED)
                                      == UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE)
      {
          return is_c9strict_utf8_string_loclen(s, len, ep, el);
@@ -1123,7 +1123,7 @@ S_is_utf8_valid_partial_char_flags(const U8 * const s, const U8 * const e, const
      PERL_ARGS_ASSERT_IS_UTF8_VALID_PARTIAL_CHAR_FLAGS;
  
      assert(0 == (flags & ~(UTF8_DISALLOW_ILLEGAL_INTERCHANGE
-                          |UTF8_DISALLOW_ABOVE_31_BIT)));
+                          |UTF8_DISALLOW_PERL_EXTENDED)));
  
      if (s >= e || s + UTF8SKIP(s) <= e) {
          return FALSE;
diff --git a/utf8.c b/utf8.c

index 67580ff..88c2b32 100644 (file)
--- a/utf8.c
+++ b/utf8.c
@@ -213,20 +213,20 @@ Perl_uvoffuni_to_utf8_flags(pTHX_ U8 *d, UV uv, const UV flags)
                          cp_above_legal_max, uv, MAX_NON_DEPRECATED_CP);
          }
          if (       (flags & UNICODE_WARN_SUPER)
-            || (   UNICODE_IS_ABOVE_31_BIT(uv)
-                && (flags & UNICODE_WARN_ABOVE_31_BIT)))
+            || (   UNICODE_IS_PERL_EXTENDED(uv)
+                && (flags & UNICODE_WARN_PERL_EXTENDED)))
          {
              Perl_ck_warner_d(aTHX_ packWARN(WARN_NON_UNICODE),
  
                /* Choose the more dire applicable warning */
-              (UNICODE_IS_ABOVE_31_BIT(uv))
+              (UNICODE_IS_PERL_EXTENDED(uv))
                ? above_31_bit_cp_format
                : super_cp_format,
               uv);
          }
          if (       (flags & UNICODE_DISALLOW_SUPER)
-            || (   UNICODE_IS_ABOVE_31_BIT(uv)
-                && (flags & UNICODE_DISALLOW_ABOVE_31_BIT)))
+            || (   UNICODE_IS_PERL_EXTENDED(uv)
+                && (flags & UNICODE_DISALLOW_PERL_EXTENDED)))
          {
              return NULL;
          }
@@ -680,7 +680,7 @@ Perl__is_utf8_char_helper(const U8 * const s, const U8 * e, const U32 flags)
      PERL_ARGS_ASSERT__IS_UTF8_CHAR_HELPER;
  
      assert(0 == (flags & ~(UTF8_DISALLOW_ILLEGAL_INTERCHANGE
-                          |UTF8_DISALLOW_ABOVE_31_BIT)));
+                          |UTF8_DISALLOW_PERL_EXTENDED)));
      assert(! UTF8_IS_INVARIANT(*s));
  
      /* A variant char must begin with a start byte */
@@ -742,7 +742,7 @@ Perl__is_utf8_char_helper(const U8 * const s, const U8 * e, const U32 flags)
              return 0;           /* Above Unicode */
          }
  
-        if (   (flags & UTF8_DISALLOW_ABOVE_31_BIT)
+        if (   (flags & UTF8_DISALLOW_PERL_EXTENDED)
              &&  UNLIKELY(is_utf8_cp_above_31_bits(s, e)))
          {
              return 0;           /* Above 31 bits */
@@ -1320,11 +1320,11 @@ Perl_utf8n_to_uvchr_error(pTHX_ const U8 *s,
         && ((flags & ( UTF8_DISALLOW_NONCHAR
                        |UTF8_DISALLOW_SURROGATE
                        |UTF8_DISALLOW_SUPER
-                      |UTF8_DISALLOW_ABOVE_31_BIT
+                      |UTF8_DISALLOW_PERL_EXTENDED
                       |UTF8_WARN_NONCHAR
                        |UTF8_WARN_SURROGATE
                        |UTF8_WARN_SUPER
-                      |UTF8_WARN_ABOVE_31_BIT))
+                      |UTF8_WARN_PERL_EXTENDED))
                     /* In case of a malformation, 'uv' is not valid, and has
                      * been changed to something in the Unicode range.
                      * Currently we don't output a deprecation message if there
@@ -1423,7 +1423,7 @@ Perl_utf8n_to_uvchr_error(pTHX_ const U8 *s,
                  /* Overflow means also got a super and are using Perl's
                   * extended UTF-8, but we handle all three cases here */
                  possible_problems
-                  &= ~(UTF8_GOT_OVERFLOW|UTF8_GOT_SUPER|UTF8_GOT_ABOVE_31_BIT);
+                  &= ~(UTF8_GOT_OVERFLOW|UTF8_GOT_SUPER|UTF8_GOT_PERL_EXTENDED);
                  *errors |= UTF8_GOT_OVERFLOW;
  
                  /* But the API says we flag all errors found */
@@ -1431,15 +1431,15 @@ Perl_utf8n_to_uvchr_error(pTHX_ const U8 *s,
                      *errors |= UTF8_GOT_SUPER;
                  }
                  if (flags
-                        & (UTF8_WARN_ABOVE_31_BIT|UTF8_DISALLOW_ABOVE_31_BIT))
+                        & (UTF8_WARN_PERL_EXTENDED|UTF8_DISALLOW_PERL_EXTENDED))
                  {
-                    *errors |= UTF8_GOT_ABOVE_31_BIT;
+                    *errors |= UTF8_GOT_PERL_EXTENDED;
                  }
  
                  /* Disallow if any of the three categories say to */
                  if ( ! (flags &   UTF8_ALLOW_OVERFLOW)
                      || (flags & ( UTF8_DISALLOW_SUPER
-                                 |UTF8_DISALLOW_ABOVE_31_BIT)))
+                                 |UTF8_DISALLOW_PERL_EXTENDED)))
                  {
                      disallowed = TRUE;
                  }
@@ -1448,7 +1448,7 @@ Perl_utf8n_to_uvchr_error(pTHX_ const U8 *s,
                   * are on, because this code point is above IV_MAX */
                  if (      ckWARN_d(WARN_DEPRECATED)
                      || ! (flags & UTF8_ALLOW_OVERFLOW)
-                    ||   (flags & (UTF8_WARN_SUPER|UTF8_WARN_ABOVE_31_BIT)))
+                    ||   (flags & (UTF8_WARN_SUPER|UTF8_WARN_PERL_EXTENDED)))
                  {
  
                      /* The warnings code explicitly says it doesn't handle the
@@ -1610,18 +1610,18 @@ Perl_utf8n_to_uvchr_error(pTHX_ const U8 *s,
                   * test for these after the regular SUPER ones, and before
                   * possibly bailing out, so that the slightly more dire warning
                   * will override the regular one. */
-                if (   (flags & (UTF8_WARN_ABOVE_31_BIT
+                if (   (flags & (UTF8_WARN_PERL_EXTENDED
                                  |UTF8_WARN_SUPER
-                                |UTF8_DISALLOW_ABOVE_31_BIT))
+                                |UTF8_DISALLOW_PERL_EXTENDED))
                      && (   (   UNLIKELY(orig_problems & UTF8_GOT_TOO_SHORT)
                              && UNLIKELY(is_utf8_cp_above_31_bits(
                                                  adjusted_s0,
                                                  adjusted_send)))
                          || (   LIKELY(! (orig_problems & UTF8_GOT_TOO_SHORT))
-                            && UNLIKELY(UNICODE_IS_ABOVE_31_BIT(uv)))))
+                            && UNLIKELY(UNICODE_IS_PERL_EXTENDED(uv)))))
                  {
                      if (  ! (flags & UTF8_CHECK_ONLY)
-                        &&  (flags & (UTF8_WARN_ABOVE_31_BIT|UTF8_WARN_SUPER))
+                        &&  (flags & (UTF8_WARN_PERL_EXTENDED|UTF8_WARN_SUPER))
                          &&  ckWARN_d(WARN_NON_UNICODE))
                      {
                          pack_warn = packWARN(WARN_NON_UNICODE);
@@ -1639,12 +1639,12 @@ Perl_utf8n_to_uvchr_error(pTHX_ const U8 *s,
                          }
                      }
  
-                    if (flags & ( UTF8_WARN_ABOVE_31_BIT
-                                 |UTF8_DISALLOW_ABOVE_31_BIT))
+                    if (flags & ( UTF8_WARN_PERL_EXTENDED
+                                 |UTF8_DISALLOW_PERL_EXTENDED))
                      {
-                        *errors |= UTF8_GOT_ABOVE_31_BIT;
+                        *errors |= UTF8_GOT_PERL_EXTENDED;
  
-                        if (flags & UTF8_DISALLOW_ABOVE_31_BIT) {
+                        if (flags & UTF8_DISALLOW_PERL_EXTENDED) {
                              disallowed = TRUE;
                          }
                      }
diff --git a/utf8.h b/utf8.h

index e26f3cc..c880375 100644 (file)
--- a/utf8.h
+++ b/utf8.h
@@ -777,13 +777,16 @@ case any call to string overloading updates the internal UTF-8 encoding flag.
   * went up to 2 ** 31 - 1.  Note that these all overflow a signed 32-bit word,
   * The first byte of these code points is FE or FF on ASCII platforms.  If the
   * first byte is FF, it will overflow a 32-bit word. */
-#define UTF8_DISALLOW_ABOVE_31_BIT      0x4000
-#define UTF8_GOT_ABOVE_31_BIT           UTF8_DISALLOW_ABOVE_31_BIT
-#define UTF8_WARN_ABOVE_31_BIT          0x8000
+#define UTF8_DISALLOW_PERL_EXTENDED     0x4000
+#define UTF8_GOT_PERL_EXTENDED          UTF8_DISALLOW_PERL_EXTENDED
+#define UTF8_WARN_PERL_EXTENDED         0x8000
  
  /* For back compat, these old names are misleading for UTF_EBCDIC */
-#define UTF8_DISALLOW_FE_FF             UTF8_DISALLOW_ABOVE_31_BIT
-#define UTF8_WARN_FE_FF                 UTF8_WARN_ABOVE_31_BIT
+#define UTF8_DISALLOW_ABOVE_31_BIT      UTF8_DISALLOW_PERL_EXTENDED
+#define UTF8_GOT_ABOVE_31_BIT           UTF8_GOT_PERL_EXTENDED
+#define UTF8_WARN_ABOVE_31_BIT          UTF8_WARN_PERL_EXTENDED
+#define UTF8_DISALLOW_FE_FF             UTF8_DISALLOW_PERL_EXTENDED
+#define UTF8_WARN_FE_FF                 UTF8_WARN_PERL_EXTENDED
  
  #define UTF8_CHECK_ONLY                        0x10000
  #define _UTF8_NO_CONFIDENCE_IN_CURLEN   0x20000  /* Internal core use only */
@@ -907,14 +910,16 @@ point's representation.
   * let's be conservative and do as Unicode says. */
  #define PERL_UNICODE_MAX       0x10FFFF
  
-#define UNICODE_WARN_SURROGATE        0x0001   /* UTF-16 surrogates */
-#define UNICODE_WARN_NONCHAR          0x0002   /* Non-char code points */
-#define UNICODE_WARN_SUPER            0x0004   /* Above 0x10FFFF */
-#define UNICODE_WARN_ABOVE_31_BIT     0x0008   /* Above 0x7FFF_FFFF */
-#define UNICODE_DISALLOW_SURROGATE    0x0010
-#define UNICODE_DISALLOW_NONCHAR      0x0020
-#define UNICODE_DISALLOW_SUPER        0x0040
-#define UNICODE_DISALLOW_ABOVE_31_BIT 0x0080
+#define UNICODE_WARN_SURROGATE         0x0001  /* UTF-16 surrogates */
+#define UNICODE_WARN_NONCHAR           0x0002  /* Non-char code points */
+#define UNICODE_WARN_SUPER             0x0004  /* Above 0x10FFFF */
+#define UNICODE_WARN_PERL_EXTENDED     0x0008  /* Above 0x7FFF_FFFF */
+#define UNICODE_WARN_ABOVE_31_BIT      UNICODE_WARN_PERL_EXTENDED
+#define UNICODE_DISALLOW_SURROGATE     0x0010
+#define UNICODE_DISALLOW_NONCHAR       0x0020
+#define UNICODE_DISALLOW_SUPER         0x0040
+#define UNICODE_DISALLOW_PERL_EXTENDED 0x0080
+#define UNICODE_DISALLOW_ABOVE_31_BIT  UNICODE_DISALLOW_PERL_EXTENDED
  #define UNICODE_WARN_ILLEGAL_C9_INTERCHANGE                                   \
                                    (UNICODE_WARN_SURROGATE|UNICODE_WARN_SUPER)
  #define UNICODE_WARN_ILLEGAL_INTERCHANGE                                      \
@@ -953,7 +958,7 @@ point's representation.
           && UNICODE_IS_END_PLANE_NONCHAR_GIVEN_NOT_SUPER(uv)))
  
  #define UNICODE_IS_SUPER(uv)    ((UV) (uv) > PERL_UNICODE_MAX)
-#define UNICODE_IS_ABOVE_31_BIT(uv)    ((UV) (uv) > 0x7FFFFFFF)
+#define UNICODE_IS_PERL_EXTENDED(uv)    ((UV) (uv) > 0x7FFFFFFF)
  
  #define LATIN_SMALL_LETTER_SHARP_S      LATIN_SMALL_LETTER_SHARP_S_NATIVE
  #define LATIN_SMALL_LETTER_Y_WITH_DIAERESIS                                  \
author	Karl Williamson <khw@cpan.org>
	Mon, 26 Jun 2017 17:43:21 +0000 (11:43 -0600)
committer	Karl Williamson <khw@cpan.org>
	Thu, 13 Jul 2017 03:14:25 +0000 (21:14 -0600)
ext/XS-APItest/t/utf8.t		patch | blob | blame | history
ext/XS-APItest/t/utf8_setup.pl		patch | blob | blame | history
ext/XS-APItest/t/utf8_warn_base.pl		patch | blob | blame | history
inline.h		patch | blob | blame | history
utf8.c		patch | blob | blame | history
utf8.h		patch | blob | blame | history