| $::UTF8_WARN_NONCHAR
| $::UTF8_DISALLOW_SUPER
| $::UTF8_WARN_SUPER
- | $::UTF8_DISALLOW_ABOVE_31_BIT
- | $::UTF8_WARN_ABOVE_31_BIT;
+ | $::UTF8_DISALLOW_PERL_EXTENDED
+ | $::UTF8_WARN_PERL_EXTENDED;
my $look_for_everything_uvchr_to
= $::UNICODE_DISALLOW_SURROGATE
| $::UNICODE_WARN_SURROGATE
| $::UNICODE_WARN_NONCHAR
| $::UNICODE_DISALLOW_SUPER
| $::UNICODE_WARN_SUPER
- | $::UNICODE_DISALLOW_ABOVE_31_BIT
- | $::UNICODE_WARN_ABOVE_31_BIT;
+ | $::UNICODE_DISALLOW_PERL_EXTENDED
+ | $::UNICODE_WARN_PERL_EXTENDED;
foreach ([0, '', '', 'empty'],
[0, 'N', 'N', '1 char'],
my $valid_under_strict = 1;
my $valid_under_c9strict = 1;
- my $valid_for_fits_in_31_bits = 1;
+ my $valid_for_not_extended_utf8 = 1;
if ($n > 0x10FFFF) {
$this_utf8_flags &= ~($::UTF8_DISALLOW_SUPER|$::UTF8_WARN_SUPER);
$valid_under_strict = 0;
$valid_under_c9strict = 0;
if ($n > 2 ** 31 - 1) {
$this_utf8_flags &=
- ~($::UTF8_DISALLOW_ABOVE_31_BIT|$::UTF8_WARN_ABOVE_31_BIT);
- $valid_for_fits_in_31_bits = 0;
+ ~($::UTF8_DISALLOW_PERL_EXTENDED|$::UTF8_WARN_PERL_EXTENDED);
+ $valid_for_not_extended_utf8 = 0;
}
}
elsif (($n >= 0xFDD0 && $n <= 0xFDEF) || ($n & 0xFFFE) == 0xFFFE) {
my $this_uvchr_flags = $look_for_everything_uvchr_to;
if ($n > 2 ** 31 - 1) {
$this_uvchr_flags &=
- ~($::UNICODE_DISALLOW_ABOVE_31_BIT|$::UNICODE_WARN_ABOVE_31_BIT);
+ ~($::UNICODE_DISALLOW_PERL_EXTENDED|$::UNICODE_WARN_PERL_EXTENDED);
}
if ($n > 0x10FFFF) {
$this_uvchr_flags &= ~($::UNICODE_DISALLOW_SUPER|$::UNICODE_WARN_SUPER);
}
elsif (($n >= 0xFDD0 && $n <= 0xFDEF) || ($n & 0xFFFE) == 0xFFFE) {
- $this_uvchr_flags &= ~($::UNICODE_DISALLOW_NONCHAR|$::UNICODE_WARN_NONCHAR);
+ $this_uvchr_flags
+ &= ~($::UNICODE_DISALLOW_NONCHAR|$::UNICODE_WARN_NONCHAR);
}
elsif ($n >= 0xD800 && $n <= 0xDFFF) {
$this_uvchr_flags
- &= ~($::UNICODE_DISALLOW_SURROGATE|$::UNICODE_WARN_SURROGATE);
+ &= ~($::UNICODE_DISALLOW_SURROGATE|$::UNICODE_WARN_SURROGATE);
}
$display_flags = sprintf "0x%x", $this_uvchr_flags;
= $restriction_types{"strict"}{'valid_counts'};
}
- if ($valid_for_fits_in_31_bits) {
- $restriction_types{"fits_in_31_bits"}{'valid_strings'} .= $bytes;
- $restriction_types{"fits_in_31_bits"}{'valid_counts'}++;
+ if ($valid_for_not_extended_utf8) {
+ $restriction_types{"not_extended_utf8"}{'valid_strings'} .= $bytes;
+ $restriction_types{"not_extended_utf8"}{'valid_counts'}++;
}
elsif (! exists
- $restriction_types{"fits_in_31_bits"}{'first_invalid_offset'})
+ $restriction_types{"not_extended_utf8"}{'first_invalid_offset'})
{
- $restriction_types{"fits_in_31_bits"}{'first_invalid_offset'}
- = length $restriction_types{"fits_in_31_bits"}{'valid_strings'};
- $restriction_types{"fits_in_31_bits"}{'first_invalid_count'}
- = $restriction_types{"fits_in_31_bits"}{'valid_counts'};
+ $restriction_types{"not_extended_utf8"}{'first_invalid_offset'}
+ = length $restriction_types{"not_extended_utf8"}{'valid_strings'};
+ $restriction_types{"not_extended_utf8"}{'first_invalid_count'}
+ = $restriction_types{"not_extended_utf8"}{'valid_counts'};
}
}
# and the specially named foo function. But not if there isn't such a
# specially named function. Currently, this is the only tested
# restriction that doesn't have a specially named function
- next if $use_flags eq "" && $restriction eq "fits_in_31_bits";
+ next if $use_flags eq "" && $restriction eq "not_extended_utf8";
# Start building up the name of the function we will test.
my $base_name = "is_";
elsif ($restriction eq "strict") {
$test .= ", $::UTF8_DISALLOW_ILLEGAL_INTERCHANGE";
}
- elsif ($restriction eq "fits_in_31_bits") {
- $test .= ", $::UTF8_DISALLOW_ABOVE_31_BIT";
+ elsif ($restriction eq "not_extended_utf8") {
+ $test .= ", $::UTF8_DISALLOW_PERL_EXTENDED";
}
else {
fail("Internal test error: Unknown restriction "
$::UTF8_DISALLOW_SUPER = 0x1000;
$::UTF8_GOT_SUPER = $UTF8_DISALLOW_SUPER;
$::UTF8_WARN_SUPER = 0x2000;
-$::UTF8_DISALLOW_ABOVE_31_BIT = 0x4000;
-$::UTF8_GOT_ABOVE_31_BIT = $UTF8_DISALLOW_ABOVE_31_BIT;
-$::UTF8_WARN_ABOVE_31_BIT = 0x8000;
+$::UTF8_DISALLOW_PERL_EXTENDED = 0x4000;
+$::UTF8_GOT_PERL_EXTENDED = $UTF8_DISALLOW_PERL_EXTENDED;
+$::UTF8_WARN_PERL_EXTENDED = 0x8000;
$::UTF8_CHECK_ONLY = 0x10000;
$::UTF8_NO_CONFIDENCE_IN_CURLEN_ = 0x20000;
$::UNICODE_WARN_SURROGATE = 0x0001;
$::UNICODE_WARN_NONCHAR = 0x0002;
$::UNICODE_WARN_SUPER = 0x0004;
-$::UNICODE_WARN_ABOVE_31_BIT = 0x0008;
+$::UNICODE_WARN_PERL_EXTENDED = 0x0008;
$::UNICODE_DISALLOW_SURROGATE = 0x0010;
$::UNICODE_DISALLOW_NONCHAR = 0x0020;
$::UNICODE_DISALLOW_SUPER = 0x0040;
-$::UNICODE_DISALLOW_ABOVE_31_BIT = 0x0080;
+$::UNICODE_DISALLOW_PERL_EXTENDED = 0x0080;
W_NONCHAR
D_SUPER
W_SUPER
- D_ABOVE_31_BIT
- W_ABOVE_31_BIT
+ D_PERL_EXTENDED
+ W_PERL_EXTENDED
CHECK_ONLY
NO_CONFIDENCE_IN_CURLEN_
) );
W_SURROGATE
W_NONCHAR
W_SUPER
- W_ABOVE_31_BIT
+ W_PERL_EXTENDED
D_SURROGATE
D_NONCHAR
D_SUPER
- D_ABOVE_31_BIT
+ D_PERL_EXTENDED
) );
$_[0] =~ / ^ ( [^(]* \( ) ( \d+ ) , \s* ( \d+ ) \) $ /x;
if ($will_overflow || $allowed_uv > 0x10FFFF) {
- # Set the SUPER flags; later, we test for ABOVE_31_BIT as well.
+ # Set the SUPER flags; later, we test for PERL_EXTENDED as well.
$utf8n_flag_to_warn = $::UTF8_WARN_SUPER;
$utf8n_flag_to_disallow = $::UTF8_DISALLOW_SUPER;
$uvchr_flag_to_warn = $::UNICODE_WARN_SUPER;
$uvchr_flag_to_disallow = $::UNICODE_DISALLOW_SUPER;;
- # Below, we add the flags for non-above-31 bit to the code points that
- # don't fit that category. Special tests are done for this category
- # in the inner loop.
+ # Below, we add the flags for non-perl_extended to the code points
+ # that don't fit that category. Special tests are done for this
+ # category in the inner loop.
$utf8n_flag_to_warn_complement = $::UTF8_WARN_NONCHAR
|$::UTF8_WARN_SURROGATE;
$utf8n_flag_to_disallow_complement = $::UTF8_DISALLOW_NONCHAR
\Q may not be portable\E/x;
$non_cp_trailing_text = "is for a non-Unicode code point, may not"
. " be portable";
- $utf8n_flag_to_warn_complement |= $::UTF8_WARN_ABOVE_31_BIT;
- $utf8n_flag_to_disallow_complement |= $::UTF8_DISALLOW_ABOVE_31_BIT;
- $uvchr_flag_to_warn_complement |= $::UNICODE_WARN_ABOVE_31_BIT;
+ $utf8n_flag_to_warn_complement |= $::UTF8_WARN_PERL_EXTENDED;
+ $utf8n_flag_to_disallow_complement
+ |= $::UTF8_DISALLOW_PERL_EXTENDED;
+ $uvchr_flag_to_warn_complement |= $::UNICODE_WARN_PERL_EXTENDED;
$uvchr_flag_to_disallow_complement
- |= $::UNICODE_DISALLOW_ABOVE_31_BIT;
+ |= $::UNICODE_DISALLOW_PERL_EXTENDED;
}
}
elsif ($allowed_uv >= 0xD800 && $allowed_uv <= 0xDFFF) {
$utf8n_flag_to_warn_complement = $::UTF8_WARN_NONCHAR
|$::UTF8_WARN_SUPER
- |$::UTF8_WARN_ABOVE_31_BIT;
+ |$::UTF8_WARN_PERL_EXTENDED;
$utf8n_flag_to_disallow_complement = $::UTF8_DISALLOW_NONCHAR
|$::UTF8_DISALLOW_SUPER
- |$::UTF8_DISALLOW_ABOVE_31_BIT;
+ |$::UTF8_DISALLOW_PERL_EXTENDED;
$uvchr_flag_to_warn_complement = $::UNICODE_WARN_NONCHAR
|$::UNICODE_WARN_SUPER
- |$::UNICODE_WARN_ABOVE_31_BIT;
+ |$::UNICODE_WARN_PERL_EXTENDED;
$uvchr_flag_to_disallow_complement = $::UNICODE_DISALLOW_NONCHAR
|$::UNICODE_DISALLOW_SUPER
- |$::UNICODE_DISALLOW_ABOVE_31_BIT;
+ |$::UNICODE_DISALLOW_PERL_EXTENDED;
$controlling_warning_category = 'surrogate';
}
elsif ( ($allowed_uv >= 0xFDD0 && $allowed_uv <= 0xFDEF)
$utf8n_flag_to_warn_complement = $::UTF8_WARN_SURROGATE
|$::UTF8_WARN_SUPER
- |$::UTF8_WARN_ABOVE_31_BIT;
+ |$::UTF8_WARN_PERL_EXTENDED;
$utf8n_flag_to_disallow_complement = $::UTF8_DISALLOW_SURROGATE
|$::UTF8_DISALLOW_SUPER
- |$::UTF8_DISALLOW_ABOVE_31_BIT;
+ |$::UTF8_DISALLOW_PERL_EXTENDED;
$uvchr_flag_to_warn_complement = $::UNICODE_WARN_SURROGATE
|$::UNICODE_WARN_SUPER
- |$::UNICODE_WARN_ABOVE_31_BIT;
+ |$::UNICODE_WARN_PERL_EXTENDED;
$uvchr_flag_to_disallow_complement = $::UNICODE_DISALLOW_SURROGATE
|$::UNICODE_DISALLOW_SUPER
- |$::UNICODE_DISALLOW_ABOVE_31_BIT;
+ |$::UNICODE_DISALLOW_PERL_EXTENDED;
$controlling_warning_category = 'nonchar';
}
foreach my $disallow_type (0..2) {
# 0 is don't disallow this type of code point
# 1 is do disallow
- # 2 is do disallow, but only for above 31 bit
+ # 2 is do disallow, but only code points requiring
+ # perl-extended-UTF8
my $disallow_flags;
my $expected_ret;
}
elsif ($disallow_type == 2) {
next if ! requires_extended_utf8($allowed_uv);
- $disallow_flags = $::UTF8_DISALLOW_ABOVE_31_BIT;
+ $disallow_flags = $::UTF8_DISALLOW_PERL_EXTENDED;
$expected_ret = 0;
}
else { # type is 0
$expect_warnings_for_malformed = 0;
}
elsif ($warning_type == 4) { # Like type 3, but uses the
- # above-31-bit flags
+ # PERL_EXTENDED flags
# The complement flags were set up so that the
- # above-31-bit flags have been tested that they don't
+ # PERL_EXTENDED flags have been tested that they don't
# trigger wrongly for too small code points. And the
# flags have been set up so that those small code
# points are tested for being above Unicode. What's
# left to test is that the large code points do
- # trigger the above-31-bit flags.
+ # trigger the PERL_EXTENDED flags.
next if ! requires_extended_utf8($allowed_uv);
next if $controlling_warning_category ne 'non_unicode';
$eval_warn = "no warnings; use warnings 'non_unicode'";
$expect_regular_warnings = 1;
$expect_warnings_for_overflow = 1;
$expect_warnings_for_malformed = 0;
- $this_utf8n_flag_to_warn = $::UTF8_WARN_ABOVE_31_BIT;
+ $this_utf8n_flag_to_warn = $::UTF8_WARN_PERL_EXTENDED;
$this_utf8n_flag_to_disallow
- = $::UTF8_DISALLOW_ABOVE_31_BIT;
- $this_uvchr_flag_to_warn = $::UNICODE_WARN_ABOVE_31_BIT;
+ = $::UTF8_DISALLOW_PERL_EXTENDED;
+ $this_uvchr_flag_to_warn
+ = $::UNICODE_WARN_PERL_EXTENDED;
$this_uvchr_flag_to_disallow
- = $::UNICODE_DISALLOW_ABOVE_31_BIT;
+ = $::UNICODE_DISALLOW_PERL_EXTENDED;
}
else {
die "Unexpected warning type '$warning_type'";
# should emit a message or not. It's tentative
# because, even if we ordinarily would output it, we
# don't if malformations are allowed -- except an
- # overflow is also a SUPER and ABOVE_31_BIT, and if
+ # overflow is also a SUPER and PERL_EXTENDED, and if
# warnings for those are enabled, the overflow
# warning does get raised.
if ( $expect_warnings_for_overflow
&& ( $malformed_allow_type == 0
|| ( $this_warning_flags
& ($::UTF8_WARN_SUPER
- |$::UTF8_WARN_ABOVE_31_BIT))))
+ |$::UTF8_WARN_PERL_EXTENDED))))
{
push @expected_warnings, $overflow_msg_pattern;
}
for (my $i = @expected_return_flags - 1; $i >= 0; $i--) {
if ($expected_return_flags[$i] & $returned_flags) {
if ($expected_return_flags[$i]
- == $::UTF8_DISALLOW_ABOVE_31_BIT)
+ == $::UTF8_GOT_PERL_EXTENDED)
{
pass(" Expected and got return flag for"
- . " above_31_bit");
+ . " PERL_EXTENDED");
}
# The first entries in this are
# malformations
PERL_ARGS_ASSERT_IS_UTF8_STRING_FLAGS;
assert(0 == (flags & ~(UTF8_DISALLOW_ILLEGAL_INTERCHANGE
- |UTF8_DISALLOW_ABOVE_31_BIT)));
+ |UTF8_DISALLOW_PERL_EXTENDED)));
if (flags == 0) {
return is_utf8_string(s, len);
}
- if ((flags & ~UTF8_DISALLOW_ABOVE_31_BIT)
+ if ((flags & ~UTF8_DISALLOW_PERL_EXTENDED)
== UTF8_DISALLOW_ILLEGAL_INTERCHANGE)
{
return is_strict_utf8_string(s, len);
}
- if ((flags & ~UTF8_DISALLOW_ABOVE_31_BIT)
+ if ((flags & ~UTF8_DISALLOW_PERL_EXTENDED)
== UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE)
{
return is_c9strict_utf8_string(s, len);
PERL_ARGS_ASSERT_IS_UTF8_STRING_LOCLEN_FLAGS;
assert(0 == (flags & ~(UTF8_DISALLOW_ILLEGAL_INTERCHANGE
- |UTF8_DISALLOW_ABOVE_31_BIT)));
+ |UTF8_DISALLOW_PERL_EXTENDED)));
if (flags == 0) {
return is_utf8_string_loclen(s, len, ep, el);
}
- if ((flags & ~UTF8_DISALLOW_ABOVE_31_BIT)
+ if ((flags & ~UTF8_DISALLOW_PERL_EXTENDED)
== UTF8_DISALLOW_ILLEGAL_INTERCHANGE)
{
return is_strict_utf8_string_loclen(s, len, ep, el);
}
- if ((flags & ~UTF8_DISALLOW_ABOVE_31_BIT)
+ if ((flags & ~UTF8_DISALLOW_PERL_EXTENDED)
== UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE)
{
return is_c9strict_utf8_string_loclen(s, len, ep, el);
PERL_ARGS_ASSERT_IS_UTF8_VALID_PARTIAL_CHAR_FLAGS;
assert(0 == (flags & ~(UTF8_DISALLOW_ILLEGAL_INTERCHANGE
- |UTF8_DISALLOW_ABOVE_31_BIT)));
+ |UTF8_DISALLOW_PERL_EXTENDED)));
if (s >= e || s + UTF8SKIP(s) <= e) {
return FALSE;
cp_above_legal_max, uv, MAX_NON_DEPRECATED_CP);
}
if ( (flags & UNICODE_WARN_SUPER)
- || ( UNICODE_IS_ABOVE_31_BIT(uv)
- && (flags & UNICODE_WARN_ABOVE_31_BIT)))
+ || ( UNICODE_IS_PERL_EXTENDED(uv)
+ && (flags & UNICODE_WARN_PERL_EXTENDED)))
{
Perl_ck_warner_d(aTHX_ packWARN(WARN_NON_UNICODE),
/* Choose the more dire applicable warning */
- (UNICODE_IS_ABOVE_31_BIT(uv))
+ (UNICODE_IS_PERL_EXTENDED(uv))
? above_31_bit_cp_format
: super_cp_format,
uv);
}
if ( (flags & UNICODE_DISALLOW_SUPER)
- || ( UNICODE_IS_ABOVE_31_BIT(uv)
- && (flags & UNICODE_DISALLOW_ABOVE_31_BIT)))
+ || ( UNICODE_IS_PERL_EXTENDED(uv)
+ && (flags & UNICODE_DISALLOW_PERL_EXTENDED)))
{
return NULL;
}
PERL_ARGS_ASSERT__IS_UTF8_CHAR_HELPER;
assert(0 == (flags & ~(UTF8_DISALLOW_ILLEGAL_INTERCHANGE
- |UTF8_DISALLOW_ABOVE_31_BIT)));
+ |UTF8_DISALLOW_PERL_EXTENDED)));
assert(! UTF8_IS_INVARIANT(*s));
/* A variant char must begin with a start byte */
return 0; /* Above Unicode */
}
- if ( (flags & UTF8_DISALLOW_ABOVE_31_BIT)
+ if ( (flags & UTF8_DISALLOW_PERL_EXTENDED)
&& UNLIKELY(is_utf8_cp_above_31_bits(s, e)))
{
return 0; /* Above 31 bits */
&& ((flags & ( UTF8_DISALLOW_NONCHAR
|UTF8_DISALLOW_SURROGATE
|UTF8_DISALLOW_SUPER
- |UTF8_DISALLOW_ABOVE_31_BIT
+ |UTF8_DISALLOW_PERL_EXTENDED
|UTF8_WARN_NONCHAR
|UTF8_WARN_SURROGATE
|UTF8_WARN_SUPER
- |UTF8_WARN_ABOVE_31_BIT))
+ |UTF8_WARN_PERL_EXTENDED))
/* In case of a malformation, 'uv' is not valid, and has
* been changed to something in the Unicode range.
* Currently we don't output a deprecation message if there
/* Overflow means we also have a super code point that uses Perl's
* extended UTF-8, but we handle all three cases here */
possible_problems
- &= ~(UTF8_GOT_OVERFLOW|UTF8_GOT_SUPER|UTF8_GOT_ABOVE_31_BIT);
+ &= ~(UTF8_GOT_OVERFLOW|UTF8_GOT_SUPER|UTF8_GOT_PERL_EXTENDED);
*errors |= UTF8_GOT_OVERFLOW;
/* But the API says we flag all errors found */
*errors |= UTF8_GOT_SUPER;
}
if (flags
- & (UTF8_WARN_ABOVE_31_BIT|UTF8_DISALLOW_ABOVE_31_BIT))
+ & (UTF8_WARN_PERL_EXTENDED|UTF8_DISALLOW_PERL_EXTENDED))
{
- *errors |= UTF8_GOT_ABOVE_31_BIT;
+ *errors |= UTF8_GOT_PERL_EXTENDED;
}
/* Disallow if any of the three categories say to */
if ( ! (flags & UTF8_ALLOW_OVERFLOW)
|| (flags & ( UTF8_DISALLOW_SUPER
- |UTF8_DISALLOW_ABOVE_31_BIT)))
+ |UTF8_DISALLOW_PERL_EXTENDED)))
{
disallowed = TRUE;
}
* are on, because this code point is above IV_MAX */
if ( ckWARN_d(WARN_DEPRECATED)
|| ! (flags & UTF8_ALLOW_OVERFLOW)
- || (flags & (UTF8_WARN_SUPER|UTF8_WARN_ABOVE_31_BIT)))
+ || (flags & (UTF8_WARN_SUPER|UTF8_WARN_PERL_EXTENDED)))
{
/* The warnings code explicitly says it doesn't handle the
* test for these after the regular SUPER ones, and before
* possibly bailing out, so that the slightly more dire warning
* will override the regular one. */
- if ( (flags & (UTF8_WARN_ABOVE_31_BIT
+ if ( (flags & (UTF8_WARN_PERL_EXTENDED
|UTF8_WARN_SUPER
- |UTF8_DISALLOW_ABOVE_31_BIT))
+ |UTF8_DISALLOW_PERL_EXTENDED))
&& ( ( UNLIKELY(orig_problems & UTF8_GOT_TOO_SHORT)
&& UNLIKELY(is_utf8_cp_above_31_bits(
adjusted_s0,
adjusted_send)))
|| ( LIKELY(! (orig_problems & UTF8_GOT_TOO_SHORT))
- && UNLIKELY(UNICODE_IS_ABOVE_31_BIT(uv)))))
+ && UNLIKELY(UNICODE_IS_PERL_EXTENDED(uv)))))
{
if ( ! (flags & UTF8_CHECK_ONLY)
- && (flags & (UTF8_WARN_ABOVE_31_BIT|UTF8_WARN_SUPER))
+ && (flags & (UTF8_WARN_PERL_EXTENDED|UTF8_WARN_SUPER))
&& ckWARN_d(WARN_NON_UNICODE))
{
pack_warn = packWARN(WARN_NON_UNICODE);
}
}
- if (flags & ( UTF8_WARN_ABOVE_31_BIT
- |UTF8_DISALLOW_ABOVE_31_BIT))
+ if (flags & ( UTF8_WARN_PERL_EXTENDED
+ |UTF8_DISALLOW_PERL_EXTENDED))
{
- *errors |= UTF8_GOT_ABOVE_31_BIT;
+ *errors |= UTF8_GOT_PERL_EXTENDED;
- if (flags & UTF8_DISALLOW_ABOVE_31_BIT) {
+ if (flags & UTF8_DISALLOW_PERL_EXTENDED) {
disallowed = TRUE;
}
}
* went up to 2 ** 31 - 1. Note that these all overflow a signed 32-bit word.
* The first byte of these code points is FE or FF on ASCII platforms. If the
* first byte is FF, it will overflow a 32-bit word. */
-#define UTF8_DISALLOW_ABOVE_31_BIT 0x4000
-#define UTF8_GOT_ABOVE_31_BIT UTF8_DISALLOW_ABOVE_31_BIT
-#define UTF8_WARN_ABOVE_31_BIT 0x8000
+#define UTF8_DISALLOW_PERL_EXTENDED 0x4000
+#define UTF8_GOT_PERL_EXTENDED UTF8_DISALLOW_PERL_EXTENDED
+#define UTF8_WARN_PERL_EXTENDED 0x8000
/* For back compat, these old names are misleading for UTF_EBCDIC */
-#define UTF8_DISALLOW_FE_FF UTF8_DISALLOW_ABOVE_31_BIT
-#define UTF8_WARN_FE_FF UTF8_WARN_ABOVE_31_BIT
+#define UTF8_DISALLOW_ABOVE_31_BIT UTF8_DISALLOW_PERL_EXTENDED
+#define UTF8_GOT_ABOVE_31_BIT UTF8_GOT_PERL_EXTENDED
+#define UTF8_WARN_ABOVE_31_BIT UTF8_WARN_PERL_EXTENDED
+#define UTF8_DISALLOW_FE_FF UTF8_DISALLOW_PERL_EXTENDED
+#define UTF8_WARN_FE_FF UTF8_WARN_PERL_EXTENDED
#define UTF8_CHECK_ONLY 0x10000
#define _UTF8_NO_CONFIDENCE_IN_CURLEN 0x20000 /* Internal core use only */
* let's be conservative and do as Unicode says. */
#define PERL_UNICODE_MAX 0x10FFFF
-#define UNICODE_WARN_SURROGATE 0x0001 /* UTF-16 surrogates */
-#define UNICODE_WARN_NONCHAR 0x0002 /* Non-char code points */
-#define UNICODE_WARN_SUPER 0x0004 /* Above 0x10FFFF */
-#define UNICODE_WARN_ABOVE_31_BIT 0x0008 /* Above 0x7FFF_FFFF */
-#define UNICODE_DISALLOW_SURROGATE 0x0010
-#define UNICODE_DISALLOW_NONCHAR 0x0020
-#define UNICODE_DISALLOW_SUPER 0x0040
-#define UNICODE_DISALLOW_ABOVE_31_BIT 0x0080
+#define UNICODE_WARN_SURROGATE 0x0001 /* UTF-16 surrogates */
+#define UNICODE_WARN_NONCHAR 0x0002 /* Non-char code points */
+#define UNICODE_WARN_SUPER 0x0004 /* Above 0x10FFFF */
+#define UNICODE_WARN_PERL_EXTENDED 0x0008 /* Above 0x7FFF_FFFF */
+#define UNICODE_WARN_ABOVE_31_BIT UNICODE_WARN_PERL_EXTENDED
+#define UNICODE_DISALLOW_SURROGATE 0x0010
+#define UNICODE_DISALLOW_NONCHAR 0x0020
+#define UNICODE_DISALLOW_SUPER 0x0040
+#define UNICODE_DISALLOW_PERL_EXTENDED 0x0080
+#define UNICODE_DISALLOW_ABOVE_31_BIT UNICODE_DISALLOW_PERL_EXTENDED
#define UNICODE_WARN_ILLEGAL_C9_INTERCHANGE \
(UNICODE_WARN_SURROGATE|UNICODE_WARN_SUPER)
#define UNICODE_WARN_ILLEGAL_INTERCHANGE \
&& UNICODE_IS_END_PLANE_NONCHAR_GIVEN_NOT_SUPER(uv)))
#define UNICODE_IS_SUPER(uv) ((UV) (uv) > PERL_UNICODE_MAX)
-#define UNICODE_IS_ABOVE_31_BIT(uv) ((UV) (uv) > 0x7FFFFFFF)
+#define UNICODE_IS_PERL_EXTENDED(uv) ((UV) (uv) > 0x7FFFFFFF)
#define LATIN_SMALL_LETTER_SHARP_S LATIN_SMALL_LETTER_SHARP_S_NATIVE
#define LATIN_SMALL_LETTER_Y_WITH_DIAERESIS \