Add uvchr_to_utf8_flags_msgs()
authorKarl Williamson <khw@cpan.org>
Sun, 4 Feb 2018 05:14:22 +0000 (22:14 -0700)
committerKarl Williamson <khw@cpan.org>
Wed, 7 Feb 2018 18:19:14 +0000 (11:19 -0700)
This is propmpted by Encode's needs.  When called with the proper
parameter, it returns any warnings instead of displaying them directly.

embed.fnc
embed.h
ext/XS-APItest/APItest.xs
ext/XS-APItest/t/utf8_warn_base.pl
proto.h
utf8.c
utf8.h

index 763a17c..ce876eb 100644 (file)
--- a/embed.fnc
+++ b/embed.fnc
@@ -1878,7 +1878,9 @@ Ap        |UV     |utf8n_to_uvuni|NN const U8 *s|STRLEN curlen|NULLOK STRLEN *retlen|U32 fl
 Adm    |U8*    |uvchr_to_utf8  |NN U8 *d|UV uv
 Ap     |U8*    |uvuni_to_utf8  |NN U8 *d|UV uv
 Adm    |U8*    |uvchr_to_utf8_flags    |NN U8 *d|UV uv|UV flags
-Apd    |U8*    |uvoffuni_to_utf8_flags |NN U8 *d|UV uv|const UV flags
+AdmM   |U8*    |uvchr_to_utf8_flags_msgs|NN U8 *d|UV uv|UV flags|NULLOK HV ** msgs
+Apod   |U8*    |uvoffuni_to_utf8_flags |NN U8 *d|UV uv|const UV flags
+ApM    |U8*    |uvoffuni_to_utf8_flags_msgs|NN U8 *d|UV uv|const UV flags|NULLOK HV** msgs
 Ap     |U8*    |uvuni_to_utf8_flags    |NN U8 *d|UV uv|UV flags
 Apd    |char*  |pv_uni_display |NN SV *dsv|NN const U8 *spv|STRLEN len|STRLEN pvlim|UV flags
 ApdR   |char*  |sv_uni_display |NN SV *dsv|NN SV *ssv|STRLEN pvlim|UV flags
diff --git a/embed.h b/embed.h
index 5f2184a..1405176 100644 (file)
--- a/embed.h
+++ b/embed.h
 #define utf8_to_uvuni_buf(a,b,c)       Perl_utf8_to_uvuni_buf(aTHX_ a,b,c)
 #define utf8n_to_uvchr_msgs(a,b,c,d,e,f)       Perl_utf8n_to_uvchr_msgs(aTHX_ a,b,c,d,e,f)
 #define utf8n_to_uvuni(a,b,c,d)        Perl_utf8n_to_uvuni(aTHX_ a,b,c,d)
-#define uvoffuni_to_utf8_flags(a,b,c)  Perl_uvoffuni_to_utf8_flags(aTHX_ a,b,c)
+#define uvoffuni_to_utf8_flags_msgs(a,b,c,d)   Perl_uvoffuni_to_utf8_flags_msgs(aTHX_ a,b,c,d)
 #define uvuni_to_utf8(a,b)     Perl_uvuni_to_utf8(aTHX_ a,b)
 #define uvuni_to_utf8_flags(a,b,c)     Perl_uvuni_to_utf8_flags(aTHX_ a,b,c)
 #define valid_utf8_to_uvchr    Perl_valid_utf8_to_uvchr
index 5e67e7f..b12cc82 100644 (file)
@@ -1523,6 +1523,36 @@ test_uvchr_to_utf8_flags(uv, flags)
     OUTPUT:
         RETVAL
 
+AV *
+test_uvchr_to_utf8_flags_msgs(uv, flags)
+
+        SV *uv
+        SV *flags
+    PREINIT:
+        U8 dest[UTF8_MAXBYTES + 1];
+        U8 *ret;
+
+    CODE:
+        HV *msgs = NULL;
+        RETVAL = newAV();
+        sv_2mortal((SV*)RETVAL);
+
+        ret = uvchr_to_utf8_flags_msgs(dest, SvUV(uv), SvUV(flags), &msgs);
+
+        if (ret) {
+            av_push(RETVAL, newSVpvn((char *) dest, ret - dest));
+        }
+        else {
+            av_push(RETVAL,  &PL_sv_undef);
+        }
+
+        if (msgs) {
+            av_push(RETVAL, newRV_noinc((SV*)msgs));
+        }
+
+    OUTPUT:
+        RETVAL
+
 MODULE = XS::APItest:Overload  PACKAGE = XS::APItest::Overload
 
 void
index b1935d9..0c9e20b 100644 (file)
@@ -1353,20 +1353,19 @@ foreach my $test (@tests) {
               next if $skip_most_tests;
             }
 
-            # This tests three functions.  utf8n_to_uvchr_error,
-            # utf8n_to_uvchr_msgs, and uvchr_to_utf8_flags.  But only the
-            # first two are variants of each other.  We use a loop
-            # 'which_func' to determine which of these.  uvchr_to_utf8_flags
-            # is done separately at the end of each iteration, only when
-            # which_func is 0.  which_func is numeric in part so we don't
-            # have to type in the function name and risk misspelling it
-            # somewhere, and also it sets whether we are expecting warnings
-            # or not in certain places.  The _msgs() version of the function
-            # expects warnings even if lexical ones are turned off, so by
-            # making its which_func == 1, we can say we want warnings;
-            # whereas the other one with the value 0, doesn't get them.
+            # This tests four functions: utf8n_to_uvchr_error,
+            # utf8n_to_uvchr_msgs, uvchr_to_utf8_flags, and
+            # uvchr_to_utf8_msgs.  The first two are variants of each other,
+            # and the final two also form a pair.  We use a loop 'which_func'
+            # to determine which of each pair is being tested.  The main loop
+            # tests either the first and third, or the 2nd and fourth.
+            # which_func is sets whether we are expecting warnings or not in
+            # certain places.  The _msgs() version of the functions expects
+            # warnings even if lexical ones are turned off, so by making its
+            # which_func == 1, we can say we want warnings; whereas the other
+            # one with the value 0, doesn't get them.
             for my $which_func (0, 1) {
-              my $func = ($which_func)
+              my $utf8_func = ($which_func)
                           ? 'utf8n_to_uvchr_msgs'
                           : 'utf8n_to_uvchr_error';
 
@@ -1594,7 +1593,7 @@ foreach my $test (@tests) {
                         }
                     }
 
-                    my $this_name = "$func() $testname: ";
+                    my $this_name = "$utf8_func() $testname: ";
                     my @scratch_expected_return_flags = @expected_return_flags;
                     if (! $initially_malformed) {
                         $this_name .= ($disallowed)
@@ -1614,7 +1613,7 @@ foreach my $test (@tests) {
                     my $this_flags
                         = $allow_flags|$this_warning_flags|$this_disallow_flags;
                     my $eval_text =      "$eval_warn; \$ret_ref"
-                            . " = test_$func("
+                            . " = test_$utf8_func("
                             . "'$this_bytes', $this_length, $this_flags)";
                     eval "$eval_text";
                     if (! ok ($@ eq "", "$this_name: eval succeeded"))
@@ -1713,8 +1712,9 @@ foreach my $test (@tests) {
                                   pass("flag for returned msg is expected");
                               }
                               else {
-                                  fail("flag for returned msg is expected: "
-                                 . flags_to_text($flag, \@utf8n_flags_to_text));
+                                  fail("flag ("
+                                     . flags_to_text($flag, \@utf8n_flags_to_text)
+                                     . ") for returned msg is expected");
                               }
                             }
 
@@ -1723,7 +1723,7 @@ foreach my $test (@tests) {
                                           "returned category for msg isn't 0");
                         }
 
-                        ok(@warnings_gotten == 0, "$func raised no warnings;"
+                        ok(@warnings_gotten == 0, "$utf8_func raised no warnings;"
                               . " the next tests are for ones in the returned"
                               . " variable")
                             or diag join "\n", "The unexpected warnings were:",
@@ -1745,7 +1745,7 @@ foreach my $test (@tests) {
                         $tested_CHECK_ONLY = 1;
                         my $this_flags = $this_disallow_flags|$::UTF8_CHECK_ONLY;
                         my $eval_text = "use warnings; \$ret_ref ="
-                                      . " test_$func('"
+                                      . " test_$utf8_func('"
                                       . "$this_bytes', $this_length,"
                                       . " $this_flags)";
                         eval $eval_text;
@@ -1774,7 +1774,10 @@ foreach my $test (@tests) {
                     # existing code point, it hasn't overflowed, and isn't
                     # malformed.
                     next if @malformation_names;
-                    next if $which_func;
+
+                    my $uvchr_func = ($which_func)
+                                     ? 'uvchr_to_utf8_flags_msgs'
+                                     : 'uvchr_to_utf8_flags';
 
                     $this_warning_flags = ($use_warn_flag)
                                           ? $this_uvchr_flag_to_warn
@@ -1790,10 +1793,10 @@ foreach my $test (@tests) {
                                           ? 'with flag for raising warnings'
                                           : 'no flag for raising warnings');
 
-                    $this_name = "uvchr_to_utf8_flags() $testname: "
-                                            . (($disallowed)
-                                                ? 'disallowed'
-                                                : 'allowed');
+                    $this_name = "$uvchr_func() $testname: "
+                                        . (($disallowed)
+                                           ? 'disallowed'
+                                           : 'allowed');
                     $this_name .= ", $eval_warn";
                     $this_name .= ", " . ((  $this_warning_flags
                                            & $this_uvchr_flag_to_warn)
@@ -1804,7 +1807,7 @@ foreach my $test (@tests) {
                     my $ret;
                     $this_flags = $this_warning_flags|$this_disallow_flags;
                     $eval_text = "$eval_warn; \$ret ="
-                            . " test_uvchr_to_utf8_flags("
+                            . " test_$uvchr_func("
                             . "$allowed_uv, $this_flags)";
                     eval "$eval_text";
                     if (! ok ($@ eq "", "$this_name: eval succeeded"))
@@ -1813,6 +1816,46 @@ foreach my $test (@tests) {
                            . uvchr_display_call($eval_text);
                         next;
                     }
+
+                    if ($which_func) {
+                        if (defined $ret->[1]) {
+                            my @returned_warnings;
+                            push @returned_warnings, $ret->[1]{'text'};
+                            my $text = $ret->[1]{'text'};
+                            my $flag = $ret->[1]{'flag_bit'};
+                            my $category = $ret->[1]{'warning_category'};
+
+                            if (! ok(($flag & ($flag-1)) == 0,
+                                        "flag for returned msg is a single bit"))
+                            {
+                                diag sprintf("flags are %x; msg=%s", $flag, $text);
+                            }
+                            else {
+                                if ($flag & $this_uvchr_flag_to_disallow) {
+                                    pass("flag for returned msg is expected");
+                                }
+                                else {
+                                    fail("flag ("
+                                        . flags_to_text($flag, \@utf8n_flags_to_text)
+                                        . ") for returned msg is expected");
+                                }
+                            }
+
+                            # In perl space, don't know the category numbers
+                            isnt($category, 0,
+                                            "returned category for msg isn't 0");
+
+                            ok(@warnings_gotten == 0, "$uvchr_func raised no warnings;"
+                                . " the next tests are for ones in the returned"
+                                . " variable")
+                                or diag join "\n", "The unexpected warnings were:",
+                                                                @warnings_gotten;
+                            @warnings_gotten = @returned_warnings;
+                        }
+
+                        $ret = $ret->[0];
+                    }
+
                     if ($disallowed) {
                         is($ret, undef, "    And returns undef")
                           or diag "Call was: " . uvchr_display_call($eval_text);
diff --git a/proto.h b/proto.h
index 2e3f965..9f6d0df 100644 (file)
--- a/proto.h
+++ b/proto.h
@@ -3677,9 +3677,13 @@ PERL_CALLCONV void       Perl_utilize(pTHX_ int aver, I32 floor, OP* version, OP* idop
        assert(idop)
 /* PERL_CALLCONV U8*   uvchr_to_utf8(pTHX_ U8 *d, UV uv); */
 /* PERL_CALLCONV U8*   uvchr_to_utf8_flags(pTHX_ U8 *d, UV uv, UV flags); */
+/* PERL_CALLCONV U8*   uvchr_to_utf8_flags_msgs(pTHX_ U8 *d, UV uv, UV flags, HV ** msgs); */
 PERL_CALLCONV U8*      Perl_uvoffuni_to_utf8_flags(pTHX_ U8 *d, UV uv, const UV flags);
 #define PERL_ARGS_ASSERT_UVOFFUNI_TO_UTF8_FLAGS        \
        assert(d)
+PERL_CALLCONV U8*      Perl_uvoffuni_to_utf8_flags_msgs(pTHX_ U8 *d, UV uv, const UV flags, HV** msgs);
+#define PERL_ARGS_ASSERT_UVOFFUNI_TO_UTF8_FLAGS_MSGS   \
+       assert(d)
 PERL_CALLCONV U8*      Perl_uvuni_to_utf8(pTHX_ U8 *d, UV uv);
 #define PERL_ARGS_ASSERT_UVUNI_TO_UTF8 \
        assert(d)
diff --git a/utf8.c b/utf8.c
index 18367f5..140272b 100644 (file)
--- a/utf8.c
+++ b/utf8.c
@@ -140,6 +140,14 @@ For details, see the description for L</uvchr_to_utf8_flags>.
 =cut
 */
 
+U8 *
+Perl_uvoffuni_to_utf8_flags(pTHX_ U8 *d, UV uv, const UV flags)
+{
+    PERL_ARGS_ASSERT_UVOFFUNI_TO_UTF8_FLAGS;
+
+    return uvoffuni_to_utf8_flags_msgs(d, uv, flags, NULL);
+}
+
 /* All these formats take a single UV code point argument */
 const char surrogate_cp_format[] = "UTF-16 surrogate U+%04" UVXf;
 const char nonchar_cp_format[]   = "Unicode non-character U+%04" UVXf
@@ -150,22 +158,38 @@ const char perl_extended_cp_format[] = "Code point 0x%" UVXf " is not"        \
                                        " Unicode, requires a Perl extension," \
                                        " and so is not portable";
 
-#define HANDLE_UNICODE_SURROGATE(uv, flags)                         \
+#define HANDLE_UNICODE_SURROGATE(uv, flags, msgs)                   \
     STMT_START {                                                    \
         if (flags & UNICODE_WARN_SURROGATE) {                       \
-            Perl_ck_warner_d(aTHX_ packWARN(WARN_SURROGATE),        \
-                                   surrogate_cp_format, uv);        \
+            U32 category = packWARN(WARN_SURROGATE);                \
+            const char * format = surrogate_cp_format;              \
+            if (msgs) {                                             \
+                *msgs = new_msg_hv(Perl_form(aTHX_ format, uv),     \
+                                   category,                        \
+                                   UNICODE_GOT_SURROGATE);          \
+            }                                                       \
+            else {                                                  \
+                Perl_ck_warner_d(aTHX_ category, format, uv);       \
+            }                                                       \
         }                                                           \
         if (flags & UNICODE_DISALLOW_SURROGATE) {                   \
             return NULL;                                            \
         }                                                           \
     } STMT_END;
 
-#define HANDLE_UNICODE_NONCHAR(uv, flags)                           \
+#define HANDLE_UNICODE_NONCHAR(uv, flags, msgs)                     \
     STMT_START {                                                    \
         if (flags & UNICODE_WARN_NONCHAR) {                         \
-            Perl_ck_warner_d(aTHX_ packWARN(WARN_NONCHAR),          \
-                                  nonchar_cp_format, uv);          \
+            U32 category = packWARN(WARN_NONCHAR);                  \
+            const char * format = nonchar_cp_format;                \
+            if (msgs) {                                             \
+                *msgs = new_msg_hv(Perl_form(aTHX_ format, uv),     \
+                                   category,                        \
+                                   UNICODE_GOT_NONCHAR);            \
+            }                                                       \
+            else {                                                  \
+                Perl_ck_warner_d(aTHX_ category, format, uv);       \
+            }                                                       \
         }                                                           \
         if (flags & UNICODE_DISALLOW_NONCHAR) {                     \
             return NULL;                                            \
@@ -178,10 +202,62 @@ const char perl_extended_cp_format[] = "Code point 0x%" UVXf " is not"        \
 #define MARK    UTF_CONTINUATION_MARK
 #define MASK    UTF_CONTINUATION_MASK
 
+/*
+=for apidoc uvchr_to_utf8_flags_msgs
+
+THIS FUNCTION SHOULD BE USED IN ONLY VERY SPECIALIZED CIRCUMSTANCES.
+
+Most code should use C<L</uvchr_to_utf8_flags>()> rather than call this directly.
+
+This function is for code that wants any warning and/or error messages to be
+returned to the caller rather than be displayed.  All messages that would have
+been displayed if all lexcial warnings are enabled will be returned.
+
+It is just like C<L</uvchr_to_utf8_flags>> but it takes an extra parameter
+placed after all the others, C<msgs>.  If this parameter is 0, this function
+behaves identically to C<L</uvchr_to_utf8_flags>>.  Otherwise, C<msgs> should
+be a pointer to an C<HV *> variable, in which this function creates a new HV to
+contain any appropriate messages.  The hash has three key-value pairs, as
+follows:
+
+=over 4
+
+=item C<text>
+
+The text of the message as a C<SVpv>.
+
+=item C<warn_categories>
+
+The warning category (or categories) packed into a C<SVuv>.
+
+=item C<flag>
+
+A single flag bit associated with this message, in a C<SVuv>.
+The bit corresponds to some bit in the C<*errors> return value,
+such as C<UNICODE_GOT_SURROGATE>.
+
+=back
+
+It's important to note that specifying this parameter as non-null will cause
+any warnings this function would otherwise generate to be suppressed, and
+instead be placed in C<*msgs>.  The caller can check the lexical warnings state
+(or not) when choosing what to do with the returned messages.
+
+The caller, of course, is responsible for freeing any returned HV.
+
+=cut
+*/
+
+/* Undocumented; we don't want people using this.  Instead they should use
+ * uvchr_to_utf8_flags_msgs() */
 U8 *
-Perl_uvoffuni_to_utf8_flags(pTHX_ U8 *d, UV uv, const UV flags)
+Perl_uvoffuni_to_utf8_flags_msgs(pTHX_ U8 *d, UV uv, const UV flags, HV** msgs)
 {
-    PERL_ARGS_ASSERT_UVOFFUNI_TO_UTF8_FLAGS;
+    PERL_ARGS_ASSERT_UVOFFUNI_TO_UTF8_FLAGS_MSGS;
+
+    if (msgs) {
+        *msgs = NULL;
+    }
 
     if (OFFUNI_IS_INVARIANT(uv)) {
        *d++ = LATIN1_TO_NATIVE(uv);
@@ -213,10 +289,10 @@ Perl_uvoffuni_to_utf8_flags(pTHX_ U8 *d, UV uv, const UV flags)
             if (UNLIKELY(   UNICODE_IS_32_CONTIGUOUS_NONCHARS(uv)
                          || UNICODE_IS_END_PLANE_NONCHAR_GIVEN_NOT_SUPER(uv)))
             {
-                HANDLE_UNICODE_NONCHAR(uv, flags);
+                HANDLE_UNICODE_NONCHAR(uv, flags, msgs);
             }
             else if (UNLIKELY(UNICODE_IS_SURROGATE(uv))) {
-                HANDLE_UNICODE_SURROGATE(uv, flags);
+                HANDLE_UNICODE_SURROGATE(uv, flags, msgs);
             }
         }
 #endif
@@ -234,17 +310,31 @@ Perl_uvoffuni_to_utf8_flags(pTHX_ U8 *d, UV uv, const UV flags)
         if (UNLIKELY(uv > MAX_EXTERNALLY_LEGAL_CP)) {
             Perl_croak(aTHX_ cp_above_legal_max, uv, MAX_EXTERNALLY_LEGAL_CP);
         }
-        if (      (flags & UNICODE_WARN_SUPER)
-            || (  (flags & UNICODE_WARN_PERL_EXTENDED)
+        if (       (flags & UNICODE_WARN_SUPER)
+            || (   (flags & UNICODE_WARN_PERL_EXTENDED)
                 && UNICODE_IS_PERL_EXTENDED(uv)))
         {
-            Perl_ck_warner_d(aTHX_ packWARN(WARN_NON_UNICODE),
+            const char * format = super_cp_format;
+            U32 category = packWARN(WARN_NON_UNICODE);
+            U32 flag = UNICODE_GOT_SUPER;
+
+            /* Choose the more dire applicable warning */
+            if (UNICODE_IS_PERL_EXTENDED(uv)) {
+                format = perl_extended_cp_format;
+                if (flags & (UNICODE_WARN_PERL_EXTENDED
+                            |UNICODE_DISALLOW_PERL_EXTENDED))
+                {
+                    flag = UNICODE_GOT_PERL_EXTENDED;
+                }
+            }
 
-              /* Choose the more dire applicable warning */
-              (UNICODE_IS_PERL_EXTENDED(uv))
-              ? perl_extended_cp_format
-              : super_cp_format,
-             uv);
+            if (msgs) {
+                *msgs = new_msg_hv(Perl_form(aTHX_ format, uv),
+                                   category, flag);
+            }
+            else {
+                Perl_ck_warner_d(aTHX_ packWARN(WARN_NON_UNICODE), format, uv);
+            }
         }
         if (       (flags & UNICODE_DISALLOW_SUPER)
             || (   (flags & UNICODE_DISALLOW_PERL_EXTENDED)
@@ -254,7 +344,7 @@ Perl_uvoffuni_to_utf8_flags(pTHX_ U8 *d, UV uv, const UV flags)
         }
     }
     else if (UNLIKELY(UNICODE_IS_END_PLANE_NONCHAR_GIVEN_NOT_SUPER(uv))) {
-        HANDLE_UNICODE_NONCHAR(uv, flags);
+        HANDLE_UNICODE_NONCHAR(uv, flags, msgs);
     }
 
     /* Test for and handle 4-byte result.   In the test immediately below, the
@@ -273,10 +363,10 @@ Perl_uvoffuni_to_utf8_flags(pTHX_ U8 *d, UV uv, const UV flags)
                    characters.  The end-plane non-characters for EBCDIC were
                    handled just above */
         if (UNLIKELY(UNICODE_IS_32_CONTIGUOUS_NONCHARS(uv))) {
-            HANDLE_UNICODE_NONCHAR(uv, flags);
+            HANDLE_UNICODE_NONCHAR(uv, flags, msgs);
         }
         else if (UNLIKELY(UNICODE_IS_SURROGATE(uv))) {
-            HANDLE_UNICODE_SURROGATE(uv, flags);
+            HANDLE_UNICODE_SURROGATE(uv, flags, msgs);
         }
 #endif
 
@@ -5937,7 +6027,7 @@ Perl_uvuni_to_utf8(pTHX_ U8 *d, UV uv)
 {
     PERL_ARGS_ASSERT_UVUNI_TO_UTF8;
 
-    return Perl_uvoffuni_to_utf8_flags(aTHX_ d, uv, 0);
+    return uvoffuni_to_utf8_flags(d, uv, 0);
 }
 
 /*
diff --git a/utf8.h b/utf8.h
index cfcdf84..828d1d1 100644 (file)
--- a/utf8.h
+++ b/utf8.h
@@ -66,9 +66,13 @@ the string is invariant.
 #define is_ascii_string(s, len)     is_utf8_invariant_string(s, len)
 #define is_invariant_string(s, len) is_utf8_invariant_string(s, len)
 
+#define uvoffuni_to_utf8_flags(d,uv,flags)                                     \
+                               uvoffuni_to_utf8_flags_msgs(d, uv, flags, 0)
 #define uvchr_to_utf8(a,b)          uvchr_to_utf8_flags(a,b,0)
 #define uvchr_to_utf8_flags(d,uv,flags)                                        \
-                            uvoffuni_to_utf8_flags(d,NATIVE_TO_UNI(uv),flags)
+                                    uvchr_to_utf8_flags_msgs(d,uv,flags, 0)
+#define uvchr_to_utf8_flags_msgs(d,uv,flags,msgs)                              \
+                uvoffuni_to_utf8_flags_msgs(d,NATIVE_TO_UNI(uv),flags, msgs)
 #define utf8_to_uvchr_buf(s, e, lenp)                                          \
                      utf8n_to_uvchr(s, (U8*)(e) - (U8*)(s), lenp,              \
                                     ckWARN_d(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY)
@@ -932,6 +936,12 @@ point's representation.
 #define UNICODE_DISALLOW_SUPER         0x0040
 #define UNICODE_DISALLOW_PERL_EXTENDED 0x0080
 #define UNICODE_DISALLOW_ABOVE_31_BIT  UNICODE_DISALLOW_PERL_EXTENDED
+
+#define UNICODE_GOT_SURROGATE       UNICODE_DISALLOW_SURROGATE
+#define UNICODE_GOT_NONCHAR         UNICODE_DISALLOW_NONCHAR
+#define UNICODE_GOT_SUPER           UNICODE_DISALLOW_SUPER
+#define UNICODE_GOT_PERL_EXTENDED   UNICODE_DISALLOW_PERL_EXTENDED
+
 #define UNICODE_WARN_ILLEGAL_C9_INTERCHANGE                                   \
                                   (UNICODE_WARN_SURROGATE|UNICODE_WARN_SUPER)
 #define UNICODE_WARN_ILLEGAL_INTERCHANGE                                      \