This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Move IN6ADDR_ANY and IN6ADDR_LOOPBACK to @EXPORT_OK in Socket.pm, requested by Nicholas
[perl5.git] / utf8.c
diff --git a/utf8.c b/utf8.c
index 7bf2e15..6276308 100644 (file)
--- a/utf8.c
+++ b/utf8.c
@@ -547,9 +547,9 @@ Perl_utf8n_to_uvuni(pTHX_ const U8 *s, STRLEN curlen, STRLEN *retlen, U32 flags)
 #define UTF8_WARN_EMPTY                                 1
 #define UTF8_WARN_CONTINUATION                  2
 #define UTF8_WARN_NON_CONTINUATION              3
-#define UTF8_WARN_SHORT                                 5
-#define UTF8_WARN_OVERFLOW                      6
-#define UTF8_WARN_LONG                          8
+#define UTF8_WARN_SHORT                                 4
+#define UTF8_WARN_OVERFLOW                      5
+#define UTF8_WARN_LONG                          6
 
     if (curlen == 0 &&
        !(flags & UTF8_ALLOW_EMPTY)) {
@@ -580,7 +580,7 @@ Perl_utf8n_to_uvuni(pTHX_ const U8 *s, STRLEN curlen, STRLEN *retlen, U32 flags)
 #else
     if (uv == 0xfe || uv == 0xff) {
        if (flags & (UTF8_WARN_SUPER|UTF8_WARN_FE_FF)) {
-           sv = sv_2mortal(newSVpvf_nocontext("Code point beginning with byte 0x%02"UVXf" is not Unicode, and not portable", uv));
+           sv = sv_2mortal(Perl_newSVpvf(aTHX_ "Code point beginning with byte 0x%02"UVXf" is not Unicode, and not portable", uv));
            flags &= ~UTF8_WARN_SUPER;  /* Only warn once on this problem */
        }
        if (flags & (UTF8_DISALLOW_SUPER|UTF8_DISALLOW_FE_FF)) {
@@ -651,7 +651,7 @@ Perl_utf8n_to_uvuni(pTHX_ const U8 *s, STRLEN curlen, STRLEN *retlen, U32 flags)
     } else if (flags & (UTF8_DISALLOW_ILLEGAL_INTERCHANGE|UTF8_WARN_ILLEGAL_INTERCHANGE)) {
        if (UNICODE_IS_SURROGATE(uv)) {
            if ((flags & (UTF8_WARN_SURROGATE|UTF8_CHECK_ONLY)) == UTF8_WARN_SURROGATE) {
-               sv = sv_2mortal(newSVpvf_nocontext("UTF-16 surrogate U+%04"UVXf"", uv));
+               sv = sv_2mortal(Perl_newSVpvf(aTHX_ "UTF-16 surrogate U+%04"UVXf"", uv));
            }
            if (flags & UTF8_DISALLOW_SURROGATE) {
                goto disallowed;
@@ -659,7 +659,7 @@ Perl_utf8n_to_uvuni(pTHX_ const U8 *s, STRLEN curlen, STRLEN *retlen, U32 flags)
        }
        else if (UNICODE_IS_NONCHAR(uv)) {
            if ((flags & (UTF8_WARN_NONCHAR|UTF8_CHECK_ONLY)) == UTF8_WARN_NONCHAR ) {
-               sv = sv_2mortal(newSVpvf_nocontext("Unicode non-character U+%04"UVXf" is illegal for open interchange", uv));
+               sv = sv_2mortal(Perl_newSVpvf(aTHX_ "Unicode non-character U+%04"UVXf" is illegal for open interchange", uv));
            }
            if (flags & UTF8_DISALLOW_NONCHAR) {
                goto disallowed;
@@ -667,7 +667,7 @@ Perl_utf8n_to_uvuni(pTHX_ const U8 *s, STRLEN curlen, STRLEN *retlen, U32 flags)
        }
        else if ((uv > PERL_UNICODE_MAX)) {
            if ((flags & (UTF8_WARN_SUPER|UTF8_CHECK_ONLY)) == UTF8_WARN_SUPER) {
-               sv = sv_2mortal(newSVpvf_nocontext("Code point 0x%04"UVXf" is not Unicode, may not be portable", uv));
+               sv = sv_2mortal(Perl_newSVpvf(aTHX_ "Code point 0x%04"UVXf" is not Unicode, may not be portable", uv));
            }
            if (flags & UTF8_DISALLOW_SUPER) {
                goto disallowed;
@@ -766,13 +766,14 @@ returned and retlen is set, if possible, to -1.
 =cut
 */
 
+
 UV
 Perl_utf8_to_uvchr(pTHX_ const U8 *s, STRLEN *retlen)
 {
     PERL_ARGS_ASSERT_UTF8_TO_UVCHR;
 
     return utf8n_to_uvchr(s, UTF8_MAXBYTES, retlen,
-                         ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
+                         ckWARN_d(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
 }
 
 /*
@@ -798,7 +799,7 @@ Perl_utf8_to_uvuni(pTHX_ const U8 *s, STRLEN *retlen)
 
     /* Call the low level routine asking for checks */
     return Perl_utf8n_to_uvuni(aTHX_ s, UTF8_MAXBYTES, retlen,
-                              ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
+                              ckWARN_d(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
 }
 
 /*
@@ -1801,6 +1802,24 @@ Perl_to_utf8_case(pTHX_ const U8 *p, U8* ustrp, STRLEN *lenp,
 
     PERL_ARGS_ASSERT_TO_UTF8_CASE;
 
+    /* Note that swash_fetch() doesn't output warnings for these because it
+     * assumes we will */
+    if (uv1 >= UNICODE_SURROGATE_FIRST && ckWARN_d(WARN_UTF8)) {
+       if (uv1 <= UNICODE_SURROGATE_LAST) {
+           const char* desc = (PL_op) ? OP_DESC(PL_op) : normal;
+           Perl_warner(aTHX_ packWARN(WARN_UTF8),
+               "Operation \"%s\" returns its argument for UTF-16 surrogate U+%04"UVXf"", desc, uv1);
+       }
+       else if (UNICODE_IS_SUPER(uv1)) {
+           const char* desc = (PL_op) ? OP_DESC(PL_op) : normal;
+           Perl_warner(aTHX_ packWARN(WARN_UTF8),
+               "Operation \"%s\" returns its argument for non-Unicode code point 0x%04"UVXf"", desc, uv1);
+       }
+
+       /* Note that non-characters are perfectly legal, so no warning should
+        * be given */
+    }
+
     uvuni_to_utf8(tmpbuf, uv1);
 
     if (!*swashp) /* load on-demand */
@@ -2121,6 +2140,18 @@ Perl_swash_fetch(pTHX_ SV *swash, const U8 *ptr, bool do_utf8)
       /* If char is encoded then swatch is for the prefix */
        needents = (1 << UTF_ACCUMULATION_SHIFT);
        off      = NATIVE_TO_UTF(ptr[klen]) & UTF_CONTINUATION_MASK;
+       if (UTF8_IS_SUPER(ptr) && ckWARN_d(WARN_UTF8)) {
+           const UV code_point = utf8n_to_uvuni(ptr, UTF8_MAXBYTES, 0, 0);
+
+           /* This outputs warnings for binary properties only, assuming that
+            * to_utf8_case() will output any.  Also, surrogates aren't checked
+            * for, as that would warn on things like /\p{Gc=Cs}/ */
+           SV** const bitssvp = hv_fetchs(hv, "BITS", FALSE);
+           if (SvUV(*bitssvp) == 1) {
+               Perl_warner(aTHX_ packWARN(WARN_UTF8),
+                   "Code point 0x%04"UVXf" is not Unicode, no properties match it; all inverse properties do", code_point);
+           }
+       }
     }
 
     /*