utf8_to_uvchr() EBCDIC fix
author Karl Williamson <khw@cpan.org>
Sun, 6 Aug 2017 03:47:08 +0000 (21:47 -0600)
committer Karl Williamson <khw@cpan.org>
Sun, 6 Aug 2017 04:10:05 +0000 (22:10 -0600)
This fixes a warning message for EBCDIC.  The native character set is
different from Unicode, and needs special handling.  I had earlier tried
to avoid an #ifdef, but the resulting warning was hard to test correctly,
which helped convince me that it would also be confusing to anyone trying
to make sense of the message.  So, in goes the #ifdef.

ext/XS-APItest/t/utf8_warn_base.pl
utf8.c

index 07652e8..0f781c4 100644 (file)
@@ -121,7 +121,7 @@ my @tests;
         ],
         [ "overlong malformation, highest 2-byte",
             (isASCII) ? "\xc1\xbf" : I8_to_native("\xc4\xbf"),
-            (isASCII) ? 0x7F : 0x9F,    # Output as U+, so no need to xlate
+            (isASCII) ? 0x7F : 0xFF,
         ],
         [ "overlong malformation, lowest 3-byte",
             (isASCII) ? "\xe0\x80\x80" : I8_to_native("\xe0\xa0\xa0"),
diff --git a/utf8.c b/utf8.c
index 93cdd66..c24baeb 100644 (file)
--- a/utf8.c
+++ b/utf8.c
@@ -1875,11 +1875,17 @@ Perl_utf8n_to_uvchr_error(pTHX_ const U8 *s,
                         }
                         else {
                             U8 tmpbuf[UTF8_MAXBYTES+1];
-                            const U8 * const e = uvoffuni_to_utf8_flags(tmpbuf,
-                                                                        uv, 0);
-                            const char * preface = (uv <= PERL_UNICODE_MAX)
-                                                   ? "U+"
-                                                   : "0x";
+                            const U8 * const e = uvchr_to_utf8_flags(tmpbuf,
+                                                                     uv, 0);
+                            /* Don't use U+ for non-Unicode code points, which
+                             * includes those in the Latin1 range */
+                            const char * preface = (    uv > PERL_UNICODE_MAX
+#ifdef EBCDIC
+                                                     || uv <= 0xFF
+#endif
+                                                    )
+                                                   ? "0x"
+                                                   : "U+";
                             message = Perl_form(aTHX_
                                 "%s: %s (overlong; instead use %s to represent"
                                 " %s%0*" UVXf ")",