This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
utf8n_to_uvchr: U+ should be for only Unicode code points
author Karl Williamson <khw@cpan.org>
Mon, 19 Jun 2017 18:56:38 +0000 (12:56 -0600)
committer Karl Williamson <khw@cpan.org>
Thu, 13 Jul 2017 03:14:25 +0000 (21:14 -0600)
For above-Unicode, we should use 0xDEADBEEF instead of U+DEADBEEF.
                                 ^^                    ^^
This is because U+ only applies to Unicode.  This only affects a warning
message for overlongs.

ext/XS-APItest/t/utf8_warn_base.pl
utf8.c

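diff --git a/ext/XS-APItest/t/utf8_warn_base.pl b/ext/XS-APItest/t/utf8_warn_base.pl
index 2a77db6..b6771cb 100644 (file)
--- a/ext/XS-APItest/t/utf8_warn_base.pl
+++ b/ext/XS-APItest/t/utf8_warn_base.pl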
@@ -953,11 +953,14 @@ foreach my $test (@tests) {
                                         = display_bytes_no_quotes($this_bytes);
                                 my $correct_bytes
                                              = display_bytes_no_quotes($bytes);
+                                my $prefix = ($allowed_uv > 0x10FFFF)
+                                             ? "0x"
+                                             : "U+";
                                 push @expected_warnings,
                                      qr/\QMalformed UTF-8 character:\E
                                         \Q $overlong_bytes (overlong;\E
                                         \Q instead use $correct_bytes to\E
-                                        \Q represent U+$uv_string)/x;
+                                        \Q represent $prefix$uv_string)/x;
                             }
                             else {
                                 push @expected_warnings, qr/overlong/;
diff --git a/utf8.c b/utf8.c
index 87cb60d..76c3487 100644 (file)
--- a/utf8.c
+++ b/utf8.c
@@ -1587,12 +1587,16 @@ Perl_utf8n_to_uvchr_error(pTHX_ const U8 *s,
                             U8 tmpbuf[UTF8_MAXBYTES+1];
                             const U8 * const e = uvoffuni_to_utf8_flags(tmpbuf,
                                                                         uv, 0);
+                            const char * preface = (uv <= PERL_UNICODE_MAX)
+                                                   ? "U+"
+                                                   : "0x";
                             message = Perl_form(aTHX_
                                 "%s: %s (overlong; instead use %s to represent"
-                                " U+%0*" UVXf ")",
+                                " %s%0*" UVXf ")",
                                 malformed_text,
                                 _byte_dump_string(s0, curlen, 0),
                                 _byte_dump_string(tmpbuf, e - tmpbuf, 0),
+                                preface,
                                 ((uv < 256) ? 2 : 4), /* Field width of 2 for
                                                          small code points */
                                 uv);