X-Git-Url: https://perl5.git.perl.org/perl5.git/blobdiff_plain/7620cb1076a0ca7cf9c77b73d4e7c6ff861d3a91..d89ea36076afa0a17a3faa7cb33a1c9c215a26eb:/lib/charnames.t diff --git a/lib/charnames.t b/lib/charnames.t index 9d37daa..cd87350 100644 --- a/lib/charnames.t +++ b/lib/charnames.t @@ -41,7 +41,7 @@ use charnames ":full"; 1 EOE - like($@, "above 0xFF", "Verify get warning for \\N{above ff} under 'use bytes' with :full"); + like($@, qr/above 0xFF/, "Verify get warning for \\N{above ff} under 'use bytes' with :full"); ok(! defined $res, "... and result is undefined"); $res = eval <<'EOE'; @@ -49,7 +49,7 @@ use charnames 'cyrillic'; "Here: \N{Be}!"; 1 EOE - like($@, "CYRILLIC CAPITAL LETTER BE.*above 0xFF", "Verify get warning under 'use bytes' with explicit script"); + like($@, qr/CYRILLIC CAPITAL LETTER BE.*above 0xFF/, "Verify get warning under 'use bytes' with explicit script"); ok(! defined $res, "... and result is undefined"); $res = eval <<'EOE'; @@ -62,12 +62,13 @@ EOE { - use charnames ':full', ":alias" => { mychar1 => "0xE8000", - mychar2 => 983040, # U+F0000 - mychar3 => "U+100000", - myctrl => 0x80, - mylarge => "U+111000", - }; + use charnames ":alias" => { mychar1 => "0xE8000", + mychar2 => 983040, # U+F0000 + mychar3 => "U+100000", + myctrl => utf8::unicode_to_native(0x80), + mylarge => "U+111000", + }; + is ("\N{PILE OF POO}", chr(0x1F4A9), "Verify :alias alone implies :full"); is ("\N{mychar1}", chr(0xE8000), "Verify that can define hex alias"); is (charnames::viacode(0xE8000), "mychar1", "And that can get the alias back"); is ("\N{mychar2}", chr(0xF0000), "Verify that can define decimal alias"); @@ -76,29 +77,14 @@ EOE is (charnames::viacode(0x100000), "mychar3", "And that can get the alias back"); is ("\N{mylarge}", chr(0x111000), "Verify that can define alias beyond Unicode"); is (charnames::viacode(0x111000), "mylarge", "And that can get the alias back"); - is (charnames::viacode(0x80), "myctrl", "Verify that can name a nameless control"); + is (charnames::viacode(utf8::unicode_to_native(0x80)), "myctrl", "Verify that can name a nameless control"); } -my $encoded_be; -my $encoded_alpha; -my $encoded_bet; -my $encoded_deseng; - -# If octal representation of unicode char is \0xyzt, then the utf8 is \3xy\2zt -if (ord('A') == 65) { # as on ASCII or UTF-8 machines - $encoded_be = "\320\261"; - $encoded_alpha = "\316\261"; - $encoded_bet = "\327\221"; - $encoded_deseng = "\360\220\221\215"; -} -else { # EBCDIC where UTF-EBCDIC may be used (this may be 1047 specific since - # UTF-EBCDIC is codepage specific) - $encoded_be = "\270\102\130"; - $encoded_alpha = "\264\130"; - $encoded_bet = "\270\125\130"; - $encoded_deseng = "\336\102\103\124"; -} +my $encoded_be = byte_utf8a_to_utf8n("\320\261"); +my $encoded_alpha = byte_utf8a_to_utf8n("\316\261"); +my $encoded_bet = byte_utf8a_to_utf8n("\327\221"); +my $encoded_deseng = byte_utf8a_to_utf8n("\360\220\221\215"); sub to_bytes { unpack"U0a*", shift; @@ -208,10 +194,10 @@ sub test_vianame ($$$) { use charnames ':full'; my $text = "\N{LATIN CAPITAL LETTER A WITH DIAERESIS}"; - is($text, latin1_to_native("\xc4"), 'Verify \N{} returns correct string under "no utf8"'); + is($text, chr utf8::unicode_to_native(0xc4), 'Verify \N{} returns correct string under "no utf8"'); # I'm not sure that this tests anything different from the above. - cmp_ok(ord($text), '==', ord(latin1_to_native("\xc4")), '... and ords are ok'); + cmp_ok(ord($text), '==', utf8::unicode_to_native(0xc4), '... and ords are ok'); } { @@ -234,7 +220,7 @@ sub test_vianame ($$$) { use bytes; is(charnames::vianame("GOTHIC LETTER AHSA"), 0x10330, "Verify vianame \\N{name} is unaffected by 'use bytes'"); - is(charnames::vianame("U+FF"), chr(0xFF), "Verify vianame \\N{U+FF} is unaffected by 'use bytes'"); + is(charnames::vianame("U+FF"), chr(utf8::unicode_to_native(0xFF)), "Verify vianame \\N{U+FF} is unaffected by 'use bytes'"); cmp_ok($warning_count, '==', scalar @WARN, "Verify vianame doesn't warn on legal inputs under 'use bytes'"); ok(! defined charnames::vianame("U+100"), "Verify vianame \\N{U+100} is undef under 'use bytes'"); ok($warning_count == scalar @WARN - 1 && $WARN[-1] =~ /above 0xFF/, "Verify vianame gives appropriate warning for previous test"); @@ -243,9 +229,9 @@ sub test_vianame ($$$) { ok(! defined charnames::string_vianame("GOTHIC LETTER AHSA"), "Verify string_vianame(\"GOTHIC LETTER AHSA\") is undefined under 'use bytes'"); ok($warning_count == scalar @WARN - 1 && $WARN[-1] =~ /above 0xFF/, "Verify string_vianame gives appropriate warning for previous test"); $warning_count = @WARN; - is(charnames::string_vianame("U+FF"), chr(0xFF), "Verify string_vianame(\"U+FF\") is chr(0xFF) under 'use bytes'"); + is(charnames::string_vianame("U+FF"), chr(utf8::unicode_to_native(0xFF)), "Verify string_vianame(\"U+FF\") is chr(0xFF) under 'use bytes'"); cmp_ok($warning_count, '==', scalar @WARN, "Verify string_vianame doesn't warn on legal inputs under 'use bytes'"); - is(charnames::string_vianame("LATIN SMALL LETTER Y WITH DIAERESIS"), chr(0xFF), "Verify string_vianame(\"LATIN SMALL LETTER Y WITH DIAERESIS\") is chr(0xFF) under 'use bytes'"); + is(charnames::string_vianame("LATIN SMALL LETTER Y WITH DIAERESIS"), chr(utf8::unicode_to_native(0xFF)), "Verify string_vianame(\"LATIN SMALL LETTER Y WITH DIAERESIS\") is chr(native 0xFF) under 'use bytes'"); cmp_ok($warning_count, '==', scalar @WARN, "Verify string_vianame doesn't warn on legal inputs under 'use bytes'"); ok(! defined charnames::string_vianame("U+100"), "Verify string_vianame \\N{U+100} is undef under 'use bytes'"); ok($warning_count == scalar @WARN - 1 && $WARN[-1] =~ /above 0xFF/, "Verify string_vianame gives appropriate warning for previous test"); @@ -278,7 +264,7 @@ is("\N{LINE FEED (LF)}", "\n", 'Verify "\N{LINE FEED (LF)}" eq "\n"'); is("\N{LINE FEED}", "\n", 'Verify "\N{LINE FEED}" eq "\n"'); is("\N{LF}", "\n", 'Verify "\N{LF}" eq "\n"'); -my $nel = latin1_to_native("\x85"); +my $nel = chr utf8::unicode_to_native(0x85); $nel = qr/^$nel$/; like("\N{NEXT LINE (NEL)}", $nel, 'Verify "\N{NEXT LINE (NEL)}" is correct'); @@ -295,11 +281,11 @@ is("\N{BOM}", chr(0xFEFF), 'Verify "\N{BOM}" is correct'); my $ok = ! grep { /"HORIZONTAL TABULATION" is deprecated.*"CHARACTER TABULATION"/ } @WARN; ok($ok, '... and doesnt give deprecated warning'); - # XXX These tests should be changed for 5.16, when we convert BELL to the - # Unicode version. - is("\N{BELL}", "\a", 'Verify "\N{BELL}" eq "\a"'); - my $ok = grep { /"BELL" is deprecated.*"ALERT"/ } @WARN; - ok($ok, '... and that gives correct deprecated warning'); + if ($^V lt v5.17.0) { + is("\N{BELL}", "\a", 'Verify "\N{BELL}" eq "\a"'); + my $ok = grep { /"BELL" is deprecated.*"ALERT"/ } @WARN; + ok($ok, '... and that gives correct deprecated warning'); + } no warnings 'deprecated'; @@ -313,6 +299,19 @@ is("\N{BOM}", chr(0xFEFF), 'Verify "\N{BOM}" is correct'); is(charnames::viacode(0xFEFF), "ZERO WIDTH NO-BREAK SPACE", 'Verify viacode(0xFEFF) is correct'); +# These test that the changes to these in 6.1 are recognized. (The double +# test of using viacode and vianame is less than optimal as two errors could +# cancel each other out, but later each is tested individually, and this +# sidesteps and EBCDIC issues. +is(charnames::viacode(charnames::vianame("CR")), "CARRIAGE RETURN", + 'Verify viacode(vianame("CR")) is "CARRIAGE RETURN"'); +is(charnames::viacode(charnames::vianame("LF")), "LINE FEED", + 'Verify viacode(vianame("LF")) is "LINE FEED"'); +is(charnames::viacode(charnames::vianame("FF")), "FORM FEED", + 'Verify viacode(vianame("FF")) is "FORM FEED"'); +is(charnames::viacode(charnames::vianame("NEL")), "NEXT LINE", + 'Verify viacode(vianame("NEL")) is "NEXT LINE"'); + { use warnings; cmp_ok(ord("\N{BOM}"), '==', 0xFEFF, 'Verify \N{BOM} is correct'); @@ -336,7 +335,8 @@ ok(! defined charnames::viacode(0x110000), ok((grep { /\Qyou asked for U+110000/ } @WARN), '... and gives warning'); is(charnames::viacode(0), "NULL", 'Verify charnames::viacode(0) eq "NULL"'); -is(charnames::viacode("BE"), "VULGAR FRACTION THREE QUARTERS", 'Verify charnames::viacode("BE") eq "VULGAR FRACTION THREE QUARTERS"'); +my $three_quarters = sprintf("%2X", utf8::unicode_to_native(0xBE)); +is(charnames::viacode("$three_quarters"), "VULGAR FRACTION THREE QUARTERS", 'Verify charnames::viacode(native "BE") eq "VULGAR FRACTION THREE QUARTERS"'); is(charnames::viacode("U+00000000000FEED"), "ARABIC LETTER WAW ISOLATED FORM", 'Verify charnames::viacode("U+00000000000FEED") eq "ARABIC LETTER WAW ISOLATED FORM"'); { @@ -352,7 +352,7 @@ is(charnames::viacode("U+00000000000FEED"), "ARABIC LETTER WAW ISOLATED FORM", ' is("\N{EOT}", "\N{END OF TRANSMISSION}", 'Verify "\N{EOT}" eq "\N{END OF TRANSMISSION}"'); is("\N{ENQ}", "\N{ENQUIRY}", 'Verify "\N{ENQ}" eq "\N{ENQUIRY}"'); is("\N{ACK}", "\N{ACKNOWLEDGE}", 'Verify "\N{ACK}" eq "\N{ACKNOWLEDGE}"'); - is("\N{BEL}", "\N{BELL}", 'Verify "\N{BEL}" eq "\N{BELL}"'); + is("\N{BEL}", "\N{BELL}", 'Verify "\N{BEL}" eq "\N{BELL}"') if $^V lt v5.17.0; is("\N{BS}", "\N{BACKSPACE}", 'Verify "\N{BS}" eq "\N{BACKSPACE}"'); is("\N{HT}", "\N{HORIZONTAL TABULATION}", 'Verify "\N{HT}" eq "\N{HORIZONTAL TABULATION}"'); is("\N{LF}", "\N{LINE FEED (LF)}", 'Verify "\N{LF}" eq "\N{LINE FEED (LF)}"'); @@ -733,7 +733,7 @@ is($_, 'foobar', 'Verify charnames.pm doesnt clobbers $_'); my $names = do "unicore/Name.pl"; ok(defined $names, "Verify can read 'unicore/Name.pl'"); -my $non_ascii = native_to_latin1($names) =~ tr/\0-\177//c; +my $non_ascii = native_to_uni($names) =~ tr/\0-\177//c; ok(! $non_ascii, "Verify all official names are ASCII-only"); # Verify that charnames propagate to eval("") @@ -763,7 +763,7 @@ is("\N{U+1D0C5}", "\N{BYZANTINE MUSICAL SYMBOL FTHORA SKLIRON CHROMA VASIS}", 'V $res .= '-' . ($^H{73174} // ""); $res .= '-2' if ":" =~ /\N{COLON}/; $res .= '-3' if ":" =~ /\N{COLON}/i; - is($res, "foo-foo-1--2-3", "Verify %^H doesn't get reset by \N{...}"); + is($res, "foo-foo-1--2-3", "Verify %^H doesn't get reset by \\N{...}"); } { use charnames qw(.*); @@ -823,7 +823,8 @@ is("\N{U+1D0C5}", "\N{BYZANTINE MUSICAL SYMBOL FTHORA SKLIRON CHROMA VASIS}", 'V is("\N{mychar1}", "f", "Inner block: verify that \\N{mychar1} is redefined"); is(charnames::vianame("mychar1"), ord("f"), "Inner block: verify that vianame(mychar1) is redefined"); is(charnames::string_vianame("mychar1"), "f", "Inner block: verify that string_vianame(mychar1) is redefined"); - is("\N{mychar2}", "\x{FFFD}", "Inner block: verify that \\N{mychar2} outer definition didn't leak"); + eval '"\N{mychar2}"'; + like($@, qr/Unknown charname 'mychar2'/, "Inner block: verify that \\N{mychar2} outer definition didn't leak"); ok( ! defined charnames::vianame("mychar2"), "Inner block: verify that vianame(mychar2) outer definition didn't leak"); ok( ! defined charnames::string_vianame("mychar2"), "Inner block: verify that string_vianame(mychar2) outer definition didn't leak"); is("\N{myprivate1}", "\x{E8001}", "Inner block: verify that \\N{myprivate1} is redefined "); @@ -831,38 +832,46 @@ is("\N{U+1D0C5}", "\N{BYZANTINE MUSICAL SYMBOL FTHORA SKLIRON CHROMA VASIS}", 'V is(charnames::string_vianame("myprivate1"), chr(0xE8001), "Inner block: verify that string_vianame(myprivate1) is redefined"); is(charnames::viacode(0xE8001), "myprivate1", "Inner block: verify that myprivate1 viacode is redefined"); ok(! defined charnames::viacode(0xE8000), "Inner block: verify that outer myprivate1 viacode didn't leak"); - is("\N{myprivate2}", "\x{FFFD}", "Inner block: verify that \\N{myprivate2} outer definition didn't leak"); + eval '"\N{myprivate2}"'; + like($@, qr/Unknown charname 'myprivate2'/, "Inner block: verify that \\N{myprivate2} outer definition didn't leak"); ok(! defined charnames::vianame("myprivate2"), "Inner block: verify that vianame(myprivate2) outer definition didn't leak"); ok(! defined charnames::string_vianame("myprivate2"), "Inner block: verify that string_vianame(myprivate2) outer definition didn't leak"); ok(! defined charnames::viacode(0x100000), "Inner block: verify that myprivate2 viacode outer definition didn't leak"); is("\N{BE}", $hiragana_be, "Inner block: verify that \\N uses the correct script"); cmp_ok(charnames::vianame("BE"), "==", ord($hiragana_be), "Inner block: verify that vianame uses the correct script"); cmp_ok(charnames::string_vianame("BE"), "==", $hiragana_be, "Inner block: verify that string_vianame uses the correct script"); - is("\N{Hiragana: BE}", "\x{FFFD}", "Inner block without :short: \\N with short doesn't work"); + eval '"\N{Hiragana: BE}"'; + like($@, qr/Unknown charname 'Hiragana: BE'/, "Inner block without :short: \\N with short doesn't work"); ok(! defined charnames::vianame("Hiragana: BE"), "Inner block without :short: verify that vianame with short doesn't work"); ok(! defined charnames::string_vianame("Hiragana: BE"), "Inner block without :short: verify that string_vianame with short doesn't work"); { # An inner block where only :short definitions are valid. use charnames ":short"; - is("\N{mychar1}", "\x{FFFD}", "Inner inner block: verify that mychar1 outer definition didn't leak with \\N"); + eval '"\N{mychar1}"'; + like($@, qr/Unknown charname 'mychar1'/, "Inner inner block: verify that mychar1 outer definition didn't leak with \\N"); ok( ! defined charnames::vianame("mychar1"), "Inner inner block: verify that mychar1 outer definition didn't leak with vianame"); ok( ! defined charnames::string_vianame("mychar1"), "Inner inner block: verify that mychar1 outer definition didn't leak with string_vianame"); - is("\N{mychar2}", "\x{FFFD}", "Inner inner block: verify that mychar2 outer definition didn't leak with \\N"); + eval '"\N{mychar2}"'; + like($@, qr/Unknown charname 'mychar2'/, "Inner inner block: verify that mychar2 outer definition didn't leak with \\N"); ok( ! defined charnames::vianame("mychar2"), "Inner inner block: verify that mychar2 outer definition didn't leak with vianame"); ok( ! defined charnames::string_vianame("mychar2"), "Inner inner block: verify that mychar2 outer definition didn't leak with string_vianame"); - is("\N{myprivate1}", "\x{FFFD}", "Inner inner block: verify that myprivate1 outer definition didn't leak with \\N"); + eval '"\N{myprivate1}"'; + like($@, qr/Unknown charname 'myprivate1'/, "Inner inner block: verify that myprivate1 outer definition didn't leak with \\N"); ok(! defined charnames::vianame("myprivate1"), "Inner inner block: verify that myprivate1 outer definition didn't leak with vianame"); ok(! defined charnames::string_vianame("myprivate1"), "Inner inner block: verify that myprivate1 outer definition didn't leak with string_vianame"); - is("\N{myprivate2}", "\x{FFFD}", "Inner inner block: verify that myprivate2 outer definition didn't leak with \\N"); + eval '"\N{myprivate2}"'; + like($@, qr/Unknown charname 'myprivate2'/, "Inner inner block: verify that myprivate2 outer definition didn't leak with \\N"); ok(! defined charnames::vianame("myprivate2"), "Inner inner block: verify that myprivate2 outer definition didn't leak with vianame"); ok(! defined charnames::string_vianame("myprivate2"), "Inner inner block: verify that myprivate2 outer definition didn't leak with string_vianame"); ok(! defined charnames::viacode(0xE8000), "Inner inner block: verify that mychar1 outer outer definition didn't leak with viacode"); ok(! defined charnames::viacode(0xE8001), "Inner inner block: verify that mychar1 outer definition didn't leak with viacode"); ok(! defined charnames::viacode(0x100000), "Inner inner block: verify that mychar2 outer definition didn't leak with viacode"); - is("\N{BE}", "\x{FFFD}", "Inner inner block without script: verify that outer :script didn't leak with \\N"); + eval '"\N{BE}"'; + like($@, qr/Unknown charname 'BE'/, "Inner inner block without script: verify that outer :script didn't leak with \\N"); ok(! defined charnames::vianame("BE"), "Inner inner block without script: verify that outer :script didn't leak with vianames"); ok(! defined charnames::string_vianame("BE"), "Inner inner block without script: verify that outer :script didn't leak with string_vianames"); - is("\N{HIRAGANA LETTER BE}", "\x{FFFD}", "Inner inner block without :full: verify that outer :full didn't leak with \\N"); + eval '"\N{HIRAGANA LETTER BE}"'; + like($@, qr/Unknown charname 'HIRAGANA LETTER BE'/, "Inner inner block without :full: verify that outer :full didn't leak with \\N"); is("\N{Hiragana: BE}", $hiragana_be, "Inner inner block with :short: verify that \\N works with :short"); cmp_ok(charnames::vianame("Hiragana: BE"), "==", ord($hiragana_be), "Inner inner block with :short: verify that vianame works with :short"); cmp_ok(charnames::string_vianame("Hiragana: BE"), "==", $hiragana_be, "Inner inner block with :short: verify that string_vianame works with :short"); @@ -872,7 +881,8 @@ is("\N{U+1D0C5}", "\N{BYZANTINE MUSICAL SYMBOL FTHORA SKLIRON CHROMA VASIS}", 'V is("\N{mychar1}", "f", "Inner block: verify that \\N{mychar1} is redefined"); is(charnames::vianame("mychar1"), ord("f"), "Inner block: verify that vianame(mychar1) is redefined"); is(charnames::string_vianame("mychar1"), "f", "Inner block: verify that string_vianame(mychar1) is redefined"); - is("\N{mychar2}", "\x{FFFD}", "Inner block: verify that \\N{mychar2} outer definition didn't leak"); + eval '"\N{mychar2}"'; + like($@, qr/Unknown charname 'mychar2'/, "Inner block: verify that \\N{mychar2} outer definition didn't leak"); ok( ! defined charnames::vianame("mychar2"), "Inner block: verify that vianame(mychar2) outer definition didn't leak"); ok( ! defined charnames::string_vianame("mychar2"), "Inner block: verify that string_vianame(mychar2) outer definition didn't leak"); is("\N{myprivate1}", "\x{E8001}", "Inner block: verify that \\N{myprivate1} is redefined "); @@ -880,14 +890,16 @@ is("\N{U+1D0C5}", "\N{BYZANTINE MUSICAL SYMBOL FTHORA SKLIRON CHROMA VASIS}", 'V is(charnames::string_vianame("myprivate1"), chr(0xE8001), "Inner block: verify that string_vianame(myprivate1) is redefined"); is(charnames::viacode(0xE8001), "myprivate1", "Inner block: verify that myprivate1 viacode is redefined"); ok(! defined charnames::viacode(0xE8000), "Inner block: verify that outer myprivate1 viacode didn't leak"); - is("\N{myprivate2}", "\x{FFFD}", "Inner block: verify that \\N{myprivate2} outer definition didn't leak"); + eval '"\N{myprivate2}"'; + like($@, qr/Unknown charname 'myprivate2'/, "Inner block: verify that \\N{myprivate2} outer definition didn't leak"); ok(! defined charnames::vianame("myprivate2"), "Inner block: verify that vianame(myprivate2) outer definition didn't leak"); ok(! defined charnames::string_vianame("myprivate2"), "Inner block: verify that string_vianame(myprivate2) outer definition didn't leak"); ok(! defined charnames::viacode(0x100000), "Inner block: verify that myprivate2 viacode outer definition didn't leak"); is("\N{BE}", $hiragana_be, "Inner block: verify that \\N uses the correct script"); cmp_ok(charnames::vianame("BE"), "==", ord($hiragana_be), "Inner block: verify that vianame uses the correct script"); cmp_ok(charnames::string_vianame("BE"), "==", $hiragana_be, "Inner block: verify that string_vianame uses the correct script"); - is("\N{Hiragana: BE}", "\x{FFFD}", "Inner block without :short: \\N with short doesn't work"); + eval '"\N{Hiragana: BE}"'; + like($@, qr/Unknown charname 'Hiragana: BE'/, "Inner block without :short: \\N with short doesn't work"); ok(! defined charnames::vianame("Hiragana: BE"), "Inner block without :short: verify that vianame with short doesn't work"); ok(! defined charnames::string_vianame("Hiragana: BE"), "Inner block without :short: verify that string_vianame with short doesn't work"); } @@ -920,7 +932,8 @@ is("\N{U+1D0C5}", "\N{BYZANTINE MUSICAL SYMBOL FTHORA SKLIRON CHROMA VASIS}", 'V is(charnames::string_vianame("O-i"), chr(0x10426), "Verify that loose script list matching works with string_vianame"); is(charnames::vianame("o i"), 0x1044E, "Verify that loose script list matching works with vianame"); } - is ("\N{latincapitallettera}", "\x{FFFD}", "Verify that loose matching caching doesn't leak outside of scope"); + eval '"\N{latincapitallettera}"'; + like($@, qr/Unknown charname 'latincapitallettera'/, "Verify that loose matching caching doesn't leak outside of scope"); { use charnames qw(:loose :short); cmp_ok("\N{co pt-ic:she-i}", "==", chr(0x3E3), "Verify that loose :short matching works"); @@ -997,17 +1010,20 @@ is("\N{U+1D0C5}", "\N{BYZANTINE MUSICAL SYMBOL FTHORA SKLIRON CHROMA VASIS}", 'V while (<$fh>) { chomp; my ($code, $name, undef, undef, undef, undef, undef, undef, undef, undef, $u1name) = split ";"; - my $decimal = hex $code; + my $decimal = utf8::unicode_to_native(hex $code); + $code = sprintf("%04X", $decimal) unless $::IS_ASCII; + + $decimal = hex $code; # The Unicode version 1 name is used instead of any that are - # marked + # marked . $name = $u1name if $name eq ""; - $name = 'ALERT' if $decimal == 7; + # In earlier Perls, we reject this code point's name (BELL) + $name = "" if $^V lt v5.17.0 && $decimal == 0x1F514; - # XXX This test should be changed for 5.16 when we convert to use - # Unicode's BELL - $name = "" if $decimal == 0x1F514; + # ALERT overrides BELL + $name = 'ALERT' if $decimal == utf8::unicode_to_native(7); # Some don't have names, leave those array elements undefined next unless $name; @@ -1040,23 +1056,67 @@ is("\N{U+1D0C5}", "\N{BYZANTINE MUSICAL SYMBOL FTHORA SKLIRON CHROMA VASIS}", 'V } close $fh; - # The Hangul syllable names aren't in the file above; their names - # are algorithmically determinable, but to avoid perpetuating any - # programming errors, this file contains the complete list, gathered - # from the web. - while () { - chomp; - next unless $_; # Guard against empty lines getting inserted. - my ($code, $name) = split ";"; - my $decimal = hex $code; - $names[$decimal] = $name; - my $block = $decimal >> $block_size_bits; - $algorithmic_names_count[$block] = 1; + use Unicode::UCD; + if (pack("C*", split /\./, Unicode::UCD::UnicodeVersion()) gt v1.1.5) { + # The Hangul syllable names aren't in the file above; their names + # are algorithmically determinable, but to avoid perpetuating any + # programming errors, this file contains the complete list, gathered + # from the web. + while () { + chomp; + next unless $_; # Guard against empty lines getting inserted. + my ($code, $name) = split ";"; + my $decimal = hex $code; + $names[$decimal] = $name; + my $block = $decimal >> $block_size_bits; + $algorithmic_names_count[$block] = 1; + } } - open $fh, "<", "../../lib/unicore/NameAliases.txt" or - die "Can't open ../../lib/unicore/NameAliases.txt: $!"; - while (<$fh>) { + my @name_aliases; + use Unicode::UCD; + if (ord('A') == 65 + && pack( "C*", split /\./, Unicode::UCD::UnicodeVersion()) ge v6.1.0) + { + open my $fh, "<", "../../lib/unicore/NameAliases.txt" + or die "Can't open ../../lib/unicore/NameAliases.txt: $!"; + @name_aliases = <$fh> + } + else { + + # If this Unicode version doesn't have the full .txt file, or are on + # an EBCDIC platform where they need to be translated, get the data + # from prop_invmap() (which should do the translation) and convert it + # to the file's format + use Unicode::UCD 'prop_invmap'; + my ($invlist_ref, $invmap_ref, undef, $default) + = prop_invmap('Name_Alias'); + for my $i (0 .. @$invlist_ref - 1) { + + # Convert the aliases for code points that have just one alias to + # single element arrays for uniform handling below. + if (! ref $invmap_ref->[$i]) { + + # But we test only the real aliases, not the ones which are + # just really placeholders. + next if $invmap_ref->[$i] eq $default; + + $invmap_ref->[$i] = [ $invmap_ref->[$i] ]; + } + + + # Change each alias for the code point to the form that the file + # has + foreach my $j ($invlist_ref->[$i] .. $invlist_ref->[$i+1] - 1) { + foreach my $value (@{$invmap_ref->[$i]}) { + $value =~ s/: /;/; + push @name_aliases, sprintf("%04X;%s\n", $j, $value); + } + } + } + } + + for (@name_aliases) { chomp; s/^\s*#.*//; next unless $_; @@ -1138,7 +1198,10 @@ is("\N{U+1D0C5}", "\N{BYZANTINE MUSICAL SYMBOL FTHORA SKLIRON CHROMA VASIS}", 'V # These four code points now have names, from NameAlias, but # aren't listed as having names in UnicodeData.txt, so viacode # returns their alias names, not undef - next if $i == 0x80 || $i == 0x81 || $i == 0x84 || $i == 0x99; + next if $i == utf8::unicode_to_native(0x80) + || $i == utf8::unicode_to_native(0x81) + || $i == utf8::unicode_to_native(0x84) + || $i == utf8::unicode_to_native(0x99); # If there is no name for this code point, all we can # test is that. @@ -1148,7 +1211,17 @@ is("\N{U+1D0C5}", "\N{BYZANTINE MUSICAL SYMBOL FTHORA SKLIRON CHROMA VASIS}", 'V # Otherwise, test that the name and code point map # correctly. $all_pass &= test_vianame($i, $hex, $names[$i]); - $all_pass &= is(charnames::viacode($i), $names[$i], "Verify viacode(0x$hex) is \"$names[$i]\""); + + # These four code points have a different Unicode1 name than + # regular name, and viacode has already specifically tested + # for the regular name + if ($i != utf8::unicode_to_native(0x0a) + && $i != utf8::unicode_to_native(0x0c) + && $i != utf8::unicode_to_native(0x0d) + && $i != utf8::unicode_to_native(0x85)) + { + $all_pass &= is(charnames::viacode($i), $names[$i], "Verify viacode(0x$hex) is \"$names[$i]\""); + } # And make sure that a non-algorithmically named code # point doesn't also map to one that is. @@ -1166,21 +1239,26 @@ is("\N{U+1D0C5}", "\N{BYZANTINE MUSICAL SYMBOL FTHORA SKLIRON CHROMA VASIS}", 'V $block = $end_block + 1; } - open $fh, "<", "../../lib/unicore/NamedSequences.txt" or - die "Can't open ../../lib/unicore/NamedSequences.txt: $!"; - while (<$fh>) { - chomp; - s/^\s*#.*//; - next unless $_; - my ($name, $codes) = split ";"; - my $utf8 = pack("U*", map { hex } split " ", $codes); - is(charnames::string_vianame($name), $utf8, "Verify string_vianame(\"$name\") is the proper utf8"); - my $loose_name = get_loose_name($name); - use charnames ":loose"; - is(charnames::string_vianame($loose_name), $utf8, "Verify string_vianame(\"$loose_name\") is the proper utf8"); - #diag("$name, $utf8"); + if (open my $fh, "<", "../../lib/unicore/NamedSequences.txt") { + while (<$fh>) { + chomp; + s/^\s*#.*//; + next unless $_; + my ($name, $codes) = split ";"; + my $utf8 = pack("W*", map { hex } split " ", $codes); + is(charnames::string_vianame($name), $utf8, "Verify string_vianame(\"$name\") is the proper utf8"); + my $loose_name = get_loose_name($name); + use charnames ":loose"; + is(charnames::string_vianame($loose_name), $utf8, "Verify string_vianame(\"$loose_name\") is the proper utf8"); + #diag("$name, $utf8"); + } + close $fh; + } + else { + use Unicode::UCD; + die "Can't open ../../lib/unicore/NamedSequences.txt: $!" + if pack("C*", split /\./, Unicode::UCD::UnicodeVersion()) ge v4.1.0; } - close $fh; unless ($all_pass) {