#!./perl
use strict;
+# Test charnames.pm. If $ENV{PERL_RUN_SLOW_TESTS} is unset or 0, a random
+# selection of names is tested, a higher percentage of regular names is tested
+# than algorithmically-determined names.
+
+my $RUN_SLOW_TESTS_EVERY_CODE_POINT = 100;
+
+# If $ENV{PERL_RUN_SLOW_TESTS} is at least 1 and less than the number above,
+# all code points with names are tested. If it is at least that number, all
+# 1,114,112 Unicode code points are tested.
+
# Because \N{} is compile time, any warnings will get generated before
# execution, so have to have an array, and arrange things so no warning
# is generated twice to verify that in fact a warning did happen
# ---- For the alias extensions
require "../t/lib/common.pl";
-use charnames ':full';
-
-is("Here\N{EXCLAMATION MARK}?", "Here!?");
+is("Here\N{EXCLAMATION MARK}?", "Here!?", "Basic sanity, autoload of :full upon \\N");
+is("\N{latin: Q}", "Q", "autoload of :short upon \\N");
{
use bytes; # TEST -utf8 can switch utf8 on
1
EOE
- like($@, "above 0xFF");
- ok(! defined $res);
+ like($@, "above 0xFF", "Verify get warning for \\N{above ff} under 'use bytes' with :full");
+ ok(! defined $res, "... and result is undefined");
$res = eval <<'EOE';
use charnames 'cyrillic';
"Here: \N{Be}!";
1
EOE
- like($@, "CYRILLIC CAPITAL LETTER BE.*above 0xFF");
+ like($@, "CYRILLIC CAPITAL LETTER BE.*above 0xFF", "Verify get warning under 'use bytes' with explicit script");
+ ok(! defined $res, "... and result is undefined");
$res = eval <<'EOE';
use charnames ':full', ":alias" => { BOM => "LATIN SMALL LETTER B" };
"\N{BOM}";
EOE
- is ($@, "");
+ is ($@, "", "Verify that there is no warning for \\N{below 256} under 'use bytes'");
is ($res, 'b', "Verify that can redefine a standard alias");
}
unpack"U0a*", shift;
}
+sub get_loose_name ($) { # Modify name to stress the loose tests.
+
+ # First, all lower case,
+ my $loose_name = lc shift;
+
+ # Then squeeze out all the blanks not adjacent to hyphens, but make the
+ # spaces that are adjacent to hypens into two, to make sure the code isn't
+ # looking for just one when looking for non-medial hyphens.
+ $loose_name =~ s/ (?<! - ) \ + (?! - )//gx;
+ $loose_name =~ s/ / /g;
+
+ # Similarly, double the hyphens
+ $loose_name =~ s/-/--/g;
+
+ # And convert ABC into "A B-C" to add medial hyphens and spaces. Probably
+ # better to do this randomly, but think this is sufficient.
+ $loose_name =~ s/ ([^-\s]) ([^-\s]) ([^-\s]) /$1 $2-$3/gx;
+
+ return $loose_name
+}
+
+sub test_vianame ($$$) {
+
+ # Run the vianame tests on a code point, both loose and full
+
+ my $all_pass = 1;
+
+ # $i is the code point in decimal; $hex in hexadecimal; $name is
+ # character name to test
+ my ($i, $hex, $name) = @_;
+
+ # Get a copy of the name modified to stress the loose tests.
+ my $loose_name = get_loose_name($name);
+
+ # Switch loose and full in vianame vs string_vianame half the time
+ if (rand() < .5) {
+ use charnames ":full";
+ $all_pass &= is(charnames::vianame($name), $i, "Verify vianame(\"$name\") is 0x$hex");
+ use charnames ":loose";
+ $all_pass &= is(charnames::string_vianame($loose_name), chr($i), "Verify string_vianame(\"$loose_name\") is chr(0x$hex)");
+ }
+ else {
+ use charnames ":loose";
+ $all_pass &= is(charnames::vianame($loose_name), $i, "Verify vianame(\"$loose_name\") is 0x$hex");
+ use charnames ":full";
+ $all_pass &= is(charnames::string_vianame($name), chr($i), "Verify string_vianame(\"$name\") is chr(0x$hex)");
+ }
+ return $all_pass;
+}
+
{
use charnames ':full';
ok(grep { /"HORIZONTAL TABULATION" is deprecated.*CHARACTER TABULATION/ } @WARN);
+ # XXX These tests should be changed for 5.16, when we convert BELL to the
+ # Unicode version.
+ is("\N{BELL}", "\a");
+ ok((grep{ /"BELL" is deprecated.*ALERT/ } @WARN), 'BELL is deprecated');
+
no warnings 'deprecated';
is("\N{VERTICAL TABULATION}", "\013");
is("\N{VTS}", "\N{LINE TABULATION SET}");
is("\N{PLD}", "\N{PARTIAL LINE FORWARD}");
is("\N{PLU}", "\N{PARTIAL LINE BACKWARD}");
- is("\N{RI }", "\N{REVERSE LINE FEED}");
+ is("\N{RI}", "\N{REVERSE LINE FEED}");
is("\N{SS2}", "\N{SINGLE SHIFT TWO}");
is("\N{SS3}", "\N{SINGLE SHIFT THREE}");
is("\N{DCS}", "\N{DEVICE CONTROL STRING}");
is("\N{PU2}", "\N{PRIVATE USE TWO}");
is("\N{STS}", "\N{SET TRANSMIT STATE}");
is("\N{CCH}", "\N{CANCEL CHARACTER}");
- is("\N{MW }", "\N{MESSAGE WAITING}");
+ is("\N{MW}", "\N{MESSAGE WAITING}");
is("\N{SPA}", "\N{START OF GUARDED AREA}");
is("\N{EPA}", "\N{END OF GUARDED AREA}");
is("\N{SOS}", "\N{START OF STRING}");
is("\N{SCI}", "\N{SINGLE CHARACTER INTRODUCER}");
is("\N{CSI}", "\N{CONTROL SEQUENCE INTRODUCER}");
- is("\N{ST }", "\N{STRING TERMINATOR}");
+ is("\N{ST}", "\N{STRING TERMINATOR}");
is("\N{OSC}", "\N{OPERATING SYSTEM COMMAND}");
- is("\N{PM }", "\N{PRIVACY MESSAGE}");
+ is("\N{PM}", "\N{PRIVACY MESSAGE}");
is("\N{APC}", "\N{APPLICATION PROGRAM COMMAND}");
is("\N{PADDING CHARACTER}", "\N{PAD}");
is("\N{HIGH OCTET PRESET}","\N{HOP}");
is("\N{VS254}", "\N{VARIATION SELECTOR-254}");
is("\N{VS255}", "\N{VARIATION SELECTOR-255}");
is("\N{VS256}", "\N{VARIATION SELECTOR-256}");
+
+ # Test a few of the above with :loose
+ use charnames ":loose";
+ is("\N{n-e xt l-i ne}", "\N{n-e xt l-i ne (-n-e l-)}");
+ is("\N{n e-l}", "\N{n e-xt l i-ne ( n e-l )}");
+ is("\N{p-a dd-i ng c-h ar-a ct-e r}", "\N{p-a d}");
+ is("\N{s i-ng l-e-s h-i f-t 3}", "\N{s i-ng l-e s h-i f-t t h-r e-e}");
+ is("\N{vs256}", "\N{v-a ri-a ti-o n s-e le-c t o-r-256}");
}
# [perl #30409] charnames.pm clobbers default variable
# Unicode slowdown noted by Phil Pennock, traced to a bug fix in index
# SADAHIRO Tomoyuki's suggestion is to ensure that the UTF-8ness of both
-# arguments are indentical before calling index.
+# arguments are identical before calling index.
# To do this can take advantage of the fact that unicore/Name.pl is 7 bit
# (or at least should be). So assert that that it's true here. EBCDIC
# may be a problem (khw).
is($res, "foo-foo-1--2-3");
}
+{ use charnames qw(.*);
+ ok (! defined charnames::vianame("a"), "Verify that metachars in script names get quoted");
+}
+
{
# Test scoping. Outer block sets up some things; inner blocks
# override them, and then see if get restored.
is("\N{Hiragana: BE}", $hiragana_be, "Outer block: verify that :short works with \\N");
cmp_ok(charnames::vianame("Hiragana: BE"), "==", ord($hiragana_be), "Outer block: verify that :short works with vianame");
cmp_ok(charnames::string_vianame("Hiragana: BE"), "==", $hiragana_be, "Outer block: verify that :short works with string_vianame");
+ {
+ use charnames qw(:loose new_tai_lue des_eret);
+ is("\N{latincapitallettera}", "A", "Verify that loose matching works");
+ cmp_ok("\N{high-qa}", "==", chr(0x1980), "Verify that loose script list matching works");
+ is(charnames::string_vianame("O-i"), chr(0x10426), "Verify that loose script list matching works with string_vianame");
+ is(charnames::vianame("o i"), 0x1044E, "Verify that loose script list matching works with vianame");
+ }
+ is ("\N{latincapitallettera}", "\x{FFFD}", "Verify that loose matching caching doesn't leak outside of scope");
+ {
+ use charnames qw(:loose :short);
+ cmp_ok("\N{co pt-ic:she-i}", "==", chr(0x3E3), "Verify that loose :short matching works");
+ is(charnames::string_vianame("co pt_ic: She i"), chr(0x3E2), "Verify that loose :short matching works with string_vianame");
+ is(charnames::vianame(" Arm-en-ian: x e h_"), 0x56D, "Verify that loose :short matching works with vianame");
+ }
}
{
$seed = srand;
}
+ my $run_slow_tests = $ENV{PERL_RUN_SLOW_TESTS} || 0;
+
# We will look at the data grouped in "blocks" of the following
# size.
my $block_size_bits = 7; # above 16 is not sensible
# of the character. The percentage of each type to test is
# fuzzily independently settable. This breaks down when the block size is
# 1 or is large enough that both types of names occur in the same block
- my $percentage_of_regular_names = 25;
+ my $percentage_of_regular_names = ($run_slow_tests) ? 100 : 13;
my $percentage_of_algorithmic_names = (100 / $block_size); # 1 test/block
# If wants everything tested, do so by changing the block size to 1 so
# marked <control>
$name = $u1name if $name eq "<control>";
+ $name = 'ALERT' if $decimal == 7;
+
+ # XXX This test should be changed for 5.16 when we convert to use
+ # Unicode's BELL
+ $name = "" if $decimal == 0x1F514;
+
# Some don't have names, leave those array elements undefined
next unless $name;
s/^\s*#.*//;
next unless $_;
my ($hex, $name) = split ";";
- if (rand() < .5) {
- is(charnames::vianame($name), hex $hex, "Verify vianame(\"$name\") is 0x$hex");
- }
- else {
- is(charnames::string_vianame($name), chr(hex $hex), "Verify string_vianame(\"$name\") is chr(0x$hex)");
- }
+ my $i = CORE::hex $hex;
+
+ # Make sure that both aliases (the one in UnicodeData, and the one we
+ # just read) return the same code point.
+ test_vianame($i, $hex, $name);
+ test_vianame($i, $hex, $names[$i]);
+
+ # Set up so that a test below of this code point will use the alias
+ # instead of the less-correct original. We can't test here that
+ # viacode is correct, because the alias file may contain multiple
+ # aliases for the same code point, and viacode should return only the
+ # final one. So don't do it here; instead rely on the loop below to
+ # pick up the test.
+ $names[$i] = $name;
}
close $fh;
my $end_block = $block;
if ($test_count == 0) {
$test_count = 1;
- $end_block++;
-
- # Keep coalescing until find a block that has something in
- # it. But don't cross plane boundaries (the 16 bits below),
- # so there is at least one test for every plane.
- while ($end_block < $block_count
- && $end_block >> (16 - $block_size_bits) == $block >> (16 - $block_size_bits)
- && ! $algorithmic_names_count[$end_block]
- && ! $regular_names_count[$end_block])
- {
+ if ($run_slow_tests < $RUN_SLOW_TESTS_EVERY_CODE_POINT) {
$end_block++;
+
+ # Keep coalescing until find a block that has something in
+ # it. But don't cross plane boundaries (the 16 bits below),
+ # so there is at least one test for every plane.
+ while ($end_block < $block_count
+ && $end_block >> (16 - $block_size_bits)
+ == $block >> (16 - $block_size_bits)
+ && ! $algorithmic_names_count[$end_block]
+ && ! $regular_names_count[$end_block])
+ {
+ $end_block++;
+ }
+ $end_block--; # Back-off to a block that has no defined names
}
- $end_block--; # Back-off to a block that has no defined names
}
# Calculated how many tests. Do them
} else {
# Otherwise, test that the name and code point map
- # correctly. Half the time use vianame, and half
- # string_vianame
- if (rand() < .5) {
- $all_pass &= is(charnames::vianame($names[$i]), $i, "Verify vianame(\"$names[$i]\") is 0x$hex");
- } else {
- $all_pass &= is(charnames::string_vianame($names[$i]), chr($i), "Verify string_vianame(\"$names[$i]\") is chr(0x$hex)");
- }
+ # correctly.
+ $all_pass &= test_vianame($i, $hex, $names[$i]);
$all_pass &= is(charnames::viacode($i), $names[$i], "Verify viacode(0x$hex) is \"$names[$i]\"");
# And make sure that a non-algorithmically named code
my ($name, $codes) = split ";";
my $utf8 = pack("U*", map { hex } split " ", $codes);
is(charnames::string_vianame($name), $utf8, "Verify string_vianame(\"$name\") is the proper utf8");
- is(charnames::string_vianame($name), $utf8, "Verify string_vianame(\"$name\") is the proper utf8");
+ my $loose_name = get_loose_name($name);
+ use charnames ":loose";
+ is(charnames::string_vianame($loose_name), $utf8, "Verify string_vianame(\"$loose_name\") is the proper utf8");
#diag("$name, $utf8");
}
close $fh;