# selection of names is tested, a higher percentage of regular names is tested
# than algorithmically-determined names.
+my $run_slow_tests = $ENV{PERL_RUN_SLOW_TESTS} || 0;
+
my $RUN_SLOW_TESTS_EVERY_CODE_POINT = 100;
# If $ENV{PERL_RUN_SLOW_TESTS} is at least 1 and less than the number above,
-# all code points with names are tested. If it is at least that number, all
-# 1,114,112 Unicode code points are tested.
+# all code points with names are tested, including wildcard search names. If
+# it is at least that number, all 1,114,112 Unicode code points are tested.
# Because \N{} is compile time, any warnings will get generated before
# execution, so have to have an array, and arrange things so no warning
}
sub test_vianame ($$$) {
+ CORE::state $wildcard_count = 0;
# Run the vianame tests on a code point, both loose and full
# Get a copy of the name modified to stress the loose tests.
my $loose_name = get_loose_name($name);
+ my $right_anchor;
+
# Switch loose and full in vianame vs string_vianame half the time
if (rand() < .5) {
use charnames ":full";
- $all_pass &= is(charnames::vianame($name), $i, "Verify vianame(\"$name\") is 0x$hex");
+ $all_pass &= is(charnames::vianame($name), $i,
+ "Verify vianame(\"$name\") is 0x$hex");
use charnames ":loose";
- $all_pass &= is(charnames::string_vianame($loose_name), chr($i), "Verify string_vianame(\"$loose_name\") is chr(0x$hex)");
+ $all_pass &= is(charnames::string_vianame($loose_name), chr($i),
+ "Verify string_vianame(\"$loose_name\") is chr(0x$hex)");
+ $right_anchor = '\\Z';
}
else {
use charnames ":loose";
- $all_pass &= is(charnames::vianame($loose_name), $i, "Verify vianame(\"$loose_name\") is 0x$hex");
+ $all_pass &= is(charnames::vianame($loose_name), $i,
+ "Verify vianame(\"$loose_name\") is 0x$hex");
use charnames ":full";
- $all_pass &= is(charnames::string_vianame($name), chr($i), "Verify string_vianame(\"$name\") is chr(0x$hex)");
+ $all_pass &= is(charnames::string_vianame($name), chr($i),
+ "Verify string_vianame(\"$name\") is chr(0x$hex)");
+ $right_anchor = '\\z';
}
+ my $left_anchor = (rand() < .5) ? '^' : '\\A';
+
# \p{name=} is always loose matching
- $all_pass &= like(chr($i), qr/^\p{name=$loose_name}$/, "Verify /\p{name=$loose_name}/ matches chr(0x$hex)");
+ $all_pass &= like(chr($i), qr/^\p{name=$loose_name}$/,
+ "Verify /\p{name=$loose_name}/ matches chr(0x$hex)");
+
+ $wildcard_count++;
+
+ # XXX temporary to see if the failure we are occasionally seeing is
+ # confined to this code point. GH #17671
+ next if $i == 0;
+
+ # Because wildcard name matching is so real-time intensive, do it less
+ # frequently than the others
+ if ($wildcard_count >= 10) {
+ $wildcard_count = 0;
+
+ # A few control characters have anomalous names containing
+ # parentheses, which need to be escaped.
+ my $name_ref = \$name;
+ my $mod_name;
+ if ($i <= 0x85) { # NEL in ASCII; affected controls are lower than
+ # this in EBCDIC
+ $mod_name = $name =~ s/([()])/\\$1/gr;
+ $name_ref = \$mod_name;
+ }
+
+ # We anchor the name, randomly with the possible anchors.
+ my $assembled = $left_anchor. $$name_ref . $right_anchor;
+
+ # \p{name=/.../} is always full matching
+ $all_pass &= like(chr($i), qr!^\p{name=/$assembled/}!,
+ "Verify /\p{name=/$assembled/} matches chr(0x$hex)");
+ }
return $all_pass;
}
cmp_ok($warning_count, '==', scalar @WARN, "Verify vianame doesn't warn on unknown names");
ok (! defined charnames::string_vianame("MORE NONE SUCH"), "Verify string_vianame returns undef for an undefined name");
cmp_ok($warning_count, '==', scalar @WARN, "Verify string_vianame doesn't warn on unknown names");
+ ok (! defined charnames::vianame(""), "Verify vianame returns undef for an empty value");
+ cmp_ok($warning_count, '==', scalar @WARN, "... and no warning is generated");
+ ok (! defined charnames::string_vianame(""), "Verify string_vianame returns undef for an empty value");
+ cmp_ok($warning_count, '==', scalar @WARN, "... and no warning is generated");
+
eval "qr/\\p{name=MORE NONE SUCH}/";
like($@, qr/Can't find Unicode property definition "name=MORE NONE SUCH"/,
'\p{name=} returns an appropriate error message on an undefined name');
test_vianame(0x116C, "116C", "HANGUL JUNGSEONG OE");
test_vianame(0x1180, "1180", "HANGUL JUNGSEONG O-E");
+like(chr(0x59C3), qr/\p{name=\/\ACJK UNIFIED IDEOGRAPH-59C3\z\/}/,
+ 'Verify name wildcards delimitters can be escaped');
+like(chr(0xD800), qr!\p{name=/\A\z/}!,
+ 'Verify works on matching an empty name');
{
no warnings 'deprecated';
# SADAHIRO Tomoyuki's suggestion is to ensure that the UTF-8ness of both
# arguments are identical before calling index.
# To do this can take advantage of the fact that unicore/Name.pl is 7 bit
-# (or at least should be). So assert that that it's true here. EBCDIC
+# (or at least should be). So assert that that is true here. EBCDIC
# may be a problem (khw).
my $names = do "unicore/Name.pl";
$seed = srand;
}
- my $run_slow_tests = $ENV{PERL_RUN_SLOW_TESTS} || 0;
-
# We will look at the data grouped in "blocks" of the following
# size.
my $block_size_bits = 8; # above 16 is not sensible
my $loose_name = get_loose_name($name);
use charnames ":loose";
is(charnames::string_vianame($loose_name), $utf8, "Verify string_vianame(\"$loose_name\") is the proper utf8");
+
+ like($utf8, qr/^\p{name=$name}$/, "Verify /\p{name=$name}\$/ is the proper utf8");
+ like($utf8, qr/^\p{name=$loose_name}$/, "Verify /\p{name=$loose_name}\$/ is the proper utf8");
+ like($utf8, qr!^\p{name=/\A$name\z/}!, "Verify /\p{name=/$\A$name\z/} is the proper utf8");
#diag("$name, $utf8");
}
close $fh;