use XS::APItest;
-use Unicode::UCD qw(prop_invlist prop_invmap);
-
my $tab = " " x 4; # Indent subsidiary tests this much
+use Unicode::UCD qw(search_invlist prop_invmap prop_invlist);
+my ($charname_list, $charname_map, $format, $default) = prop_invmap("Name Alias");
+
+sub get_charname($) {
+ my $cp = shift;
+
+ # If there is a an abbreviation for the code point name, use it
+ my $name_index = search_invlist(\@{$charname_list}, $cp);
+ if (defined $name_index) {
+ my $synonyms = $charname_map->[$name_index];
+ if (ref $synonyms) {
+ my $pat = qr/: abbreviation/;
+ my @abbreviations = grep { $_ =~ $pat } @$synonyms;
+ if (@abbreviations) {
+ return $abbreviations[0] =~ s/$pat//r;
+ }
+ }
+ }
+
+ # Otherwise, use the full name
+ use charnames ();
+ return charnames::viacode($cp) // "No name";
+}
+
sub truth($) { # Converts values so is() works
return (shift) ? 1 : 0;
}
return (" ($locale)", 1);
}
+sub try_malforming($$$)
+{
+ # Determines if the tests for malformed UTF-8 should be done. When done,
+ # the .xs code creates malformations by pretending the length is shorter
+ # than it actually is. Some things can't be malformed, and sometimes this
+ # test knows that the current code doesn't look for a malformation under
+ # various circumstances.
+
+ my ($i, $function, $using_locale) = @_;
+
+ # Single bytes can't be malformed
+ return 0 if $i < ((ord "A" == 65) ? 128 : 160);
+
+ # ASCII doesn't need to ever look beyond the first byte.
+ return 0 if $function eq "ASCII";
+
+ # No controls above 255, so the code doesn't look at those
+ return 0 if $i > 255 && $function eq "CNTRL";
+
+ # No non-ASCII digits below 256, except if using locales.
+ return 0 if $i < 256 && ! $using_locale && $function =~ /X?DIGIT/;
+
+ return 1;
+}
+
my %properties = (
# name => Lookup-property name
alnum => 'Word',
xdigit => 'XDigit',
);
+my %seen;
my @warnings;
local $SIG{__WARN__} = sub { push @warnings, @_ };
-use charnames ();
-foreach my $name (sort keys %properties) {
- my $property = $properties{$name};
- my @invlist = prop_invlist($property, '_perl_core_internal_ok');
- if (! @invlist) {
-
- # An empty return could mean an unknown property, or merely that it is
- # empty. Call in scalar context to differentiate
- if (! prop_invlist($property, '_perl_core_internal_ok')) {
- fail("No inversion list found for $property");
- next;
+my %utf8_param_code = (
+ "_safe" => 0,
+ "_safe, malformed" => 1,
+ "deprecated unsafe" => -1,
+ );
+
+foreach my $name (sort keys %properties, 'octal') {
+ my @invlist;
+ if ($name eq 'octal') {
+ # Hand-roll an inversion list with 0-7 in it and nothing else.
+ push @invlist, ord "0", ord "8";
+ }
+ else {
+ my $property = $properties{$name};
+ @invlist = prop_invlist($property, '_perl_core_internal_ok');
+ if (! @invlist) {
+
+ # An empty return could mean an unknown property, or merely that
+ # it is empty. Call in scalar context to differentiate
+ if (! prop_invlist($property, '_perl_core_internal_ok')) {
+ fail("No inversion list found for $property");
+ next;
+ }
}
}
or diag("@warnings");
undef @warnings;
- my $matches = Unicode::UCD::search_invlist(\@invlist, $i);
+ my $matches = search_invlist(\@invlist, $i);
if (! defined $matches) {
$matches = 0;
}
}
my $ret;
- my $char_name = charnames::viacode($i) // "No name";
+ my $char_name = get_charname($j);
my $display_name = sprintf "\\x{%02X, %s}", $i, $char_name;
my $display_call = "is${function}( $display_name )";
- if ($name eq 'quotemeta') { # There is only one macro for this, and is
- # defined only for Latin1 range
- $ret = truth eval "test_is${function}($i)";
- if (is ($@, "", "$display_call didn't give error")) {
- my $truth = truth($matches && $i < 256);
- is ($ret, $truth, "${tab}And returns $truth");
- }
- next;
- }
-
- foreach my $suffix (qw(_A _L1 _LC _uni _LC_uvchr _utf8 _LC_utf8)) {
+ foreach my $suffix ("", "_A", "_L1", "_LC", "_uni", "_uvchr",
+ "_LC_uvchr", "_utf8", "_LC_utf8")
+ {
# Not all possible macros have been defined
if ($name eq 'vertws') {
# vertws is always all of Unicode
- next if $suffix ne "_uni" && $suffix ne "_utf8";
+ next if $suffix !~ / ^ _ ( uni | uvchr | utf8 ) $ /x;
}
elsif ($name eq 'alnum') {
- # ALNUM_A and ALNUM_L1 are not defined as they were added
- # later, after WORDCHAR was created to be a clearer synonym
- # for ALNUM
- next if $suffix eq '_A' || $suffix eq '_L1';
+ # ALNUM_A, ALNUM_L1, and ALNUM_uvchr are not defined as these
+ # suffixes were added later, after WORDCHAR was created to be
+ # a clearer synonym for ALNUM
+ next if $suffix eq '_A'
+ || $suffix eq '_L1'
+ || $suffix eq '_uvchr';
+ }
+ elsif ($name eq 'octal') {
+ next if $suffix ne "" && $suffix ne '_A' && $suffix ne '_L1';
+ }
+ elsif ($name eq 'quotemeta') {
+ # There is only one macro for this, and is defined only for
+ # Latin1 range
+ next if $suffix ne ""
}
foreach my $locale ("", $base_locale, $utf8_locale) {
# The single byte functions are false for
# above-Latin1
if ($i >= 256) {
- $truth = 0 if $suffix =~ / ^ _A | _L [1C] $ /x;
+ $truth = 0
+ if $suffix=~ / ^ ( _A | _L [1C] )? $ /x;
}
- elsif (utf8::native_to_unicode($i) >= 128) {
+ elsif ( utf8::native_to_unicode($i) >= 128
+ && $name ne 'quotemeta')
+ {
- # The _A functions are false
+ # The no-suffix and _A functions are false
# for non-ASCII. So are _LC functions on a
# non-UTF-8 locale
$truth = 0 if $suffix eq "_A"
+ || $suffix eq ""
|| ( $suffix =~ /LC/
&& ! $locale_is_utf8);
}
$truth = $matches;
}
- my $display_call = "is${function}$suffix("
- . " $display_name )$display_locale";
- $ret = truth eval "test_is${function}$suffix('$char')";
- if (is ($@, "", "$display_call didn't give error")) {
+ foreach my $utf8_param("_safe",
+ "_safe, malformed",
+ "deprecated unsafe"
+ )
+ {
+ my $utf8_param_code = $utf8_param_code{$utf8_param};
+ my $expect_error = $utf8_param_code > 0;
+ next if $expect_error
+ && ! try_malforming($i, $function,
+ $suffix =~ /LC/);
+
+ my $display_call = "is${function}$suffix( $display_name"
+ . ", $utf8_param )$display_locale";
+ $ret = truth eval "test_is${function}$suffix('$char',"
+ . " $utf8_param_code)";
+ if ($expect_error) {
+ isnt ($@, "",
+ "expected and got error in $display_call");
+ like($@, qr/Malformed UTF-8 character/,
+ "${tab}And got expected message");
+ if (is (@warnings, 1,
+ "${tab}Got a single warning besides"))
+ {
+ like($warnings[0],
+ qr/Malformed UTF-8 character.*short/,
+ "${tab}Got expected warning");
+ }
+ else {
+ diag("@warnings");
+ }
+ undef @warnings;
+ }
+ elsif (is ($@, "", "$display_call didn't give error")) {
is ($ret, $truth,
"${tab}And correctly returned $truth");
+ if ($utf8_param_code < 0) {
+ my $warnings_ok;
+ my $unique_function = "is" . $function . $suffix;
+ if (! $seen{$unique_function}++) {
+ $warnings_ok = is(@warnings, 1,
+ "${tab}This is first call to"
+ . " $unique_function; Got a single"
+ . " warning");
+ if ($warnings_ok) {
+ $warnings_ok = like($warnings[0],
+ qr/starting in Perl .* will require an additional parameter/,
+ "${tab}The warning was the expected"
+ . " deprecation one");
+ }
+ }
+ else {
+ $warnings_ok = is(@warnings, 0,
+ "${tab}This subsequent call to"
+ . " $unique_function did not warn");
+ }
+ $warnings_ok or diag("@warnings");
+ undef @warnings;
+ }
}
+ }
}
}
}
}
}
-# Test isOCTAL()
-for my $i (0 .. 256, 0x110000) {
- my $char_name = charnames::viacode($i) // "No name";
- my $display_name = sprintf "\\N{U+%02X, %s}", $i, $char_name;
- my $truth = truth($i >= ord('0') && $i <= ord('7'));
-
- my $ret = truth test_isOCTAL_A($i);
- is($ret, $truth, "isOCTAL_A( $display_name ) == $truth");
-
- $ret = truth test_isOCTAL_L1($i);
- is($ret, $truth, "isOCTAL_L1( $display_name ) == $truth");
-}
-
my %to_properties = (
FOLD => 'Case_Folding',
LOWER => 'Lowercase_Mapping',
my $i = utf8::native_to_unicode($j);
my $function = $name;
- my $index = Unicode::UCD::search_invlist(\@{$list_ref}, $j);
+ my $index = search_invlist(\@{$list_ref}, $j);
my $ret;
- my $char_name = charnames::viacode($j) // "No name";
+ my $char_name = get_charname($j);
my $display_name = sprintf "\\N{U+%02X, %s}", $i, $char_name;
foreach my $suffix ("", "_L1", "_LC") {
}
}
- # The _uni and _utf8 functions return both the ordinal of the first
- # code point of the result, and the result in utf8. The .xs tests
- # return these in an array, in [0] and [1] respectively, with [2] the
- # length of the utf8 in bytes.
+ # The _uni, uvchr, and _utf8 functions return both the ordinal of the
+ # first code point of the result, and the result in utf8. The .xs
+ # tests return these in an array, in [0] and [1] respectively, with
+ # [2] the length of the utf8 in bytes.
my $utf8_should_be = "";
my $first_ord_should_be;
if (ref $map_ref->[$index]) { # A multi-char result
}
utf8::upgrade($utf8_should_be);
- # Test _uni
- my $s;
- my $len;
- my $display_call = "to${function}_uni( $display_name )";
- $ret = eval "test_to${function}_uni($j)";
- if (is ($@, "", "$display_call didn't give error")) {
- is ($ret->[0], $first_ord_should_be,
- sprintf("${tab}And correctly returned 0x%02X",
- $first_ord_should_be));
- is ($ret->[1], $utf8_should_be, "${tab}Got correct utf8");
- use bytes;
- is ($ret->[2], length $utf8_should_be,
- "${tab}Got correct number of bytes for utf8 length");
+ # Test _uni, uvchr
+ foreach my $suffix ('_uni', '_uvchr') {
+ my $s;
+ my $len;
+ my $display_call = "to${function}$suffix( $display_name )";
+ $ret = eval "test_to${function}$suffix($j)";
+ if (is ($@, "", "$display_call didn't give error")) {
+ is ($ret->[0], $first_ord_should_be,
+ sprintf("${tab}And correctly returned 0x%02X",
+ $first_ord_should_be));
+ is ($ret->[1], $utf8_should_be, "${tab}Got correct utf8");
+ use bytes;
+ is ($ret->[2], length $utf8_should_be,
+ "${tab}Got correct number of bytes for utf8 length");
+ }
}
# Test _utf8