8 use Unicode::UCD qw(prop_invlist);
10 sub truth($) { # Converts values so is() works
11 return (shift) ? 1 : 0;
15 my $locale = POSIX::setlocale( &POSIX::LC_ALL, "C");
16 if (defined $locale && $locale eq 'C') {
19 # Some locale implementations don't have the 128-255 characters all
20 # mean nothing. Skip the locale tests in that situation
21 for my $i (128 .. 255) {
22 if (chr($i) =~ /[[:print:]]/) {
30 # name => Lookup-property name
40 idfirst => '_Perl_IDStart',
43 psxspc => 'XPosixSpace',
44 punct => 'XPosixPunct',
45 quotemeta => '_Perl_Quotemeta',
46 space => 'XPerlSpace',
47 vertws => 'VertSpace',
53 local $SIG{__WARN__} = sub { push @warnings, @_ };
56 foreach my $name (sort keys %properties) {
57 my $property = $properties{$name};
58 my @invlist = prop_invlist($property, '_perl_core_internal_ok');
60 fail("No inversion list found for $property");
64 # Include all the Latin1 code points, plus 0x100.
65 my @code_points = (0 .. 256);
67 # Then include the next few boundaries above those from this property
69 foreach my $range_start (@invlist) {
70 next if $range_start < 257;
71 push @code_points, $range_start - 1, $range_start;
73 last if $above_latins > 5;
76 # This makes sure we are using the Perl definition of idfirst, and not the
77 # Unicode. There are a few differences.
78 push @code_points, ord "\N{ESTIMATED SYMBOL}" if $name eq 'idfirst';
80 # And finally one non-Unicode code point.
81 push @code_points, 0x110000; # Above Unicode, no prop should match
82 no warnings 'non_unicode';
84 for my $i (@code_points) {
85 my $function = uc($name);
87 my $matches = Unicode::UCD::_search_invlist(\@invlist, $i);
88 if (! defined $matches) {
92 $matches = truth(! ($matches % 2));
96 my $char_name = charnames::viacode($i) // "No name";
97 my $display_name = sprintf "\\N{U+%02X, %s}", $i, $char_name;
99 if ($name eq 'quotemeta') { # There is only one macro for this, and is
100 # defined only for Latin1 range
101 $ret = truth eval "test_is${function}($i)";
106 my $truth = truth($matches && $i < 256);
107 is ($ret, $truth, "is${function}( $display_name ) == $truth");
112 # vertws is always all of Unicode; ALNUM_A and ALNUM_L1 are not
113 # defined as they were added later, after WORDCHAR was created to be a
114 # clearer synonym for ALNUM
115 if ($name ne 'vertws') {
116 if ($name ne 'alnum') {
117 $ret = truth eval "test_is${function}_A($i)";
122 my $truth = truth($matches && $i < 128);
123 is ($ret, $truth, "is${function}_A( $display_name ) == $truth");
125 $ret = truth eval "test_is${function}_L1($i)";
130 my $truth = truth($matches && $i < 256);
131 is ($ret, $truth, "is${function}_L1( $display_name ) == $truth");
135 next unless defined $locale;
138 $ret = truth eval "test_is${function}_LC($i)";
143 my $truth = truth($matches && $i < 128);
144 is ($ret, $truth, "is${function}_LC( $display_name ) == $truth");
148 $ret = truth eval "test_is${function}_uni($i)";
153 is ($ret, $matches, "is${function}_uni( $display_name ) == $matches");
156 if (defined $locale && $name ne 'vertws') {
159 $ret = truth eval "test_is${function}_LC_uvchr('$i')";
164 my $truth = truth($matches && ($i < 128 || $i > 255));
165 is ($ret, $truth, "is${function}_LC_uvchr( $display_name ) == $truth");
170 utf8::upgrade($char);
171 $char = quotemeta $char if $char eq '\\' || $char eq "'";
172 $ret = truth eval "test_is${function}_utf8('$char')";
177 is ($ret, $matches, "is${function}_utf8( $display_name ) == $matches");
180 next if $name eq 'vertws' || ! defined $locale;
183 $ret = truth eval "test_is${function}_LC_utf8('$char')";
188 my $truth = truth($matches && ($i < 128 || $i > 255));
189 is ($ret, $truth, "is${function}_LC_utf8( $display_name ) == $truth");
194 # This is primarily to make sure that no non-Unicode warnings get generated
195 is(scalar @warnings, 0, "No warnings were generated " . join ", ", @warnings);