ext/XS-APItest/t/handy.t

   1 #!perl -w
   2
   3 use strict;
   4 use Test::More;
   5
   6 use XS::APItest;
   7
   8 use Unicode::UCD qw(prop_invlist);
   9
  10 sub truth($) {  # Converts values so is() works
  11     return (shift) ? 1 : 0;
  12 }
  13
  14 require POSIX;
  15 my $locale = POSIX::setlocale( &POSIX::LC_ALL, "C");
  16 if (defined $locale && $locale eq 'C') {
  17     use locale;
  18
  19     # Some locale implementations don't have the 128-255 characters all
  20     # mean nothing.  Skip the locale tests in that situation
  21     for my $i (128 .. 255) {
  22         if (chr($i) =~ /[[:print:]]/) {
  23             undef $locale;
  24             last;
  25         }
  26     }
  27 }
  28
  29 my %properties = (
  30                    # name => Lookup-property name
  31                    alnum => 'Word',
  32                    wordchar => 'Word',
  33                    alnumc => 'Alnum',
  34                    alpha => 'Alpha',
  35                    ascii => 'ASCII',
  36                    blank => 'Blank',
  37                    cntrl => 'Control',
  38                    digit => 'Digit',
  39                    graph => 'Graph',
  40                    idfirst => '_Perl_IDStart',
  41                    lower => 'Lower',
  42                    print => 'Print',
  43                    psxspc => 'XPosixSpace',
  44                    punct => 'XPosixPunct',
  45                    quotemeta => '_Perl_Quotemeta',
  46                    space => 'XPerlSpace',
  47                    vertws => 'VertSpace',
  48                    upper => 'Upper',
  49                    xdigit => 'XDigit',
  50                 );
  51
  52 my @warnings;
  53 local $SIG{__WARN__} = sub { push @warnings, @_ };
  54
  55 use charnames ();
  56 foreach my $name (sort keys %properties) {
  57     my $property = $properties{$name};
  58     my @invlist = prop_invlist($property, '_perl_core_internal_ok');
  59     if (! @invlist) {
  60         fail("No inversion list found for $property");
  61         next;
  62     }
  63
  64     # Include all the Latin1 code points, plus 0x100.
  65     my @code_points = (0 .. 256);
  66
  67     # Then include the next few boundaries above those from this property
  68     my $above_latins = 0;
  69     foreach my $range_start (@invlist) {
  70         next if $range_start < 257;
  71         push @code_points, $range_start - 1, $range_start;
  72         $above_latins++;
  73         last if $above_latins > 5;
  74     }
  75
  76     # This makes sure we are using the Perl definition of idfirst, and not the
  77     # Unicode.  There are a few differences.
  78     push @code_points, ord "\N{ESTIMATED SYMBOL}" if $name eq 'idfirst';
  79
  80     # And finally one non-Unicode code point.
  81     push @code_points, 0x110000;    # Above Unicode, no prop should match
  82     no warnings 'non_unicode';
  83
  84     for my $i (@code_points) {
  85         my $function = uc($name);
  86
  87         my $matches = Unicode::UCD::_search_invlist(\@invlist, $i);
  88         if (! defined $matches) {
  89             $matches = 0;
  90         }
  91         else {
  92             $matches = truth(! ($matches % 2));
  93         }
  94
  95         my $ret;
  96         my $char_name = charnames::viacode($i) // "No name";
  97         my $display_name = sprintf "\\N{U+%02X, %s}", $i, $char_name;
  98
  99         if ($name eq 'quotemeta') { # There is only one macro for this, and is
 100                                     # defined only for Latin1 range
 101             $ret = truth eval "test_is${function}($i)";
 102             if ($@) {
 103                 fail $@;
 104             }
 105             else {
 106                 my $truth = truth($matches && $i < 256);
 107                 is ($ret, $truth, "is${function}( $display_name ) == $truth");
 108             }
 109             next;
 110         }
 111
 112         # vertws is always all of Unicode; ALNUM_A and ALNUM_L1 are not
 113         # defined as they were added later, after WORDCHAR was created to be a
 114         # clearer synonym for ALNUM
 115         if ($name ne 'vertws') {
 116             if ($name ne 'alnum') {
 117             $ret = truth eval "test_is${function}_A($i)";
 118             if ($@) {
 119                 fail($@);
 120             }
 121             else {
 122                 my $truth = truth($matches && $i < 128);
 123                 is ($ret, $truth, "is${function}_A( $display_name ) == $truth");
 124             }
 125             $ret = truth eval "test_is${function}_L1($i)";
 126             if ($@) {
 127                 fail($@);
 128             }
 129             else {
 130                 my $truth = truth($matches && $i < 256);
 131                 is ($ret, $truth, "is${function}_L1( $display_name ) == $truth");
 132             }
 133             }
 134
 135             next unless defined $locale;
 136             use locale;
 137
 138             $ret = truth eval "test_is${function}_LC($i)";
 139             if ($@) {
 140                 fail($@);
 141             }
 142             else {
 143                 my $truth = truth($matches && $i < 128);
 144                 is ($ret, $truth, "is${function}_LC( $display_name ) == $truth");
 145             }
 146         }
 147
 148         $ret = truth eval "test_is${function}_uni($i)";
 149         if ($@) {
 150             fail($@);
 151         }
 152         else {
 153             is ($ret, $matches, "is${function}_uni( $display_name ) == $matches");
 154         }
 155
 156         if (defined $locale && $name ne 'vertws') {
 157             use locale;
 158
 159             $ret = truth eval "test_is${function}_LC_uvchr('$i')";
 160             if ($@) {
 161                 fail($@);
 162             }
 163             else {
 164                 my $truth = truth($matches && ($i < 128 || $i > 255));
 165                 is ($ret, $truth, "is${function}_LC_uvchr( $display_name ) == $truth");
 166             }
 167         }
 168
 169         my $char = chr($i);
 170         utf8::upgrade($char);
 171         $char = quotemeta $char if $char eq '\\' || $char eq "'";
 172         $ret = truth eval "test_is${function}_utf8('$char')";
 173         if ($@) {
 174             fail($@);
 175         }
 176         else {
 177             is ($ret, $matches, "is${function}_utf8( $display_name ) == $matches");
 178         }
 179
 180         next if $name eq 'vertws' || ! defined $locale;
 181         use locale;
 182
 183         $ret = truth eval "test_is${function}_LC_utf8('$char')";
 184         if ($@) {
 185             fail($@);
 186         }
 187         else {
 188             my $truth = truth($matches && ($i < 128 || $i > 255));
 189             is ($ret, $truth, "is${function}_LC_utf8( $display_name ) == $truth");
 190         }
 191     }
 192 }
 193
 194 # This is primarily to make sure that no non-Unicode warnings get generated
 195 is(scalar @warnings, 0, "No warnings were generated " . join ", ", @warnings);
 196
 197 done_testing;