t/uni/class.t

   1 BEGIN {
   2     chdir 't' if -d 't';
   3     @INC = qw(../lib .);
   4     require "test.pl";
   5 }
   6
   7 plan tests => 5092;
   8
   9 sub MyUniClass {
  10   <<END;
  11 0030    004F
  12 END
  13 }
  14
  15 sub Other::Class {
  16   <<END;
  17 0040    005F
  18 END
  19 }
  20
  21 sub A::B::Intersection {
  22   <<END;
  23 +main::MyUniClass
  24 &Other::Class
  25 END
  26 }
  27
  28 sub test_regexp ($$) {
  29   # test that given string consists of N-1 chars matching $qr1, and 1
  30   # char matching $qr2
  31   my ($str, $blk) = @_;
  32
  33   # constructing these objects here makes the last test loop go much faster
  34   my $qr1 = qr/(\p{$blk}+)/;
  35   if ($str =~ $qr1) {
  36     is($1, substr($str, 0, -1));                # all except last char
  37   }
  38   else {
  39     fail('first N-1 chars did not match');
  40   }
  41
  42   my $qr2 = qr/(\P{$blk}+)/;
  43   if ($str =~ $qr2) {
  44     is($1, substr($str, -1));                   # only last char
  45   }
  46   else {
  47     fail('last char did not match');
  48   }
  49 }
  50
  51 use strict;
  52
  53 my $str;
  54
  55 if (ord('A') == 193) {
  56     $str = join "", map chr($_), 0x40, 0x5A, 0x7F, 0x7B, 0x5B, 0x6C, 0x50, 0x7D, 0x4D, 0x5D, 0x5C, 0x4E, 0x6B, 0x60, 0x4B, 0x61, 0xF0 .. 0xF9, 0x7A, 0x5E, 0x4C, 0x7E, 0x6E, 0x6F, 0x7C, 0xC1 .. 0xC9, 0xD1 .. 0xD9, 0xE2 .. 0xE9, 0xAD, 0xE0, 0xBD, 0x5F, 0x6D, 0x79, 0x81 .. 0x89, 0x91 .. 0x96; # IBM-1047
  57 } else {
  58     $str = join "", map chr($_), 0x20 .. 0x6F;
  59 }
  60
  61 # make sure it finds built-in class
  62 is(($str =~ /(\p{Letter}+)/)[0], 'ABCDEFGHIJKLMNOPQRSTUVWXYZ');
  63 is(($str =~ /(\p{l}+)/)[0], 'ABCDEFGHIJKLMNOPQRSTUVWXYZ');
  64
  65 # make sure it finds user-defined class
  66 is(($str =~ /(\p{MyUniClass}+)/)[0], '0123456789:;<=>?@ABCDEFGHIJKLMNO');
  67
  68 # make sure it finds class in other package
  69 is(($str =~ /(\p{Other::Class}+)/)[0], '@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_');
  70
  71 # make sure it finds class in other OTHER package
  72 is(($str =~ /(\p{A::B::Intersection}+)/)[0], '@ABCDEFGHIJKLMNO');
  73
  74 # all of these should look in lib/unicore/bc/AL.pl
  75 $str = "\x{070D}\x{070E}\x{070F}\x{0710}\x{0711}";
  76 is(($str =~ /(\P{BidiClass: ArabicLetter}+)/)[0], "\x{070E}\x{070F}");
  77 is(($str =~ /(\P{BidiClass: AL}+)/)[0], "\x{070E}\x{070F}");
  78 is(($str =~ /(\P{BC :ArabicLetter}+)/)[0], "\x{070E}\x{070F}");
  79 is(($str =~ /(\P{bc=AL}+)/)[0], "\x{070E}\x{070F}");
  80
  81 # make sure InGreek works
  82 $str = "[\x{038B}\x{038C}\x{038D}]";
  83
  84 is(($str =~ /(\p{InGreek}+)/)[0], "\x{038B}\x{038C}\x{038D}");
  85 is(($str =~ /(\p{Script:InGreek}+)/)[0], "\x{038B}\x{038C}\x{038D}");
  86 is(($str =~ /(\p{Script=InGreek}+)/)[0], "\x{038B}\x{038C}\x{038D}");
  87 is(($str =~ /(\p{sc:InGreek}+)/)[0], "\x{038B}\x{038C}\x{038D}");
  88 is(($str =~ /(\p{sc=InGreek}+)/)[0], "\x{038B}\x{038C}\x{038D}");
  89
  90 use File::Spec;
  91 my $updir = File::Spec->updir;
  92
  93 # the %utf8::... hashes are already in existence
  94 # because utf8_pva.pl was run by utf8_heavy.pl
  95
  96 *utf8::PropertyAlias = *utf8::PropertyAlias; # thwart a warning
  97
  98 no warnings 'utf8'; # we do not want warnings about surrogates etc
  99
 100 sub char_range {
 101     my ($h1, $h2) = @_;
 102
 103     my $str;
 104
 105     if (ord('A') == 193 && $h1 < 256) {
 106         my $h3 = ($h2 || $h1) + 1;
 107         if ($h3 - $h1 == 1) {
 108             $str = join "", pack 'U*', $h1 .. $h3; # Using pack since chr doesn't generate Unicode chars for value < 256.
 109         } elsif ($h3 - $h1 > 1) {
 110             for (my $i = $h1; $i <= $h3; $i++) {
 111                 $str = join "", $str, pack 'U*', $i;
 112             }
 113         }
 114     } else {
 115         $str = join "", map chr, $h1 .. (($h2 || $h1) + 1);
 116     }
 117
 118     return $str;
 119 }
 120
 121 # non-General Category and non-Script
 122 while (my ($abbrev, $files) = each %utf8::PVA_abbr_map) {
 123   my $prop_name = $utf8::PropertyAlias{$abbrev};
 124   next unless $prop_name;
 125   next if $abbrev eq "gc_sc";
 126
 127   for (sort keys %$files) {
 128     my $filename = File::Spec->catfile(
 129       $updir => lib => unicore => lib => $abbrev => "$files->{$_}.pl"
 130     );
 131
 132     next unless -e $filename;
 133     my ($h1, $h2) = map hex, (split(/\t/, (do $filename), 3))[0,1];
 134
 135     my $str = char_range($h1, $h2);
 136
 137     for my $p ($prop_name, $abbrev) {
 138       for my $c ($files->{$_}, $_) {
 139         is($str =~ /(\p{$p: $c}+)/ && $1, substr($str, 0, -1));
 140         is($str =~ /(\P{$p= $c}+)/ && $1, substr($str, -1));
 141       }
 142     }
 143   }
 144 }
 145
 146 # General Category and Script
 147 for my $p ('gc', 'sc') {
 148   while (my ($abbr) = each %{ $utf8::PropValueAlias{$p} }) {
 149     my $filename = File::Spec->catfile(
 150       $updir => lib => unicore => lib => gc_sc => "$utf8::PVA_abbr_map{gc_sc}{$abbr}.pl"
 151     );
 152
 153     next unless -e $filename;
 154     my ($h1, $h2) = map hex, (split(/\t/, (do $filename), 3))[0,1];
 155
 156     my $str = char_range($h1, $h2);
 157
 158     for my $x ($p, { gc => 'General Category', sc => 'Script' }->{$p}) {
 159       for my $y ($abbr, $utf8::PropValueAlias{$p}{$abbr}, $utf8::PVA_abbr_map{gc_sc}{$abbr}) {
 160         is($str =~ /(\p{$x: $y}+)/ && $1, substr($str, 0, -1));
 161         is($str =~ /(\P{$x= $y}+)/ && $1, substr($str, -1));
 162         SKIP: {
 163           skip("surrogate", 1) if $abbr eq 'cs';
 164           test_regexp ($str, $y);
 165         }
 166       }
 167     }
 168   }
 169 }
 170
 171 # test extra properties (ASCII_Hex_Digit, Bidi_Control, etc.)
 172 SKIP:
 173 {
 174   skip "Can't reliably derive class names from file names", 576 if $^O eq 'VMS';
 175
 176   # On case tolerant filesystems, Cf.pl will cause a -e test for cf.pl to
 177   # return true. Try to work around this by reading the filenames explicitly
 178   # to get a case sensitive test.  N.B.  This will fail if filename case is
 179   # not preserved because you might go looking for a class name of CF or cf
 180   # when you really want Cf.  Storing case sensitive data in filenames is
 181   # simply not portable.
 182
 183   my %files;
 184
 185   my $dirname = File::Spec->catdir($updir => lib => unicore => lib => 'gc_sc');
 186   opendir D, $dirname or die $!;
 187   @files{readdir(D)} = ();
 188   closedir D;
 189
 190   for (keys %utf8::PA_reverse) {
 191     my $leafname = "$utf8::PA_reverse{$_}.pl";
 192     next unless exists $files{$leafname};
 193
 194     my $filename = File::Spec->catfile($dirname, $leafname);
 195
 196     my ($h1, $h2) = map hex, (split(/\t/, (do $filename), 3))[0,1];
 197
 198     my $str = char_range($h1, $h2);
 199
 200     for my $x ('gc', 'General Category') {
 201       print "# $filename $x $_, $utf8::PA_reverse{$_}\n";
 202       for my $y ($_, $utf8::PA_reverse{$_}) {
 203         is($str =~ /(\p{$x: $y}+)/ && $1, substr($str, 0, -1));
 204         is($str =~ /(\P{$x= $y}+)/ && $1, substr($str, -1));
 205         test_regexp ($str, $y);
 206       }
 207     }
 208   }
 209 }
 210
 211 # test the blocks (InFoobar)
 212 for (grep $utf8::Canonical{$_} =~ /^In/, keys %utf8::Canonical) {
 213   my $filename = File::Spec->catfile(
 214     $updir => lib => unicore => lib => gc_sc => "$utf8::Canonical{$_}.pl"
 215   );
 216
 217   next unless -e $filename;
 218
 219   print "# In$_ $filename\n";
 220
 221   my ($h1, $h2) = map hex, (split(/\t/, (do $filename), 3))[0,1];
 222
 223   my $str = char_range($h1, $h2);
 224
 225   my $blk = $_;
 226
 227   SKIP: {
 228     skip($blk, 2) if $blk =~ /surrogates/i;
 229     test_regexp ($str, $blk);
 230     $blk =~ s/^In/Block:/;
 231     test_regexp ($str, $blk);
 232   }
 233 }
 234