t/uni/class.t

   1 BEGIN {
   2     chdir 't' if -d 't';
   3     @INC = qw(../lib .);
   4     require "test.pl";
   5 }
   6
   7 plan tests => 4670;
   8
   9 sub MyUniClass {
  10   <<END;
  11 0030    004F
  12 END
  13 }
  14
  15 sub Other::Class {
  16   <<END;
  17 0040    005F
  18 END
  19 }
  20
  21 sub A::B::Intersection {
  22   <<END;
  23 +main::MyUniClass
  24 &Other::Class
  25 END
  26 }
  27
  28 sub test_regexp ($$) {
  29   # test that given string consists of N-1 chars matching $qr1, and 1
  30   # char matching $qr2
  31   my ($str, $blk) = @_;
  32
  33   # constructing these objects here makes the last test loop go much faster
  34   my $qr1 = qr/(\p{$blk}+)/;
  35   if ($str =~ $qr1) {
  36     is($1, substr($str, 0, -1));                # all except last char
  37   }
  38   else {
  39     fail('first N-1 chars did not match');
  40   }
  41
  42   my $qr2 = qr/(\P{$blk}+)/;
  43   if ($str =~ $qr2) {
  44     is($1, substr($str, -1));                   # only last char
  45   }
  46   else {
  47     fail('last char did not match');
  48   }
  49 }
  50
  51 use strict;
  52
  53 my $str = join "", map chr($_), 0x20 .. 0x6F;
  54
  55 # make sure it finds built-in class
  56 is(($str =~ /(\p{Letter}+)/)[0], 'ABCDEFGHIJKLMNOPQRSTUVWXYZ');
  57 is(($str =~ /(\p{l}+)/)[0], 'ABCDEFGHIJKLMNOPQRSTUVWXYZ');
  58
  59 # make sure it finds user-defined class
  60 is(($str =~ /(\p{MyUniClass}+)/)[0], '0123456789:;<=>?@ABCDEFGHIJKLMNO');
  61
  62 # make sure it finds class in other package
  63 is(($str =~ /(\p{Other::Class}+)/)[0], '@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_');
  64
  65 # make sure it finds class in other OTHER package
  66 is(($str =~ /(\p{A::B::Intersection}+)/)[0], '@ABCDEFGHIJKLMNO');
  67
  68 # all of these should look in lib/unicore/bc/AL.pl
  69 $str = "\x{070D}\x{070E}\x{070F}\x{0710}\x{0711}";
  70 is(($str =~ /(\P{BidiClass: ArabicLetter}+)/)[0], "\x{070E}\x{070F}");
  71 is(($str =~ /(\P{BidiClass: AL}+)/)[0], "\x{070E}\x{070F}");
  72 is(($str =~ /(\P{BC :ArabicLetter}+)/)[0], "\x{070E}\x{070F}");
  73 is(($str =~ /(\P{bc=AL}+)/)[0], "\x{070E}\x{070F}");
  74
  75 # make sure InGreek works
  76 $str = "[\x{038B}\x{038C}\x{038D}]";
  77
  78 is(($str =~ /(\p{InGreek}+)/)[0], "\x{038B}\x{038C}\x{038D}");
  79 is(($str =~ /(\p{Script:InGreek}+)/)[0], "\x{038B}\x{038C}\x{038D}");
  80 is(($str =~ /(\p{Script=InGreek}+)/)[0], "\x{038B}\x{038C}\x{038D}");
  81 is(($str =~ /(\p{sc:InGreek}+)/)[0], "\x{038B}\x{038C}\x{038D}");
  82 is(($str =~ /(\p{sc=InGreek}+)/)[0], "\x{038B}\x{038C}\x{038D}");
  83
  84 use File::Spec;
  85 my $updir = File::Spec->updir;
  86
  87 # the %utf8::... hashes are already in existence
  88 # because utf8_pva.pl was run by utf8_heavy.pl
  89
  90 *utf8::PropertyAlias = *utf8::PropertyAlias; # thwart a warning
  91
  92 no warnings 'utf8'; # we do not want warnings about surrogates etc
  93
  94 # non-General Category and non-Script
  95 while (my ($abbrev, $files) = each %utf8::PVA_abbr_map) {
  96   my $prop_name = $utf8::PropertyAlias{$abbrev};
  97   next unless $prop_name;
  98   next if $abbrev eq "gc_sc";
  99
 100   for (sort keys %$files) {
 101     my $filename = File::Spec->catfile(
 102       $updir => lib => unicore => lib => $abbrev => "$files->{$_}.pl"
 103     );
 104
 105     next unless -e $filename;
 106     my ($h1, $h2) = map hex, (split(/\t/, (do $filename), 3))[0,1];
 107     my $str = join "", map chr, $h1 .. (($h2 || $h1) + 1);
 108
 109     for my $p ($prop_name, $abbrev) {
 110       for my $c ($files->{$_}, $_) {
 111         is($str =~ /(\p{$p: $c}+)/ && $1, substr($str, 0, -1));
 112         is($str =~ /(\P{$p= $c}+)/ && $1, substr($str, -1));
 113       }
 114     }
 115   }
 116 }
 117
 118 # General Category and Script
 119 for my $p ('gc', 'sc') {
 120   while (my ($abbr) = each %{ $utf8::PropValueAlias{$p} }) {
 121     my $filename = File::Spec->catfile(
 122       $updir => lib => unicore => lib => gc_sc => "$utf8::PVA_abbr_map{gc_sc}{$abbr}.pl"
 123     );
 124
 125     next unless -e $filename;
 126     my ($h1, $h2) = map hex, (split(/\t/, (do $filename), 3))[0,1];
 127     my $str = join "", map chr, $h1 .. (($h2 || $h1) + 1);
 128
 129     for my $x ($p, { gc => 'General Category', sc => 'Script' }->{$p}) {
 130       for my $y ($abbr, $utf8::PropValueAlias{$p}{$abbr}, $utf8::PVA_abbr_map{gc_sc}{$abbr}) {
 131         is($str =~ /(\p{$x: $y}+)/ && $1, substr($str, 0, -1));
 132         is($str =~ /(\P{$x= $y}+)/ && $1, substr($str, -1));
 133         test_regexp ($str, $y);
 134       }
 135     }
 136   }
 137 }
 138
 139 # test extra properties (ASCII_Hex_Digit, Bidi_Control, etc.)
 140 SKIP:
 141 {
 142   skip "Can't reliably derive class names from file names", 592 if $^O eq 'VMS';
 143
 144   # On case tolerant filesystems, Cf.pl will cause a -e test for cf.pl to
 145   # return true. Try to work around this by reading the filenames explicitly
 146   # to get a case sensitive test.  N.B.  This will fail if filename case is
 147   # not preserved because you might go looking for a class name of CF or cf
 148   # when you really want Cf.  Storing case sensitive data in filenames is
 149   # simply not portable.
 150
 151   my %files;
 152
 153   my $dirname = File::Spec->catdir($updir => lib => unicore => lib => 'gc_sc');
 154   opendir D, $dirname or die $!;
 155   @files{readdir(D)} = ();
 156   closedir D;
 157
 158   for (keys %utf8::PA_reverse) {
 159     my $leafname = "$utf8::PA_reverse{$_}.pl";
 160     next unless exists $files{$leafname};
 161
 162     my $filename = File::Spec->catfile($dirname, $leafname);
 163
 164     my ($h1, $h2) = map hex, (split(/\t/, (do $filename), 3))[0,1];
 165     my $str = join "", map chr, $h1 .. (($h2 || $h1) + 1);
 166
 167     for my $x ('gc', 'General Category') {
 168       print "# $filename $x $_, $utf8::PA_reverse{$_}\n";
 169       for my $y ($_, $utf8::PA_reverse{$_}) {
 170         is($str =~ /(\p{$x: $y}+)/ && $1, substr($str, 0, -1));
 171         is($str =~ /(\P{$x= $y}+)/ && $1, substr($str, -1));
 172         test_regexp ($str, $y);
 173       }
 174     }
 175   }
 176 }
 177
 178 # test the blocks (InFoobar)
 179 for (grep $utf8::Canonical{$_} =~ /^In/, keys %utf8::Canonical) {
 180   my $filename = File::Spec->catfile(
 181     $updir => lib => unicore => lib => gc_sc => "$utf8::Canonical{$_}.pl"
 182   );
 183
 184   next unless -e $filename;
 185
 186   print "# In$_ $filename\n";
 187
 188   my ($h1, $h2) = map hex, (split(/\t/, (do $filename), 3))[0,1];
 189   my $str = join "", map chr, $h1 .. (($h2 || $h1) + 1);
 190
 191   my $blk = $_;
 192
 193   test_regexp ($str, $blk);
 194   $blk =~ s/^In/Block:/;
 195   test_regexp ($str, $blk);
 196 }
 197