[perl5.git] / t / uni / class.t

BEGIN {
    chdir 't' if -d 't';
    @INC = qw(../lib .);
    require "test.pl";
}

plan tests => 4670;

sub MyUniClass {
  <<END;
0030	004F
END
}

sub Other::Class {
  <<END;
0040	005F
END
}

sub A::B::Intersection {
  <<END;
+main::MyUniClass
&Other::Class
END
}


my $str = join "", map chr($_), 0x20 .. 0x6F;

# make sure it finds built-in class
is(($str =~ /(\p{Letter}+)/)[0], 'ABCDEFGHIJKLMNOPQRSTUVWXYZ');
is(($str =~ /(\p{l}+)/)[0], 'ABCDEFGHIJKLMNOPQRSTUVWXYZ');

# make sure it finds user-defined class
is(($str =~ /(\p{MyUniClass}+)/)[0], '0123456789:;<=>?@ABCDEFGHIJKLMNO');

# make sure it finds class in other package
is(($str =~ /(\p{Other::Class}+)/)[0], '@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_');

# make sure it finds class in other OTHER package
is(($str =~ /(\p{A::B::Intersection}+)/)[0], '@ABCDEFGHIJKLMNO');

# all of these should look in lib/unicore/bc/AL.pl
$str = "\x{070D}\x{070E}\x{070F}\x{0710}\x{0711}";
is(($str =~ /(\P{BidiClass: ArabicLetter}+)/)[0], "\x{070E}\x{070F}");
is(($str =~ /(\P{BidiClass: AL}+)/)[0], "\x{070E}\x{070F}");
is(($str =~ /(\P{BC :ArabicLetter}+)/)[0], "\x{070E}\x{070F}");
is(($str =~ /(\P{bc=AL}+)/)[0], "\x{070E}\x{070F}");

# make sure InGreek works
$str = "[\x{038B}\x{038C}\x{038D}]";

is(($str =~ /(\p{InGreek}+)/)[0], "\x{038B}\x{038C}\x{038D}");
is(($str =~ /(\p{Script:InGreek}+)/)[0], "\x{038B}\x{038C}\x{038D}");
is(($str =~ /(\p{Script=InGreek}+)/)[0], "\x{038B}\x{038C}\x{038D}");
is(($str =~ /(\p{sc:InGreek}+)/)[0], "\x{038B}\x{038C}\x{038D}");
is(($str =~ /(\p{sc=InGreek}+)/)[0], "\x{038B}\x{038C}\x{038D}");


use File::Spec;
my $updir = File::Spec->updir;


# the %utf8::... hashes are already in existence
# because utf8_pva.pl was run by utf8_heavy.pl

# non-General Category and non-Script
while (my ($abbrev, $files) = each %utf8::PVA_abbr_map) {
  my $prop_name = $utf8::PropertyAlias{$abbrev};
  next unless $prop_name;
  next if $abbrev eq "gc_sc";

  for (sort keys %$files) {
    my $filename = File::Spec->catfile(
      $updir => lib => unicore => lib => $abbrev => "$files->{$_}.pl"
    );

    next unless -e $filename;
    my ($h1, $h2) = map hex, split /\t/, (do $filename);
    my $str = join "", map chr, $h1 .. (($h2 || $h1) + 1);

    for my $p ($prop_name, $abbrev) {
      for my $c ($files->{$_}, $_) {
        is($str =~ /(\p{$p: $c}+)/ && $1, substr($str, 0, -1));
        is($str =~ /(\P{$p= $c}+)/ && $1, substr($str, -1));
      }
    }
  }
}

# General Category and Script
for my $p ('gc', 'sc') {
  while (my ($abbr) = each %{ $utf8::PropValueAlias{$p} }) {
    my $filename = File::Spec->catfile(
      $updir => lib => unicore => lib => gc_sc => "$utf8::PVA_abbr_map{gc_sc}{$abbr}.pl"
    );

    next unless -e $filename;
    my ($h1, $h2) = map hex, split /\t/, (do $filename);
    my $str = join "", map chr, $h1 .. (($h2 || $h1) + 1);

    for my $x ($p, { gc => 'General Category', sc => 'Script' }->{$p}) {
      for my $y ($abbr, $utf8::PropValueAlias{$p}{$abbr}, $utf8::PVA_abbr_map{gc_sc}{$abbr}) {
        is($str =~ /(\p{$x: $y}+)/ && $1, substr($str, 0, -1));
        is($str =~ /(\P{$x= $y}+)/ && $1, substr($str, -1));
        is($str =~ /(\p{$y}+)/ && $1, substr($str, 0, -1));
        is($str =~ /(\P{$y}+)/ && $1, substr($str, -1));
      }
    }
  }
}

# test extra properties (ASCII_Hex_Digit, Bidi_Control, etc.)
SKIP:
{
  skip "Can't reliably derive class names from file names", 592 if $^O eq 'VMS';

  # On case tolerant filesystems, Cf.pl will cause a -e test for cf.pl to
  # return true. Try to work around this by reading the filenames explicitly
  # to get a case sensitive test.  N.B.  This will fail if filename case is
  # not preserved because you might go looking for a class name of CF or cf
  # when you really want Cf.  Storing case sensitive data in filenames is 
  # simply not portable.

  my %files;

  my $dirname = File::Spec->catdir($updir => lib => unicore => lib => gc_sc);
  opendir D, $dirname or die $!;
  @files{readdir D} = ();
  closedir D;

  for (keys %utf8::PA_reverse) {
    my $leafname = "$utf8::PA_reverse{$_}.pl";
    next unless exists $files{$leafname};

    my $filename = File::Spec->catfile($dirname, $leafname);

    my ($h1, $h2) = map hex, split /\t/, (do $filename);
    my $str = join "", map chr, $h1 .. (($h2 || $h1) + 1);

    for my $x ('gc', 'General Category') {
      print "# $filename $x $_, $utf8::PA_reverse{$_}\n";
      for my $y ($_, $utf8::PA_reverse{$_}) {
	is($str =~ /(\p{$x: $y}+)/ && $1, substr($str, 0, -1));
	is($str =~ /(\P{$x= $y}+)/ && $1, substr($str, -1));
	is($str =~ /(\p{$y}+)/ && $1, substr($str, 0, -1));
	is($str =~ /(\P{$y}+)/ && $1, substr($str, -1));
      }
    }
  }
}

# test the blocks (InFoobar)
for (grep $utf8::Canonical{$_} =~ /^In/, keys %utf8::Canonical) {
  my $filename = File::Spec->catfile(
    $updir => lib => unicore => lib => gc_sc => "$utf8::Canonical{$_}.pl"
  );

  next unless -e $filename;
  my ($h1, $h2) = map hex, split /\t/, (do $filename);
  my $str = join "", map chr, $h1 .. (($h2 || $h1) + 1);

  my $blk = $_;

  is($str =~ /(\p{$blk}+)/ && $1, substr($str, 0, -1));
  is($str =~ /(\P{$blk}+)/ && $1, substr($str, -1));

  $blk =~ s/^In/Block:/;

  is($str =~ /(\p{$blk}+)/ && $1, substr($str, 0, -1));
  is($str =~ /(\P{$blk}+)/ && $1, substr($str, -1));
}
Commit	Line	Data
0f1b7392 RGS	1	BEGIN {
	2	chdir 't' if -d 't';
	3	@INC = qw(../lib .);
	4	require "test.pl";
	5	}
	6
a2bd7410	7	plan tests => 4670;
0f1b7392 RGS	8
	9	sub MyUniClass {
	10	<<END;
	11	0030 004F
	12	END
	13	}
	14
	15	sub Other::Class {
	16	<<END;
	17	0040 005F
	18	END
	19	}
	20
	21	sub A::B::Intersection {
	22	<<END;
	23	+main::MyUniClass
	24	&Other::Class
	25	END
	26	}
	27
	28
	29	my $str = join "", map chr($_), 0x20 .. 0x6F;
	30
	31	# make sure it finds built-in class
	32	is(($str =~ /(\p{Letter}+)/)[0], 'ABCDEFGHIJKLMNOPQRSTUVWXYZ');
12ac2576	33	is(($str =~ /(\p{l}+)/)[0], 'ABCDEFGHIJKLMNOPQRSTUVWXYZ');
0f1b7392 RGS	34
	35	# make sure it finds user-defined class
	36	is(($str =~ /(\p{MyUniClass}+)/)[0], '0123456789:;<=>?@ABCDEFGHIJKLMNO');
	37
	38	# make sure it finds class in other package
	39	is(($str =~ /(\p{Other::Class}+)/)[0], '@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_');
	40
	41	# make sure it finds class in other OTHER package
	42	is(($str =~ /(\p{A::B::Intersection}+)/)[0], '@ABCDEFGHIJKLMNO');
12ac2576 JP	43
	44	# all of these should look in lib/unicore/bc/AL.pl
	45	$str = "\x{070D}\x{070E}\x{070F}\x{0710}\x{0711}";
	46	is(($str =~ /(\P{BidiClass: ArabicLetter}+)/)[0], "\x{070E}\x{070F}");
	47	is(($str =~ /(\P{BidiClass: AL}+)/)[0], "\x{070E}\x{070F}");
	48	is(($str =~ /(\P{BC :ArabicLetter}+)/)[0], "\x{070E}\x{070F}");
	49	is(($str =~ /(\P{bc=AL}+)/)[0], "\x{070E}\x{070F}");
	50
	51	# make sure InGreek works
	52	$str = "[\x{038B}\x{038C}\x{038D}]";
	53
	54	is(($str =~ /(\p{InGreek}+)/)[0], "\x{038B}\x{038C}\x{038D}");
	55	is(($str =~ /(\p{Script:InGreek}+)/)[0], "\x{038B}\x{038C}\x{038D}");
	56	is(($str =~ /(\p{Script=InGreek}+)/)[0], "\x{038B}\x{038C}\x{038D}");
	57	is(($str =~ /(\p{sc:InGreek}+)/)[0], "\x{038B}\x{038C}\x{038D}");
	58	is(($str =~ /(\p{sc=InGreek}+)/)[0], "\x{038B}\x{038C}\x{038D}");
	59
	60
	61	use File::Spec;
	62	my $updir = File::Spec->updir;
	63
	64
	65	# the %utf8::... hashes are already in existence
	66	# because utf8_pva.pl was run by utf8_heavy.pl
	67
	68	# non-General Category and non-Script
	69	while (my ($abbrev, $files) = each %utf8::PVA_abbr_map) {
	70	my $prop_name = $utf8::PropertyAlias{$abbrev};
	71	next unless $prop_name;
	72	next if $abbrev eq "gc_sc";
	73
	74	for (sort keys %$files) {
	75	my $filename = File::Spec->catfile(
	76	$updir => lib => unicore => lib => $abbrev => "$files->{$_}.pl"
	77	);
	78
	79	next unless -e $filename;
	80	my ($h1, $h2) = map hex, split /\t/, (do $filename);
	81	my $str = join "", map chr, $h1 .. (($h2 \|\| $h1) + 1);
	82
	83	for my $p ($prop_name, $abbrev) {
	84	for my $c ($files->{$_}, $_) {
	85	is($str =~ /(\p{$p: $c}+)/ && $1, substr($str, 0, -1));
	86	is($str =~ /(\P{$p= $c}+)/ && $1, substr($str, -1));
	87	}
	88	}
	89	}
	90	}
	91
	92	# General Category and Script
	93	for my $p ('gc', 'sc') {
	94	while (my ($abbr) = each %{ $utf8::PropValueAlias{$p} }) {
	95	my $filename = File::Spec->catfile(
	96	$updir => lib => unicore => lib => gc_sc => "$utf8::PVA_abbr_map{gc_sc}{$abbr}.pl"
	97	);
	98
	99	next unless -e $filename;
	100	my ($h1, $h2) = map hex, split /\t/, (do $filename);
	101	my $str = join "", map chr, $h1 .. (($h2 \|\| $h1) + 1);
	102
	103	for my $x ($p, { gc => 'General Category', sc => 'Script' }->{$p}) {
	104	for my $y ($abbr, $utf8::PropValueAlias{$p}{$abbr}, $utf8::PVA_abbr_map{gc_sc}{$abbr}) {
	105	is($str =~ /(\p{$x: $y}+)/ && $1, substr($str, 0, -1));
	106	is($str =~ /(\P{$x= $y}+)/ && $1, substr($str, -1));
107	is($str =~ /(\p{$y}+)/ && $1, substr($str, 0, -1));
108	is($str =~ /(\P{$y}+)/ && $1, substr($str, -1));
109	}
110	}
111	}
112	}
113
114	# test extra properties (ASCII_Hex_Digit, Bidi_Control, etc.)
53cd5480	115	SKIP:
32d0b1dc	116	{
26961258	117	skip "Can't reliably derive class names from file names", 592 if $^O eq 'VMS';
53cd5480 CB	118
	119	# On case tolerant filesystems, Cf.pl will cause a -e test for cf.pl to
	120	# return true. Try to work around this by reading the filenames explicitly
	121	# to get a case sensitive test. N.B. This will fail if filename case is
	122	# not preserved because you might go looking for a class name of CF or cf
	123	# when you really want Cf. Storing case sensitive data in filenames is
	124	# simply not portable.
	125
32d0b1dc	126	my %files;
12ac2576	127
32d0b1dc NC	128	my $dirname = File::Spec->catdir($updir => lib => unicore => lib => gc_sc);
	129	opendir D, $dirname or die $!;
	130	@files{readdir D} = ();
	131	closedir D;
	132
	133	for (keys %utf8::PA_reverse) {
	134	my $leafname = "$utf8::PA_reverse{$_}.pl";
	135	next unless exists $files{$leafname};
12ac2576	136
32d0b1dc NC	137	my $filename = File::Spec->catfile($dirname, $leafname);
	138
	139	my ($h1, $h2) = map hex, split /\t/, (do $filename);
	140	my $str = join "", map chr, $h1 .. (($h2 \|\| $h1) + 1);
	141
	142	for my $x ('gc', 'General Category') {
	143	print "# $filename $x $_, $utf8::PA_reverse{$_}\n";
	144	for my $y ($_, $utf8::PA_reverse{$_}) {
	145	is($str =~ /(\p{$x: $y}+)/ && $1, substr($str, 0, -1));
	146	is($str =~ /(\P{$x= $y}+)/ && $1, substr($str, -1));
	147	is($str =~ /(\p{$y}+)/ && $1, substr($str, 0, -1));
	148	is($str =~ /(\P{$y}+)/ && $1, substr($str, -1));
	149	}
12ac2576 JP	150	}
	151	}
	152	}
	153
	154	# test the blocks (InFoobar)
	155	for (grep $utf8::Canonical{$_} =~ /^In/, keys %utf8::Canonical) {
	156	my $filename = File::Spec->catfile(
	157	$updir => lib => unicore => lib => gc_sc => "$utf8::Canonical{$_}.pl"
	158	);
	159
	160	next unless -e $filename;
	161	my ($h1, $h2) = map hex, split /\t/, (do $filename);
	162	my $str = join "", map chr, $h1 .. (($h2 \|\| $h1) + 1);
	163
	164	my $blk = $_;
	165
	166	is($str =~ /(\p{$blk}+)/ && $1, substr($str, 0, -1));
	167	is($str =~ /(\P{$blk}+)/ && $1, substr($str, -1));
	168
	169	$blk =~ s/^In/Block:/;
	170
	171	is($str =~ /(\p{$blk}+)/ && $1, substr($str, 0, -1));
	172	is($str =~ /(\P{$blk}+)/ && $1, substr($str, -1));
	173	}