3 # This auxiliary script makes five header files
4 # used for building XSUB of Unicode::Collate.
7 # <do 'mkheader'> in perl, or <perl mkheader> in command line
22 unless ("A" eq pack('U', 0x41)) {
23 die "Unicode::Collate cannot stringify a Unicode code point\n";
25 unless (0x41 == unpack('U', 'A')) {
26 die "Unicode::Collate cannot get a Unicode code point\n";
30 use constant TRUE => 1;
31 use constant FALSE => "";
32 use constant VCE_TEMPLATE => 'Cn4';
34 sub _getHexArray { map hex, $_[0] =~ /([0-9a-fA-F]+)/g }
36 our $PACKAGE = 'Unicode::Collate, mkheader';
39 our %SimpleEntries; # $codepoint => $keys
44 foreach my $d (File::Spec->curdir()) {
45 $f = File::Spec->catfile($d, "Collate", "allkeys.txt");
46 last if open($fh, $f);
49 croak "$PACKAGE: Collate/allkeys.txt is not found" if !defined $f;
51 while (my $line = <$fh>) {
52 next if $line =~ /^\s*#/;
53 if ($line =~ /^\s*\@/) {
58 next if $line !~ /^\s*[0-9A-Fa-f]/;
60 $line =~ s/[#%]\s*(.*)//; # removing comment (not getting the name)
63 my($e, $k) = split /;/, $line;
65 croak "Wrong Entry: <charList> must be separated by ';' ".
66 "from <collElement>" if ! $k;
68 my @uv = _getHexArray($e);
76 my $is_L3_ignorable = TRUE;
79 foreach my $arr ($k =~ /\[([^\[\]]+)\]/g) { # SPACEs allowed
80 my $var = $arr =~ /\*/; # exactly /^\*/ but be lenient.
81 my @wt = _getHexArray($arr);
82 push @key, pack(VCE_TEMPLATE, $var, @wt);
83 $is_L3_ignorable = FALSE
84 if $wt[0] || $wt[1] || $wt[2];
85 # Conformance Test for 3.1.1 and 4.0.0 shows Level 3 ignorable
86 # is completely ignorable.
87 # For expansion, an entry $is_L3_ignorable
88 # if and only if "all" CEs are [.0000.0000.0000].
90 my $mapping = $is_L3_ignorable ? [] : \@key;
92 my $str = chr($num).join('', @$mapping);
93 $SimpleEntries{$uv[0]} = stringify($str);
99 return sprintf '"%s"', join '',
100 map sprintf("\\x%02x", ord $_), split //, $str;
104 ########## writing header files ##########
109 my $head = $prefix."rest";
111 $init .= "static $type $head [] = {\n";
112 for my $line (@Rest) {
115 $init .= "/*$line*/\n" if $line =~ /^[A-Za-z0-9_.:;@\ \[\]]+\z/;
116 $init .= "($type)".stringify($line).",\n";
118 $init .= "NULL\n"; # sentinel
127 hash => \%SimpleEntries,
133 foreach my $tbl (@tripletable) {
134 my $file = "$tbl->{file}.h";
135 my $head = "${prefix}$tbl->{name}";
136 my $type = $tbl->{type};
137 my $hash = $tbl->{hash};
138 my $null = $tbl->{null};
139 my $init = $tbl->{init};
141 open my $fh_h, ">$file" or croak "$PACKAGE: $file can't be made";
142 binmode $fh_h; select $fh_h;
147 * This file is auto-generated by mkheader.
148 * Any changes here will be lost!
152 print $init if defined $init;
154 foreach my $uv (keys %$hash) {
155 croak sprintf("a Unicode code point 0x%04X over 0x10FFFF.", $uv)
156 unless $uv <= 0x10FFFF;
157 my @c = unpack 'CCCC', pack 'N', $uv;
158 $val{ $c[1] }{ $c[2] }{ $c[3] } = $hash->{$uv};
161 foreach my $p (sort { $a <=> $b } keys %val) {
162 next if ! $val{ $p };
163 for (my $r = 0; $r < 256; $r++) {
164 next if ! $val{ $p }{ $r };
165 printf "static $type ${head}_%02x_%02x [256] = {\n", $p, $r;
166 for (my $c = 0; $c < 256; $c++) {
167 print "\t", defined $val{$p}{$r}{$c}
168 ? "($type)".$val{$p}{$r}{$c}
170 print ',' if $c != 255;
171 print "\n" if $c % 8 == 7;
176 foreach my $p (sort { $a <=> $b } keys %val) {
177 next if ! $val{ $p };
178 printf "static $type* ${head}_%02x [256] = {\n", $p;
179 for (my $r = 0; $r < 256; $r++) {
180 print $val{ $p }{ $r }
181 ? sprintf("${head}_%02x_%02x", $p, $r)
183 print ',' if $r != 255;
184 print "\n" if $val{ $p }{ $r } || ($r+1) % 8 == 0;
188 print "static $type** $head [] = {\n";
189 for (my $p = 0; $p <= 0x10; $p++) {
190 print $val{ $p } ? sprintf("${head}_%02x", $p) : "NULL";
191 print ',' if $p != 0x10;