Commit | Line | Data |
---|---|---|
f58b9ef1 CBW |
1 | #!perl |
2 | # | |
3 | # This auxiliary script makes five header files | |
4 | # used for building XSUB of Unicode::Collate. | |
5 | # | |
6 | # Usage: | |
6608d2d5 | 7 | # <do './mkheader'> in perl, or <perl mkheader> in command line |
f58b9ef1 CBW |
8 | # |
9 | # Input file: | |
10 | # Collate/allkeys.txt | |
11 | # | |
12 | # Output file: | |
13 | # ucatbl.h | |
14 | # | |
15 | use 5.006; | |
16 | use strict; | |
17 | use warnings; | |
18 | use Carp; | |
19 | use File::Spec; | |
20 | ||
21 | BEGIN { | |
22 | unless ("A" eq pack('U', 0x41)) { | |
23 | die "Unicode::Collate cannot stringify a Unicode code point\n"; | |
d8e4b4ea CBW |
24 | } |
25 | unless (0x41 == unpack('U', 'A')) { | |
26 | die "Unicode::Collate cannot get a Unicode code point\n"; | |
f58b9ef1 CBW |
27 | } |
28 | } | |
29 | ||
30 | use constant TRUE => 1; | |
31 | use constant FALSE => ""; | |
32 | use constant VCE_TEMPLATE => 'Cn4'; | |
33 | ||
34 | sub _getHexArray { map hex, $_[0] =~ /([0-9a-fA-F]+)/g } | |
35 | ||
36 | our $PACKAGE = 'Unicode::Collate, mkheader'; | |
37 | our $prefix = "UCA_"; | |
38 | ||
39 | our %SimpleEntries; # $codepoint => $keys | |
40 | our @Rest; | |
41 | ||
42 | { | |
43 | my($f, $fh); | |
19265284 | 44 | foreach my $d (File::Spec->curdir()) { |
f58b9ef1 CBW |
45 | $f = File::Spec->catfile($d, "Collate", "allkeys.txt"); |
46 | last if open($fh, $f); | |
47 | $f = undef; | |
48 | } | |
49 | croak "$PACKAGE: Collate/allkeys.txt is not found" if !defined $f; | |
50 | ||
51 | while (my $line = <$fh>) { | |
52 | next if $line =~ /^\s*#/; | |
53 | if ($line =~ /^\s*\@/) { | |
54 | push @Rest, $line; | |
55 | next; | |
56 | } | |
57 | ||
9d8690d8 | 58 | next if $line !~ /^\s*[0-9A-Fa-f]/; # lines without element |
f58b9ef1 CBW |
59 | |
60 | $line =~ s/[#%]\s*(.*)//; # removing comment (not getting the name) | |
61 | ||
62 | # gets element | |
63 | my($e, $k) = split /;/, $line; | |
64 | ||
65 | croak "Wrong Entry: <charList> must be separated by ';' ". | |
66 | "from <collElement>" if ! $k; | |
67 | ||
68 | my @uv = _getHexArray($e); | |
69 | next if !@uv; | |
70 | ||
71 | if (@uv != 1) { | |
72 | push @Rest, $line; | |
73 | next; | |
9d8690d8 | 74 | # Contractions of two or more characters will not be compiled. |
f58b9ef1 CBW |
75 | } |
76 | ||
77 | my $is_L3_ignorable = TRUE; | |
78 | ||
79 | my @key; | |
80 | foreach my $arr ($k =~ /\[([^\[\]]+)\]/g) { # SPACEs allowed | |
81 | my $var = $arr =~ /\*/; # exactly /^\*/ but be lenient. | |
82 | my @wt = _getHexArray($arr); | |
83 | push @key, pack(VCE_TEMPLATE, $var, @wt); | |
84 | $is_L3_ignorable = FALSE | |
85 | if $wt[0] || $wt[1] || $wt[2]; | |
86 | # Conformance Test for 3.1.1 and 4.0.0 shows Level 3 ignorable | |
87 | # is completely ignorable. | |
88 | # For expansion, an entry $is_L3_ignorable | |
89 | # if and only if "all" CEs are [.0000.0000.0000]. | |
90 | } | |
91 | my $mapping = $is_L3_ignorable ? [] : \@key; | |
92 | my $num = @$mapping; | |
93 | my $str = chr($num).join('', @$mapping); | |
94 | $SimpleEntries{$uv[0]} = stringify($str); | |
95 | } | |
96 | } | |
97 | ||
98 | sub stringify { | |
99 | my $str = shift; | |
100 | return sprintf '"%s"', join '', | |
101 | map sprintf("\\x%02x", ord $_), split //, $str; | |
102 | ||
103 | } | |
104 | ||
105 | ########## writing header files ########## | |
106 | ||
107 | my $init = ''; | |
108 | { | |
9d8690d8 | 109 | my $type = "char* const"; |
f58b9ef1 CBW |
110 | my $head = $prefix."rest"; |
111 | ||
9d8690d8 | 112 | $init .= "static const $type $head [] = {\n"; |
f58b9ef1 CBW |
113 | for my $line (@Rest) { |
114 | $line =~ s/\s*\z//; | |
115 | next if $line eq ''; | |
116 | $init .= "/*$line*/\n" if $line =~ /^[A-Za-z0-9_.:;@\ \[\]]+\z/; | |
3d7de2d1 | 117 | $init .= stringify($line).",\n"; |
f58b9ef1 CBW |
118 | } |
119 | $init .= "NULL\n"; # sentinel | |
120 | $init .= "};\n\n"; | |
121 | } | |
122 | ||
123 | my @tripletable = ( | |
124 | { | |
125 | file => "ucatbl", | |
126 | name => "simple", | |
9d8690d8 | 127 | type => "char* const", |
f58b9ef1 CBW |
128 | hash => \%SimpleEntries, |
129 | null => "NULL", | |
130 | init => $init, | |
131 | }, | |
132 | ); | |
133 | ||
134 | foreach my $tbl (@tripletable) { | |
135 | my $file = "$tbl->{file}.h"; | |
136 | my $head = "${prefix}$tbl->{name}"; | |
137 | my $type = $tbl->{type}; | |
138 | my $hash = $tbl->{hash}; | |
139 | my $null = $tbl->{null}; | |
140 | my $init = $tbl->{init}; | |
141 | ||
60f577e0 | 142 | open my $fh_h, ">$file" or croak "$PACKAGE: $file can't be made"; |
1c1bafd8 TS |
143 | binmode $fh_h; |
144 | my $old_fh = select $fh_h; | |
f58b9ef1 CBW |
145 | my %val; |
146 | ||
60f577e0 | 147 | print << 'EOF'; |
f58b9ef1 CBW |
148 | /* |
149 | * This file is auto-generated by mkheader. | |
150 | * Any changes here will be lost! | |
151 | */ | |
152 | EOF | |
153 | ||
154 | print $init if defined $init; | |
155 | ||
156 | foreach my $uv (keys %$hash) { | |
157 | croak sprintf("a Unicode code point 0x%04X over 0x10FFFF.", $uv) | |
158 | unless $uv <= 0x10FFFF; | |
159 | my @c = unpack 'CCCC', pack 'N', $uv; | |
160 | $val{ $c[1] }{ $c[2] }{ $c[3] } = $hash->{$uv}; | |
9d8690d8 | 161 | # $c[0] must be 0. |
f58b9ef1 CBW |
162 | } |
163 | ||
164 | foreach my $p (sort { $a <=> $b } keys %val) { | |
165 | next if ! $val{ $p }; | |
166 | for (my $r = 0; $r < 256; $r++) { | |
167 | next if ! $val{ $p }{ $r }; | |
9d8690d8 | 168 | printf "static const $type ${head}_%02x_%02x [256] = {\n", $p, $r; |
f58b9ef1 CBW |
169 | for (my $c = 0; $c < 256; $c++) { |
170 | print "\t", defined $val{$p}{$r}{$c} | |
3d7de2d1 | 171 | ? $val{$p}{$r}{$c} |
f58b9ef1 CBW |
172 | : $null; |
173 | print ',' if $c != 255; | |
174 | print "\n" if $c % 8 == 7; | |
175 | } | |
176 | print "};\n\n"; | |
177 | } | |
178 | } | |
179 | foreach my $p (sort { $a <=> $b } keys %val) { | |
180 | next if ! $val{ $p }; | |
9d8690d8 | 181 | printf "static const $type* const ${head}_%02x [256] = {\n", $p; |
f58b9ef1 CBW |
182 | for (my $r = 0; $r < 256; $r++) { |
183 | print $val{ $p }{ $r } | |
184 | ? sprintf("${head}_%02x_%02x", $p, $r) | |
185 | : "NULL"; | |
186 | print ',' if $r != 255; | |
187 | print "\n" if $val{ $p }{ $r } || ($r+1) % 8 == 0; | |
188 | } | |
189 | print "};\n\n"; | |
190 | } | |
9d8690d8 | 191 | print "static const $type* const * const $head [] = {\n"; |
f58b9ef1 CBW |
192 | for (my $p = 0; $p <= 0x10; $p++) { |
193 | print $val{ $p } ? sprintf("${head}_%02x", $p) : "NULL"; | |
194 | print ',' if $p != 0x10; | |
195 | print "\n"; | |
196 | } | |
197 | print "};\n\n"; | |
60f577e0 | 198 | close $fh_h; |
1c1bafd8 | 199 | select $old_fh; |
f58b9ef1 CBW |
200 | } |
201 | ||
202 | 1; | |
203 | __END__ |