This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
new perldelta
[perl5.git] / cpan / Unicode-Collate / mkheader
CommitLineData
f58b9ef1
CBW
1#!perl
2#
3# This auxiliary script makes five header files
4# used for building XSUB of Unicode::Collate.
5#
6# Usage:
6608d2d5 7# <do './mkheader'> in perl, or <perl mkheader> in command line
f58b9ef1
CBW
8#
9# Input file:
10# Collate/allkeys.txt
11#
12# Output file:
13# ucatbl.h
14#
15use 5.006;
16use strict;
17use warnings;
18use Carp;
19use File::Spec;
20
f58b9ef1
CBW
21use constant TRUE => 1;
22use constant FALSE => "";
23use constant VCE_TEMPLATE => 'Cn4';
24
25sub _getHexArray { map hex, $_[0] =~ /([0-9a-fA-F]+)/g }
26
27our $PACKAGE = 'Unicode::Collate, mkheader';
28our $prefix = "UCA_";
29
30our %SimpleEntries; # $codepoint => $keys
31our @Rest;
32
33{
34 my($f, $fh);
19265284 35 foreach my $d (File::Spec->curdir()) {
f58b9ef1
CBW
36 $f = File::Spec->catfile($d, "Collate", "allkeys.txt");
37 last if open($fh, $f);
38 $f = undef;
39 }
40 croak "$PACKAGE: Collate/allkeys.txt is not found" if !defined $f;
41
42 while (my $line = <$fh>) {
43 next if $line =~ /^\s*#/;
44 if ($line =~ /^\s*\@/) {
45 push @Rest, $line;
46 next;
47 }
48
9d8690d8 49 next if $line !~ /^\s*[0-9A-Fa-f]/; # lines without element
f58b9ef1
CBW
50
51 $line =~ s/[#%]\s*(.*)//; # removing comment (not getting the name)
52
53 # gets element
54 my($e, $k) = split /;/, $line;
55
56 croak "Wrong Entry: <charList> must be separated by ';' ".
57 "from <collElement>" if ! $k;
58
59 my @uv = _getHexArray($e);
60 next if !@uv;
61
62 if (@uv != 1) {
63 push @Rest, $line;
64 next;
9d8690d8 65 # Contractions of two or more characters will not be compiled.
f58b9ef1
CBW
66 }
67
68 my $is_L3_ignorable = TRUE;
69
70 my @key;
71 foreach my $arr ($k =~ /\[([^\[\]]+)\]/g) { # SPACEs allowed
72 my $var = $arr =~ /\*/; # exactly /^\*/ but be lenient.
73 my @wt = _getHexArray($arr);
74 push @key, pack(VCE_TEMPLATE, $var, @wt);
75 $is_L3_ignorable = FALSE
76 if $wt[0] || $wt[1] || $wt[2];
77 # Conformance Test for 3.1.1 and 4.0.0 shows Level 3 ignorable
78 # is completely ignorable.
79 # For expansion, an entry $is_L3_ignorable
80 # if and only if "all" CEs are [.0000.0000.0000].
81 }
82 my $mapping = $is_L3_ignorable ? [] : \@key;
83 my $num = @$mapping;
84 my $str = chr($num).join('', @$mapping);
85 $SimpleEntries{$uv[0]} = stringify($str);
86 }
87}
88
89sub stringify {
90 my $str = shift;
91 return sprintf '"%s"', join '',
92 map sprintf("\\x%02x", ord $_), split //, $str;
93
94}
95
96########## writing header files ##########
97
98my $init = '';
99{
9d8690d8 100 my $type = "char* const";
f58b9ef1
CBW
101 my $head = $prefix."rest";
102
9d8690d8 103 $init .= "static const $type $head [] = {\n";
f58b9ef1
CBW
104 for my $line (@Rest) {
105 $line =~ s/\s*\z//;
106 next if $line eq '';
107 $init .= "/*$line*/\n" if $line =~ /^[A-Za-z0-9_.:;@\ \[\]]+\z/;
3d7de2d1 108 $init .= stringify($line).",\n";
f58b9ef1
CBW
109 }
110 $init .= "NULL\n"; # sentinel
111 $init .= "};\n\n";
112}
113
114my @tripletable = (
115 {
116 file => "ucatbl",
117 name => "simple",
9d8690d8 118 type => "char* const",
f58b9ef1
CBW
119 hash => \%SimpleEntries,
120 null => "NULL",
121 init => $init,
122 },
123);
124
125foreach my $tbl (@tripletable) {
126 my $file = "$tbl->{file}.h";
127 my $head = "${prefix}$tbl->{name}";
128 my $type = $tbl->{type};
129 my $hash = $tbl->{hash};
130 my $null = $tbl->{null};
131 my $init = $tbl->{init};
132
60f577e0 133 open my $fh_h, ">$file" or croak "$PACKAGE: $file can't be made";
1c1bafd8
TS
134 binmode $fh_h;
135 my $old_fh = select $fh_h;
f58b9ef1
CBW
136 my %val;
137
60f577e0 138 print << 'EOF';
f58b9ef1
CBW
139/*
140 * This file is auto-generated by mkheader.
141 * Any changes here will be lost!
142 */
143EOF
144
145 print $init if defined $init;
146
147 foreach my $uv (keys %$hash) {
148 croak sprintf("a Unicode code point 0x%04X over 0x10FFFF.", $uv)
149 unless $uv <= 0x10FFFF;
150 my @c = unpack 'CCCC', pack 'N', $uv;
151 $val{ $c[1] }{ $c[2] }{ $c[3] } = $hash->{$uv};
9d8690d8 152 # $c[0] must be 0.
f58b9ef1
CBW
153 }
154
155 foreach my $p (sort { $a <=> $b } keys %val) {
156 next if ! $val{ $p };
157 for (my $r = 0; $r < 256; $r++) {
158 next if ! $val{ $p }{ $r };
9d8690d8 159 printf "static const $type ${head}_%02x_%02x [256] = {\n", $p, $r;
f58b9ef1
CBW
160 for (my $c = 0; $c < 256; $c++) {
161 print "\t", defined $val{$p}{$r}{$c}
3d7de2d1 162 ? $val{$p}{$r}{$c}
f58b9ef1
CBW
163 : $null;
164 print ',' if $c != 255;
165 print "\n" if $c % 8 == 7;
166 }
167 print "};\n\n";
168 }
169 }
170 foreach my $p (sort { $a <=> $b } keys %val) {
171 next if ! $val{ $p };
9d8690d8 172 printf "static const $type* const ${head}_%02x [256] = {\n", $p;
f58b9ef1
CBW
173 for (my $r = 0; $r < 256; $r++) {
174 print $val{ $p }{ $r }
175 ? sprintf("${head}_%02x_%02x", $p, $r)
176 : "NULL";
177 print ',' if $r != 255;
178 print "\n" if $val{ $p }{ $r } || ($r+1) % 8 == 0;
179 }
180 print "};\n\n";
181 }
9d8690d8 182 print "static const $type* const * const $head [] = {\n";
f58b9ef1
CBW
183 for (my $p = 0; $p <= 0x10; $p++) {
184 print $val{ $p } ? sprintf("${head}_%02x", $p) : "NULL";
185 print ',' if $p != 0x10;
186 print "\n";
187 }
188 print "};\n\n";
60f577e0 189 close $fh_h;
1c1bafd8 190 select $old_fh;
f58b9ef1
CBW
191}
192
1931;
194__END__