This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Synch with CPAN version 1.30
[perl5.git] / cpan / Unicode-Collate / mkheader
CommitLineData
f58b9ef1
CBW
1#!perl
2#
3# This auxiliary script makes five header files
4# used for building XSUB of Unicode::Collate.
5#
6# Usage:
6608d2d5 7# <do './mkheader'> in perl, or <perl mkheader> in command line
f58b9ef1
CBW
8#
9# Input file:
10# Collate/allkeys.txt
11#
12# Output file:
13# ucatbl.h
14#
15use 5.006;
16use strict;
17use warnings;
18use Carp;
19use File::Spec;
20
21BEGIN {
22 unless ("A" eq pack('U', 0x41)) {
23 die "Unicode::Collate cannot stringify a Unicode code point\n";
d8e4b4ea
CBW
24 }
25 unless (0x41 == unpack('U', 'A')) {
26 die "Unicode::Collate cannot get a Unicode code point\n";
f58b9ef1
CBW
27 }
28}
29
30use constant TRUE => 1;
31use constant FALSE => "";
32use constant VCE_TEMPLATE => 'Cn4';
33
34sub _getHexArray { map hex, $_[0] =~ /([0-9a-fA-F]+)/g }
35
36our $PACKAGE = 'Unicode::Collate, mkheader';
37our $prefix = "UCA_";
38
39our %SimpleEntries; # $codepoint => $keys
40our @Rest;
41
42{
43 my($f, $fh);
19265284 44 foreach my $d (File::Spec->curdir()) {
f58b9ef1
CBW
45 $f = File::Spec->catfile($d, "Collate", "allkeys.txt");
46 last if open($fh, $f);
47 $f = undef;
48 }
49 croak "$PACKAGE: Collate/allkeys.txt is not found" if !defined $f;
50
51 while (my $line = <$fh>) {
52 next if $line =~ /^\s*#/;
53 if ($line =~ /^\s*\@/) {
54 push @Rest, $line;
55 next;
56 }
57
9d8690d8 58 next if $line !~ /^\s*[0-9A-Fa-f]/; # lines without element
f58b9ef1
CBW
59
60 $line =~ s/[#%]\s*(.*)//; # removing comment (not getting the name)
61
62 # gets element
63 my($e, $k) = split /;/, $line;
64
65 croak "Wrong Entry: <charList> must be separated by ';' ".
66 "from <collElement>" if ! $k;
67
68 my @uv = _getHexArray($e);
69 next if !@uv;
70
71 if (@uv != 1) {
72 push @Rest, $line;
73 next;
9d8690d8 74 # Contractions of two or more characters will not be compiled.
f58b9ef1
CBW
75 }
76
77 my $is_L3_ignorable = TRUE;
78
79 my @key;
80 foreach my $arr ($k =~ /\[([^\[\]]+)\]/g) { # SPACEs allowed
81 my $var = $arr =~ /\*/; # exactly /^\*/ but be lenient.
82 my @wt = _getHexArray($arr);
83 push @key, pack(VCE_TEMPLATE, $var, @wt);
84 $is_L3_ignorable = FALSE
85 if $wt[0] || $wt[1] || $wt[2];
86 # Conformance Test for 3.1.1 and 4.0.0 shows Level 3 ignorable
87 # is completely ignorable.
88 # For expansion, an entry $is_L3_ignorable
89 # if and only if "all" CEs are [.0000.0000.0000].
90 }
91 my $mapping = $is_L3_ignorable ? [] : \@key;
92 my $num = @$mapping;
93 my $str = chr($num).join('', @$mapping);
94 $SimpleEntries{$uv[0]} = stringify($str);
95 }
96}
97
98sub stringify {
99 my $str = shift;
100 return sprintf '"%s"', join '',
101 map sprintf("\\x%02x", ord $_), split //, $str;
102
103}
104
105########## writing header files ##########
106
107my $init = '';
108{
9d8690d8 109 my $type = "char* const";
f58b9ef1
CBW
110 my $head = $prefix."rest";
111
9d8690d8 112 $init .= "static const $type $head [] = {\n";
f58b9ef1
CBW
113 for my $line (@Rest) {
114 $line =~ s/\s*\z//;
115 next if $line eq '';
116 $init .= "/*$line*/\n" if $line =~ /^[A-Za-z0-9_.:;@\ \[\]]+\z/;
3d7de2d1 117 $init .= stringify($line).",\n";
f58b9ef1
CBW
118 }
119 $init .= "NULL\n"; # sentinel
120 $init .= "};\n\n";
121}
122
123my @tripletable = (
124 {
125 file => "ucatbl",
126 name => "simple",
9d8690d8 127 type => "char* const",
f58b9ef1
CBW
128 hash => \%SimpleEntries,
129 null => "NULL",
130 init => $init,
131 },
132);
133
134foreach my $tbl (@tripletable) {
135 my $file = "$tbl->{file}.h";
136 my $head = "${prefix}$tbl->{name}";
137 my $type = $tbl->{type};
138 my $hash = $tbl->{hash};
139 my $null = $tbl->{null};
140 my $init = $tbl->{init};
141
60f577e0 142 open my $fh_h, ">$file" or croak "$PACKAGE: $file can't be made";
1c1bafd8
TS
143 binmode $fh_h;
144 my $old_fh = select $fh_h;
f58b9ef1
CBW
145 my %val;
146
60f577e0 147 print << 'EOF';
f58b9ef1
CBW
148/*
149 * This file is auto-generated by mkheader.
150 * Any changes here will be lost!
151 */
152EOF
153
154 print $init if defined $init;
155
156 foreach my $uv (keys %$hash) {
157 croak sprintf("a Unicode code point 0x%04X over 0x10FFFF.", $uv)
158 unless $uv <= 0x10FFFF;
159 my @c = unpack 'CCCC', pack 'N', $uv;
160 $val{ $c[1] }{ $c[2] }{ $c[3] } = $hash->{$uv};
9d8690d8 161 # $c[0] must be 0.
f58b9ef1
CBW
162 }
163
164 foreach my $p (sort { $a <=> $b } keys %val) {
165 next if ! $val{ $p };
166 for (my $r = 0; $r < 256; $r++) {
167 next if ! $val{ $p }{ $r };
9d8690d8 168 printf "static const $type ${head}_%02x_%02x [256] = {\n", $p, $r;
f58b9ef1
CBW
169 for (my $c = 0; $c < 256; $c++) {
170 print "\t", defined $val{$p}{$r}{$c}
3d7de2d1 171 ? $val{$p}{$r}{$c}
f58b9ef1
CBW
172 : $null;
173 print ',' if $c != 255;
174 print "\n" if $c % 8 == 7;
175 }
176 print "};\n\n";
177 }
178 }
179 foreach my $p (sort { $a <=> $b } keys %val) {
180 next if ! $val{ $p };
9d8690d8 181 printf "static const $type* const ${head}_%02x [256] = {\n", $p;
f58b9ef1
CBW
182 for (my $r = 0; $r < 256; $r++) {
183 print $val{ $p }{ $r }
184 ? sprintf("${head}_%02x_%02x", $p, $r)
185 : "NULL";
186 print ',' if $r != 255;
187 print "\n" if $val{ $p }{ $r } || ($r+1) % 8 == 0;
188 }
189 print "};\n\n";
190 }
9d8690d8 191 print "static const $type* const * const $head [] = {\n";
f58b9ef1
CBW
192 for (my $p = 0; $p <= 0x10; $p++) {
193 print $val{ $p } ? sprintf("${head}_%02x", $p) : "NULL";
194 print ',' if $p != 0x10;
195 print "\n";
196 }
197 print "};\n\n";
60f577e0 198 close $fh_h;
1c1bafd8 199 select $old_fh;
f58b9ef1
CBW
200}
201
2021;
203__END__