This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Upgrade DB_File to 1.827
[perl5.git] / cpan / Unicode-Collate / mkheader
CommitLineData
f58b9ef1
CBW
1#!perl
2#
3# This auxiliary script makes five header files
4# used for building XSUB of Unicode::Collate.
5#
6# Usage:
7# <do 'mkheader'> in perl, or <perl mkheader> in command line
8#
9# Input file:
10# Collate/allkeys.txt
11#
12# Output file:
13# ucatbl.h
14#
15use 5.006;
16use strict;
17use warnings;
18use Carp;
19use File::Spec;
20
21BEGIN {
22 unless ("A" eq pack('U', 0x41)) {
23 die "Unicode::Collate cannot stringify a Unicode code point\n";
24 }
25}
26
27use constant TRUE => 1;
28use constant FALSE => "";
29use constant VCE_TEMPLATE => 'Cn4';
30
31sub _getHexArray { map hex, $_[0] =~ /([0-9a-fA-F]+)/g }
32
33our $PACKAGE = 'Unicode::Collate, mkheader';
34our $prefix = "UCA_";
35
36our %SimpleEntries; # $codepoint => $keys
37our @Rest;
38
39{
40 my($f, $fh);
19265284 41 foreach my $d (File::Spec->curdir()) {
f58b9ef1
CBW
42 $f = File::Spec->catfile($d, "Collate", "allkeys.txt");
43 last if open($fh, $f);
44 $f = undef;
45 }
46 croak "$PACKAGE: Collate/allkeys.txt is not found" if !defined $f;
47
48 while (my $line = <$fh>) {
49 next if $line =~ /^\s*#/;
50 if ($line =~ /^\s*\@/) {
51 push @Rest, $line;
52 next;
53 }
54
55 next if $line !~ /^\s*[0-9A-Fa-f]/;
56
57 $line =~ s/[#%]\s*(.*)//; # removing comment (not getting the name)
58
59 # gets element
60 my($e, $k) = split /;/, $line;
61
62 croak "Wrong Entry: <charList> must be separated by ';' ".
63 "from <collElement>" if ! $k;
64
65 my @uv = _getHexArray($e);
66 next if !@uv;
67
68 if (@uv != 1) {
69 push @Rest, $line;
70 next;
71 }
72
73 my $is_L3_ignorable = TRUE;
74
75 my @key;
76 foreach my $arr ($k =~ /\[([^\[\]]+)\]/g) { # SPACEs allowed
77 my $var = $arr =~ /\*/; # exactly /^\*/ but be lenient.
78 my @wt = _getHexArray($arr);
79 push @key, pack(VCE_TEMPLATE, $var, @wt);
80 $is_L3_ignorable = FALSE
81 if $wt[0] || $wt[1] || $wt[2];
82 # Conformance Test for 3.1.1 and 4.0.0 shows Level 3 ignorable
83 # is completely ignorable.
84 # For expansion, an entry $is_L3_ignorable
85 # if and only if "all" CEs are [.0000.0000.0000].
86 }
87 my $mapping = $is_L3_ignorable ? [] : \@key;
88 my $num = @$mapping;
89 my $str = chr($num).join('', @$mapping);
90 $SimpleEntries{$uv[0]} = stringify($str);
91 }
92}
93
94sub stringify {
95 my $str = shift;
96 return sprintf '"%s"', join '',
97 map sprintf("\\x%02x", ord $_), split //, $str;
98
99}
100
101########## writing header files ##########
102
103my $init = '';
104{
105 my $type = "char*";
106 my $head = $prefix."rest";
107
108 $init .= "static $type $head [] = {\n";
109 for my $line (@Rest) {
110 $line =~ s/\s*\z//;
111 next if $line eq '';
112 $init .= "/*$line*/\n" if $line =~ /^[A-Za-z0-9_.:;@\ \[\]]+\z/;
113 $init .= "($type)".stringify($line).",\n";
114 }
115 $init .= "NULL\n"; # sentinel
116 $init .= "};\n\n";
117}
118
119my @tripletable = (
120 {
121 file => "ucatbl",
122 name => "simple",
123 type => "char*",
124 hash => \%SimpleEntries,
125 null => "NULL",
126 init => $init,
127 },
128);
129
130foreach my $tbl (@tripletable) {
131 my $file = "$tbl->{file}.h";
132 my $head = "${prefix}$tbl->{name}";
133 my $type = $tbl->{type};
134 my $hash = $tbl->{hash};
135 my $null = $tbl->{null};
136 my $init = $tbl->{init};
137
138 open FH, ">$file" or croak "$PACKAGE: $file can't be made";
139 binmode FH; select FH;
140 my %val;
141
142 print FH << 'EOF';
143/*
144 * This file is auto-generated by mkheader.
145 * Any changes here will be lost!
146 */
147EOF
148
149 print $init if defined $init;
150
151 foreach my $uv (keys %$hash) {
152 croak sprintf("a Unicode code point 0x%04X over 0x10FFFF.", $uv)
153 unless $uv <= 0x10FFFF;
154 my @c = unpack 'CCCC', pack 'N', $uv;
155 $val{ $c[1] }{ $c[2] }{ $c[3] } = $hash->{$uv};
156 }
157
158 foreach my $p (sort { $a <=> $b } keys %val) {
159 next if ! $val{ $p };
160 for (my $r = 0; $r < 256; $r++) {
161 next if ! $val{ $p }{ $r };
162 printf "static $type ${head}_%02x_%02x [256] = {\n", $p, $r;
163 for (my $c = 0; $c < 256; $c++) {
164 print "\t", defined $val{$p}{$r}{$c}
165 ? "($type)".$val{$p}{$r}{$c}
166 : $null;
167 print ',' if $c != 255;
168 print "\n" if $c % 8 == 7;
169 }
170 print "};\n\n";
171 }
172 }
173 foreach my $p (sort { $a <=> $b } keys %val) {
174 next if ! $val{ $p };
175 printf "static $type* ${head}_%02x [256] = {\n", $p;
176 for (my $r = 0; $r < 256; $r++) {
177 print $val{ $p }{ $r }
178 ? sprintf("${head}_%02x_%02x", $p, $r)
179 : "NULL";
180 print ',' if $r != 255;
181 print "\n" if $val{ $p }{ $r } || ($r+1) % 8 == 0;
182 }
183 print "};\n\n";
184 }
185 print "static $type** $head [] = {\n";
186 for (my $p = 0; $p <= 0x10; $p++) {
187 print $val{ $p } ? sprintf("${head}_%02x", $p) : "NULL";
188 print ',' if $p != 0x10;
189 print "\n";
190 }
191 print "};\n\n";
192 close FH;
193}
194
1951;
196__END__