#!perl # # This auxiliary script makes five header files # used for building XSUB of Unicode::Collate. # # Usage: # in perl, or in command line # # Input file: # Collate/allkeys.txt # # Output file: # ucatbl.h # use 5.006; use strict; use warnings; use Carp; use File::Spec; BEGIN { unless ("A" eq pack('U', 0x41)) { die "Unicode::Collate cannot stringify a Unicode code point\n"; } unless (0x41 == unpack('U', 'A')) { die "Unicode::Collate cannot get a Unicode code point\n"; } } use constant TRUE => 1; use constant FALSE => ""; use constant VCE_TEMPLATE => 'Cn4'; sub _getHexArray { map hex, $_[0] =~ /([0-9a-fA-F]+)/g } our $PACKAGE = 'Unicode::Collate, mkheader'; our $prefix = "UCA_"; our %SimpleEntries; # $codepoint => $keys our @Rest; { my($f, $fh); foreach my $d (File::Spec->curdir()) { $f = File::Spec->catfile($d, "Collate", "allkeys.txt"); last if open($fh, $f); $f = undef; } croak "$PACKAGE: Collate/allkeys.txt is not found" if !defined $f; while (my $line = <$fh>) { next if $line =~ /^\s*#/; if ($line =~ /^\s*\@/) { push @Rest, $line; next; } next if $line !~ /^\s*[0-9A-Fa-f]/; # lines without element $line =~ s/[#%]\s*(.*)//; # removing comment (not getting the name) # gets element my($e, $k) = split /;/, $line; croak "Wrong Entry: must be separated by ';' ". "from " if ! $k; my @uv = _getHexArray($e); next if !@uv; if (@uv != 1) { push @Rest, $line; next; # Contractions of two or more characters will not be compiled. } my $is_L3_ignorable = TRUE; my @key; foreach my $arr ($k =~ /\[([^\[\]]+)\]/g) { # SPACEs allowed my $var = $arr =~ /\*/; # exactly /^\*/ but be lenient. my @wt = _getHexArray($arr); push @key, pack(VCE_TEMPLATE, $var, @wt); $is_L3_ignorable = FALSE if $wt[0] || $wt[1] || $wt[2]; # Conformance Test for 3.1.1 and 4.0.0 shows Level 3 ignorable # is completely ignorable. # For expansion, an entry $is_L3_ignorable # if and only if "all" CEs are [.0000.0000.0000]. } my $mapping = $is_L3_ignorable ? [] : \@key; my $num = @$mapping; my $str = chr($num).join('', @$mapping); $SimpleEntries{$uv[0]} = stringify($str); } } sub stringify { my $str = shift; return sprintf '"%s"', join '', map sprintf("\\x%02x", ord $_), split //, $str; } ########## writing header files ########## my $init = ''; { my $type = "char* const"; my $head = $prefix."rest"; $init .= "static const $type $head [] = {\n"; for my $line (@Rest) { $line =~ s/\s*\z//; next if $line eq ''; $init .= "/*$line*/\n" if $line =~ /^[A-Za-z0-9_.:;@\ \[\]]+\z/; $init .= stringify($line).",\n"; } $init .= "NULL\n"; # sentinel $init .= "};\n\n"; } my @tripletable = ( { file => "ucatbl", name => "simple", type => "char* const", hash => \%SimpleEntries, null => "NULL", init => $init, }, ); foreach my $tbl (@tripletable) { my $file = "$tbl->{file}.h"; my $head = "${prefix}$tbl->{name}"; my $type = $tbl->{type}; my $hash = $tbl->{hash}; my $null = $tbl->{null}; my $init = $tbl->{init}; open my $fh_h, ">$file" or croak "$PACKAGE: $file can't be made"; binmode $fh_h; select $fh_h; my %val; print << 'EOF'; /* * This file is auto-generated by mkheader. * Any changes here will be lost! */ EOF print $init if defined $init; foreach my $uv (keys %$hash) { croak sprintf("a Unicode code point 0x%04X over 0x10FFFF.", $uv) unless $uv <= 0x10FFFF; my @c = unpack 'CCCC', pack 'N', $uv; $val{ $c[1] }{ $c[2] }{ $c[3] } = $hash->{$uv}; # $c[0] must be 0. } foreach my $p (sort { $a <=> $b } keys %val) { next if ! $val{ $p }; for (my $r = 0; $r < 256; $r++) { next if ! $val{ $p }{ $r }; printf "static const $type ${head}_%02x_%02x [256] = {\n", $p, $r; for (my $c = 0; $c < 256; $c++) { print "\t", defined $val{$p}{$r}{$c} ? $val{$p}{$r}{$c} : $null; print ',' if $c != 255; print "\n" if $c % 8 == 7; } print "};\n\n"; } } foreach my $p (sort { $a <=> $b } keys %val) { next if ! $val{ $p }; printf "static const $type* const ${head}_%02x [256] = {\n", $p; for (my $r = 0; $r < 256; $r++) { print $val{ $p }{ $r } ? sprintf("${head}_%02x_%02x", $p, $r) : "NULL"; print ',' if $r != 255; print "\n" if $val{ $p }{ $r } || ($r+1) % 8 == 0; } print "};\n\n"; } print "static const $type* const * const $head [] = {\n"; for (my $p = 0; $p <= 0x10; $p++) { print $val{ $p } ? sprintf("${head}_%02x", $p) : "NULL"; print ',' if $p != 0x10; print "\n"; } print "};\n\n"; close $fh_h; } 1; __END__