X-Git-Url: https://perl5.git.perl.org/perl5.git/blobdiff_plain/3d7c117d5246fe5390f3fda7bd31308799d54201..ffd62fc2fb74955cac5af41e7b3820e09877c3b4:/regen/ebcdic.pl diff --git a/regen/ebcdic.pl b/regen/ebcdic.pl index a3e049d..cfb4d4e 100644 --- a/regen/ebcdic.pl +++ b/regen/ebcdic.pl @@ -1,6 +1,7 @@ use v5.16.0; use strict; use warnings; +use integer; BEGIN { unshift @INC, '.' } @@ -13,44 +14,230 @@ require './regen/charset_translations.pl'; my $out_fh = open_new('ebcdic_tables.h', '>', {style => '*', by => $0, }); +sub get_column_headers ($$;$) { + my ($row_hdr_len, $field_width, $dfa_columns) = @_; + my $format; + my $final_column_format; + my $num_columns; + + if (defined $dfa_columns) { + $num_columns = $dfa_columns; + + # Trailing blank to correspond with commas in the rows below + $format = "%${field_width}d "; + } + else { # Is a regular table + $num_columns = 16; + + # Use blanks to separate the fields + $format = " " x ( $field_width + - 2); # For the '_X' + $format .= "_%X "; # Again, trailing blank over the commas below + } + + my $header = "/*" . " " x ($row_hdr_len - length "/*"); + + # All but the final column + $header .= sprintf($format, $_) for 0 .. $num_columns - 2; + + # Get rid of trailing blank, so that the final column takes up one less + # space so that the "*/" doesn't extend past the commas in the rows below + chop $header; + $header .= sprintf $format, $num_columns - 1; + + # Again, remove trailing blank + chop $header; + + return $header . "*/\n"; +} + +sub output_table_start($$$) { + my ($out_fh, $TYPE, $name) = @_; + + my $declaration = "EXTCONST $TYPE $name\[\]"; + print $out_fh < print in decimal + # 1 => print in hex (translates code point to code point) + # >= 2 => is a dfa table, like http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ + # The number is how many columns in the part after the code point + # portion. + # + # code point tables in hex areasier to debug, but don't fit into 80 + # columns + my $type = shift // 1; + + my $print_in_hex = $type == 1; + my $is_dfa = ($type >= 2) ? $type : 0; + my $columns_after_256 = 16; + + die "Requres 256 entries in table $name, got @$table_ref" + if ! $is_dfa && @$table_ref != 256; + if (! $is_dfa) { + die "Requres 256 entries in table $name, got @$table_ref" + if @$table_ref != 256; + } + else { + $columns_after_256 = $is_dfa; + + print $out_fh <<'EOF'; + +/* The table below is adapted from + * http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ + * See copyright notice at the beginning of this file. + */ + +EOF + } + + # Highest number in the table + my $max_entry = 0; + $max_entry = map { $_ > $max_entry ? $_ : $max_entry } @$table_ref; + + # We assume that every table has at least one two digit entry, and none + # are more than three digit. + my $field_width = ($print_in_hex) + ? 4 + : (($max_entry) > 99 ? 3 : 2); + + my $row_hdr_length; + my $node_number_field_width; + my $node_value_field_width; + + # dfa tables have a special header for the rows in the transitions part of + # the table. It is longer than the regular one. + if ($is_dfa) { + my $max_node_number = ($max_entry - 256) / $columns_after_256 - 1; + $node_number_field_width = ($max_node_number > 9) ? 2 : 1; + $node_value_field_width = ($max_node_number * $columns_after_256 > 99) + ? 3 : 2; + # The header starts with this template, and adds in the number of + # digits needed to represent the maximum node number and its value + $row_hdr_length = length("/*N=*/") + + $node_number_field_width + + $node_value_field_width; + } + else { + $row_hdr_length = length "/*_X*/"; # Template for what the header + # looks like + } - die "Requres 256 entries in table $name, got @$table_ref" if @$table_ref != 256; + # The table may not be representable in 8 bits. + my $TYPE = 'U8'; + $TYPE = 'U16' if grep { $_ > 255 } @$table_ref; - print $out_fh "EXTCONST U8 $name\[\] = {\n"; + output_table_start $out_fh, $TYPE, $name; + + # First the headers for the columns + print $out_fh get_column_headers($row_hdr_length, $field_width); + + # Now the table body + my $count = @$table_ref; + my $last_was_nl = 1; + + # Print each element individually, arranged in rows of columns + for my $i (0 .. $count - 1) { + + # Node number for here is -1 until get into the dfa state transitions + my $node = ($i < 256) ? -1 : ($i - 256) / $columns_after_256; + + # Print row header at beginning of each row + if ($last_was_nl) { + if ($node >= 0) { + printf $out_fh "/*N%-*d=%*d*/", $node_number_field_width, $node, + $node_value_field_width, $i - 256; + } + else { # Otherwise is regular row; print its number + printf $out_fh "/*%X_", $i / 16; + + # These rows in a dfa table require extra space so columns + # will align vertically (because the Ndd=ddd requires extra + # space) + if ($is_dfa) { + print $out_fh " " x ( $node_number_field_width + + $node_value_field_width); + } + print $out_fh "*/"; + } + } - my $column_numbers= "/*_0 _1 _2 _3 _4 _5 _6 _7 _8 _9 _A _B _C _D _E _F*/\n"; - print $out_fh $column_numbers if $print_in_hex; - for my $i (0 .. 255) { if ($print_in_hex) { - # No row headings, so will fit in 80 cols. - #printf $out_fh "/* %X_ */ ", $i / 16 if $i % 16 == 0; printf $out_fh "0x%02X", $table_ref->[$i]; } else { - printf $out_fh "%4d", $table_ref->[$i]; + printf $out_fh "%${field_width}d", $table_ref->[$i]; + } + + print $out_fh ",", if $i < $count -1; # No comma on final entry + + # Add \n if at end of row, which is 16 columns until we get to the + # transitions part + if ( ($node < 0 && $i % 16 == 15) + || ($node >= 0 && ($i -256) % $columns_after_256 + == $columns_after_256 - 1)) + { + print $out_fh "\n"; + $last_was_nl = 1; + } + else { + $last_was_nl = 0; } - print $out_fh ",", if $i < 255; - #print $out_fh ($i < 255) ? "," : " "; - #printf $out_fh " /* %X_ */", $i / 16 if $print_in_hex && $i % 16 == 15; - print $out_fh "\n" if $i % 16 == 15; } - print $out_fh $column_numbers if $print_in_hex; - print $out_fh "};\n\n"; + + # Print column footer + print $out_fh get_column_headers($row_hdr_length, $field_width, + ($is_dfa) ? $columns_after_256 : undef); + + output_table_end($out_fh); } -print $out_fh < + +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +of the Software, and to permit persons to whom the Software is furnished to do +so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + +*/ END my @charsets = get_supported_code_pages(); @@ -134,10 +321,10 @@ END } print $out_fh <