Fix the Unicode Bug in the range operator

[perl5.git] / regen / ebcdic.pl
diff --git a/regen/ebcdic.pl b/regen/ebcdic.pl

index 60b74aa..a3e049d 100644 (file)
--- a/regen/ebcdic.pl
+++ b/regen/ebcdic.pl
@@ -1,8 +1,11 @@
  use v5.16.0;
  use strict;
  use warnings;
-require 'regen/regen_lib.pl';
-require 'regen/charset_translations.pl';
+
+BEGIN { unshift @INC, '.' }
+
+require './regen/regen_lib.pl';
+require './regen/charset_translations.pl';
  
  # Generates the EBCDIC translation tables that were formerly hard-coded into
  # utfebcdic.h
@@ -10,20 +13,34 @@ require 'regen/charset_translations.pl';
  my $out_fh = open_new('ebcdic_tables.h', '>',
          {style => '*', by => $0, });
  
-sub output_table ($$) {
+sub output_table ($$;$) {
      my $table_ref = shift;
      my $name = shift;
  
+    # Tables in hex easier to debug, but don't fit into 80 columns
+    my $print_in_hex = shift // 1;
+
      die "Requres 256 entries in table $name, got @$table_ref" if @$table_ref != 256;
  
      print $out_fh "EXTCONST U8 $name\[\] = {\n";
  
+    my $column_numbers= "/*_0   _1   _2   _3   _4   _5   _6   _7   _8   _9   _A   _B   _C   _D   _E  _F*/\n";
+    print $out_fh $column_numbers if $print_in_hex;
      for my $i (0 .. 255) {
-        printf $out_fh "%4d", $table_ref->[$i];
-        #printf $out_fh " 0x%02X", $table_ref->[$i];
+        if ($print_in_hex) {
+            # No row headings, so will fit in 80 cols.
+            #printf $out_fh "/* %X_ */ ", $i / 16 if $i % 16 == 0;
+            printf $out_fh "0x%02X", $table_ref->[$i];
+        }
+        else {
+            printf $out_fh "%4d", $table_ref->[$i];
+        }
          print $out_fh ",", if $i < 255;
+        #print $out_fh ($i < 255) ? "," : " ";
+        #printf $out_fh " /* %X_ */", $i / 16 if $print_in_hex && $i % 16 == 15;
          print $out_fh "\n" if $i % 16 == 15;
      }
+    print $out_fh $column_numbers if $print_in_hex;
      print $out_fh "};\n\n";
  }
  
@@ -39,7 +56,8 @@ END
  my @charsets = get_supported_code_pages();
  shift @charsets;    # ASCII is the 0th, and we don't deal with that here.
  foreach my $charset (@charsets) {
-    my @a2e = get_a2n($charset);
+    # we process the whole array several times, make a copy
+    my @a2e = @{get_a2n($charset)};
  
      print $out_fh "\n" . get_conditional_compile_line_start($charset);
      print $out_fh "\n";
@@ -87,7 +105,11 @@ END
          # order 1-bits (up to 7)
          for my $i (0xC0 .. 255) {
              my $count;
-            if (($i & 0b11111110) == 0b11111110) {
+            if ($i == 0b11111111) {
+                no warnings 'once';
+                $count = $CHARSET_TRANSLATIONS::UTF_EBCDIC_MAXBYTES;
+            }
+            elsif (($i & 0b11111110) == 0b11111110) {
                  $count= 7;
              }
              elsif (($i & 0b11111100) == 0b11111100) {
@@ -117,7 +139,8 @@ END
   * entries marked 9 in tr16 are continuation bytes and are marked as length 1
   * here so that we can recover. */
  END
-        output_table(\@utf8skip, "PL_utf8skip");
+        output_table(\@utf8skip, "PL_utf8skip", 0);  # The 0 means don't print
+                                                     # in hex
      }
  
      use feature 'unicode_strings';