mktables: Use chr() instead of pack()

[perl5.git] / lib / unicore / mktables
diff --git a/lib/unicore/mktables b/lib/unicore/mktables

index a25b65f..a21aa1a 100644 (file)
--- a/lib/unicore/mktables
+++ b/lib/unicore/mktables
@@ -364,16 +364,17 @@ my $unicode_reference_url = 'http://www.unicode.org/reports/tr44/';
  # Jamo.txt or UnicodeData.txt will likely cause fatal errors.
  #
  # To compare the output tables, it may be useful to specify the -annotate
-# flag.  This causes the tables to expand so there is one entry for each
-# non-algorithmically named code point giving, currently its name, and its
-# graphic representation if printable (and you have a font that knows about
-# it).  This makes it easier to see what the particular code points are in
-# each output table.  The tables are usable, but because they don't have
-# ranges (for the most part), a Perl using them will run slower.  Non-named
-# code points are annotated with a description of their status, and contiguous
-# ones with the same description will be output as a range rather than
-# individually.  Algorithmically named characters are also output as ranges,
-# except when there are just a few contiguous ones.
+# flag.  (As of this writing, this can't be done on a clean workspace, due to
+# requirements in Text::Tabs used in this option; so first run mktables
+# without this option.)  This option adds comment lines to each table, one for
+# each non-algorithmically named character giving, currently its code point,
+# name, and graphic representation if printable (and you have a font that
+# knows about it).  This makes it easier to see what the particular code
+# points are in each output table.  Non-named code points are annotated with a
+# description of their status, and contiguous ones with the same description
+# will be output as a range rather than individually.  Algorithmically named
+# characters are also output as ranges, except when there are just a few
+# contiguous ones.
  #
  # FUTURE ISSUES
  #
@@ -527,6 +528,9 @@ my $unicode_reference_url = 'http://www.unicode.org/reports/tr44/';
  #   0275;LATIN SMALL LETTER BARRED O;Ll;0;L;;;;;N;;;019F;;019F
  # Without this change, there are casing problems for this character.
  #
+# Search for $string_compare_versions to see how to compare changes to
+# properties between Unicode versions
+#
  ##############################################################################
  
  my $UNDEF = ':UNDEF:';  # String to print out for undefined values in tracing
@@ -679,6 +683,11 @@ my $VERBOSE = 3;
  
  my $verbosity = $NORMAL_VERBOSITY;
  
+# Stored in mktables.lst so that if this program is called with different
+# options, will regenerate even if the files otherwise look like they're
+# up-to-date.
+my $command_line_arguments = join " ", @ARGV;
+
  # Process arguments
  while (@ARGV) {
      my $arg = shift @ARGV;
@@ -761,9 +770,8 @@ usage: $0 [-c|-p|-q|-v|-w] [-C dir] [-L filelist] [ -P pod_dir ]
                  overrides -T
    -makelist   : Rewrite the file list $file_list based on current setup
    -annotate   : Output an annotation for each character in the table files;
-                useful for debugging mktables, looking at diffs; but is slow,
-                memory intensive; resulting tables are usable but are slow and
-                very large (and currently fail the Unicode::UCD.t tests).
+                useful for debugging mktables, looking at diffs; but is slow
+                and memory intensive
    -check A B  : Executes $0 only if A and B are the same
  END
      }
@@ -805,9 +813,9 @@ my $v_version = pack "C*", split /\./, $string_version;        # v string
  # are known to not match any code points in some versions of Unicode, but that
  # may change in the future so they should be matchable, hence an empty file is
  # generated for them.
-my @tables_that_may_be_empty = (
-                                'Joining_Type=Left_Joining',
-                                );
+my @tables_that_may_be_empty;
+push @tables_that_may_be_empty, 'Joining_Type=Left_Joining'
+                                                    if $v_version lt v6.3.0;
  push @tables_that_may_be_empty, 'Script=Common' if $v_version le v4.0.1;
  push @tables_that_may_be_empty, 'Title' if $v_version lt v2.0.0;
  push @tables_that_may_be_empty, 'Script=Katakana_Or_Hiragana'
@@ -918,7 +926,7 @@ my %why_stabilized;  # Documentation only
  my %why_obsolete;    # Documentation only
  
  {   # Closure
-    my $simple = 'Perl uses the more complete version of this property';
+    my $simple = 'Perl uses the more complete version';
      my $unihan = 'Unihan properties are by default not enabled in the Perl core.  Instead use CPAN: Unicode::Unihan';
  
      my $other_properties = 'other properties';
@@ -1178,7 +1186,8 @@ my $INTERNAL_ONLY_HEADER = <<"EOF";
  # !!!!!!!   INTERNAL PERL USE ONLY   !!!!!!!
  # This file is for internal use by core Perl only.  The format and even the
  # name or existence of this file are subject to change without notice.  Don't
-# use it directly.
+# use it directly.  Use Unicode::UCD to access the Unicode character data
+# base.
  EOF
  
  my $DEVELOPMENT_ONLY=<<"EOF";
@@ -1194,6 +1203,18 @@ my $MAX_UNICODE_CODEPOINT_STRING = "10FFFF";
  my $MAX_UNICODE_CODEPOINT = hex $MAX_UNICODE_CODEPOINT_STRING;
  my $MAX_UNICODE_CODEPOINTS = $MAX_UNICODE_CODEPOINT + 1;
  
+# We work with above-Unicode code points, up to UV_MAX.   But when you get
+# that high, above IV_MAX, some operations don't work, and you can easily get
+# overflow.  Therefore for internal use, we use a much smaller number,
+# translating it to UV_MAX only for output.  The exact number is immaterial
+# (all Unicode code points are treated exactly the same), but the algorithm
+# requires it to be at least 2 * $MAX_UNICODE_CODEPOINTS + 1;
+my $MAX_WORKING_CODEPOINTS= $MAX_UNICODE_CODEPOINT * 8;
+my $MAX_WORKING_CODEPOINT = $MAX_WORKING_CODEPOINTS - 1;
+my $MAX_WORKING_CODEPOINT_STRING = sprintf("%X", $MAX_WORKING_CODEPOINT);
+
+my $MAX_PLATFORM_CODEPOINT = ~0;
+
  # Matches legal code point.  4-6 hex numbers, If there are 6, the first
  # two must be 10; if there are 5, the first must not be a 0.  Written this way
  # to decrease backtracking.  The first regex allows the code point to be at
@@ -1301,10 +1322,14 @@ my $MAP_PROXIED = 1;    # The map table for the property isn't written out,
                          # reconstruct this table
  my $INTERNAL_ONLY = 2;  # The file for this table is written out, but it is
                          # for Perl's internal use only
-my $SUPPRESSED = 3;     # The file for this table is not written out, and as a
+my $LEGACY_ONLY = 3;    # Like $INTERNAL_ONLY, but not actually used by Perl.
+                        # Is for backwards compatibility for applications that
+                        # read the file directly, so it's format is
+                        # unchangeable.
+my $SUPPRESSED = 4;     # The file for this table is not written out, and as a
                          # result, we don't bother to do many computations on
                          # it.
-my $PLACEHOLDER = 4;    # Like $SUPPRESSED, but we go through all the
+my $PLACEHOLDER = 5;    # Like $SUPPRESSED, but we go through all the
                          # computations anyway, as the values are needed for
                          # things to work.  This happens when we have Perl
                          # extensions that depend on Unicode tables that
@@ -1320,6 +1345,7 @@ my $HEX_FORMAT = 'x';
  my $RATIONAL_FORMAT = 'r';
  my $STRING_FORMAT = 's';
  my $ADJUST_FORMAT = 'a';
+my $HEX_ADJUST_FORMAT = 'ax';
  my $DECOMP_STRING_FORMAT = 'c';
  my $STRING_WHITE_SPACE_LIST = 'sw';
  
@@ -1332,6 +1358,7 @@ my %map_table_formats = (
      $RATIONAL_FORMAT => 'rational: an integer or a fraction',
      $STRING_FORMAT => 'string',
      $ADJUST_FORMAT => 'some entries need adjustment',
+    $HEX_ADJUST_FORMAT => 'mapped value in hex; some entries need adjustment',
      $DECOMP_STRING_FORMAT => 'Perl\'s internal (Normalize.pm) decomposition mapping',
      $STRING_WHITE_SPACE_LIST => 'string, but some elements are interpreted as a list; white space occurs only as list item separators'
  );
@@ -1341,12 +1368,18 @@ my $EXTRACTED_DIR = (-d 'extracted') ? 'extracted' : "";
  my $EXTRACTED = ($EXTRACTED_DIR) ? "$EXTRACTED_DIR/" : "";
  my $AUXILIARY = 'auxiliary';
  
-# Hashes that will eventually go into Heavy.pl for the use of utf8_heavy.pl
-# and into UCD.pl for the use of UCD.pm
+# Hashes and arrays that will eventually go into Heavy.pl for the use of
+# utf8_heavy.pl and into UCD.pl for the use of UCD.pm
  my %loose_to_file_of;       # loosely maps table names to their respective
                              # files
  my %stricter_to_file_of;    # same; but for stricter mapping.
  my %loose_property_to_file_of; # Maps a loose property name to its map file
+my @inline_definitions = "V0"; # Each element gives a definition of a unique
+                            # inversion list.  When a definition is inlined,
+                            # its value in the hash it's in (one of the two
+                            # defined just above) will include an index into
+                            # this array.  The 0th element is initialized to
+                            # the definition for a zero length invwersion list
  my %file_to_swash_name;     # Maps the file name to its corresponding key name
                              # in the hash %utf8::SwashInfo
  my %nv_floating_to_rational; # maps numeric values floating point numbers to
@@ -1451,12 +1484,13 @@ my $MIN_FRACTION_LENGTH = 3; # How many digits of a floating point number at
  my $MAX_FLOATING_SLOP = 10 ** - $MIN_FRACTION_LENGTH; # And in floating terms
  
  # These store references to certain commonly used property objects
+my $ccc;
  my $gc;
  my $perl;
  my $block;
  my $perl_charname;
  my $print;
-my $Any;
+my $All;
  my $script;
  
  # Are there conflicting names because of beginning with 'In_', or 'Is_'
@@ -1518,7 +1552,8 @@ my $UNASSIGNED_TYPE = -2;
  my $PRIVATE_USE_TYPE = -3;
  my $NONCHARACTER_TYPE = -4;
  my $CONTROL_TYPE = -5;
-my $UNKNOWN_TYPE = -6;  # Used only if there is a bug in this program
+my $ABOVE_UNICODE_TYPE = -6;
+my $UNKNOWN_TYPE = -7;  # Used only if there is a bug in this program
  
  sub populate_char_info ($) {
      # Used only with the $annotate option.  Populates the arrays with the
@@ -1549,7 +1584,13 @@ sub populate_char_info ($) {
      my $end;
      if (! $viacode[$i]) {
          my $nonchar;
-        if ($gc-> table('Private_use')->contains($i)) {
+        if ($i > $MAX_UNICODE_CODEPOINT) {
+            $viacode[$i] = 'Above-Unicode';
+            $annotate_char_type[$i] = $ABOVE_UNICODE_TYPE;
+            $printable[$i] = 0;
+            $end = $MAX_WORKING_CODEPOINT;
+        }
+        elsif ($gc-> table('Private_use')->contains($i)) {
              $viacode[$i] = 'Private Use';
              $annotate_char_type[$i] = $PRIVATE_USE_TYPE;
              $printable[$i] = 0;
@@ -1698,6 +1739,20 @@ sub clarify_number ($) {
      return $number;
  }
  
+sub clarify_code_point_count ($) {
+    # This is like clarify_number(), but the input is assumed to be a count of
+    # code points, rather than a generic number.
+
+    my $append = "";
+
+    my $number = shift;
+    if ($number > $MAX_UNICODE_CODEPOINTS) {
+        $number -= ($MAX_WORKING_CODEPOINTS - $MAX_UNICODE_CODEPOINTS);
+        return "All above-Unicode code points" if $number == 0;
+        $append = " + all above-Unicode code points";
+    }
+    return clarify_number($number) . $append;
+}
  
  package Carp;
  
@@ -1805,7 +1860,8 @@ package main;
  { # Closure
  
      # This program uses the inside-out method for objects, as recommended in
-    # "Perl Best Practices".  This closure aids in generating those.  There
+    # "Perl Best Practices".  (This is the best solution still, since this has
+    # to run under miniperl.)  This closure aids in generating those.  There
      # are two routines.  setup_package() is called once per package to set
      # things up, and then set_access() is called for each hash representing a
      # field in the object.  These routines arrange for the object to be
@@ -2455,6 +2511,20 @@ END
                  return 0;
              }
              $handle{$addr} = $file_handle; # Cache the open file handle
+
+            if ($v_version ge v3.2.0
+                && lc($file) ne 'unicodedata.txt'
+
+                    # Unihan files used another format until v7
+                && ($v_version ge v7.0.0 || $file !~ /^Unihan/i))
+            {
+                $_ = <$file_handle>;
+                if ($_ !~ / - $string_version \. /x) {
+                    chomp;
+                    $_ =~ s/^#\s*//;
+                    die Carp::my_carp("File '$file' is version '$_'.  It should be version $string_version");
+                }
+            }
          }
  
          if ($verbosity >= $PROGRESS) {
@@ -3422,7 +3492,7 @@ sub trace { return main::trace(@_); }
  
          # If the range list is empty, return a large value that isn't adjacent
          # to any that could be in the range list, for simpler tests
-        return $MAX_UNICODE_CODEPOINT + 2 unless scalar @{$ranges{$addr}};
+        return $MAX_WORKING_CODEPOINT + 2 unless scalar @{$ranges{$addr}};
          return $ranges{$addr}->[0]->start;
      }
  
@@ -3701,9 +3771,6 @@ sub trace { return main::trace(@_); }
              Carp::my_carp_bug("$owner_name_of{$addr}End of range (" . sprintf("%04X", $end) . ") must not be before start (" . sprintf("%04X", $start) . ").  No action taken.");
              return;
          }
-        if ($end > $MAX_UNICODE_CODEPOINT && $operation eq '+') {
-            Carp::my_carp("$owner_name_of{$addr}Warning: Range '" . sprintf("%04X..%04X", $start, $end) . ") is above the Unicode maximum of " . sprintf("%04X", $MAX_UNICODE_CODEPOINT) . ".  Adding it anyway");
-        }
          #local $to_trace = 1 if main::DEBUG;
  
          if ($operation eq '-') {
@@ -4501,8 +4568,8 @@ sub trace { return main::trace(@_); }
  
          # And finally, add the gap from the end of the table to the max
          # possible code point
-        if ($max < $MAX_UNICODE_CODEPOINT) {
-            $new->add_range($max + 1, $MAX_UNICODE_CODEPOINT);
+        if ($max < $MAX_WORKING_CODEPOINT) {
+            $new->add_range($max + 1, $MAX_WORKING_CODEPOINT);
          }
          return $new;
      }
@@ -4791,6 +4858,7 @@ sub trace { return main::trace(@_); }
                  # range.
                  my $end = $set->end;
                  return $end if is_code_point_usable($end, $try_hard);
+                $end = $MAX_UNICODE_CODEPOINT + 1 if $end > $MAX_UNICODE_CODEPOINT;
  
                  # End point didn't, work.  Start at the beginning and try
                  # every one until find one that does work.
@@ -4966,6 +5034,11 @@ sub trace { return main::trace(@_); }
      # The constructor can override the global flag of the same name.
      main::set_access('output_range_counts', \%output_range_counts, 'r');
  
+    my %write_as_invlist;
+    # A boolean set iff the output file for this table is to be in the form of
+    # an inversion list/map.
+    main::set_access('write_as_invlist', \%write_as_invlist, 'r');
+
      my %format;
      # The format of the entries of the table.  This is calculated from the
      # data in the table (or passed in the constructor).  This is an enum e.g.,
@@ -5002,6 +5075,7 @@ sub trace { return main::trace(@_); }
          $range_size_1{$addr} = delete $args{'Range_Size_1'} || 0;
          $caseless_equivalent{$addr} = delete $args{'Caseless_Equivalent'} || 0;
          $fate{$addr} = delete $args{'Fate'} || $ORDINARY;
+        $write_as_invlist{$addr} = delete $args{'Write_As_Invlist'};# No default
          my $ucd = delete $args{'UCD'};
  
          my $description = delete $args{'Description'};
@@ -5482,6 +5556,28 @@ END
          return $return;
      }
  
+    sub merge_single_annotation_line ($$$) {
+        my ($output, $annotation, $annotation_column) = @_;
+
+        # This appends an annotation comment, $annotation, to $output,
+        # starting in or after column $annotation_column, removing any
+        # pre-existing comment from $output.
+
+        $annotation =~ s/^ \s* \# \  //x;
+        $output =~ s/ \s* ( \# \N* )? \n //x;
+        $output = Text::Tabs::expand($output);
+
+        my $spaces = $annotation_column - length $output;
+        $spaces = 2 if $spaces < 0;  # Have 2 blanks before the comment
+
+        $output = sprintf "%s%*s# %s",
+                            $output,
+                            $spaces,
+                            " ",
+                            $annotation;
+        return Text::Tabs::unexpand $output;
+    }
+
      sub write {
          # Write a representation of the table to its file.  It calls several
          # functions furnished by sub-classes of this abstract base class to
@@ -5491,14 +5587,13 @@ END
  
          my $self = shift;
          my $use_adjustments = shift; # ? output in adjusted format or not
-        my $tab_stops = shift;       # The number of tab stops over to put any
-                                     # comment.
          my $suppress_value = shift;  # Optional, if the value associated with
                                       # a range equals this one, don't write
                                       # the range
          Carp::carp_extra_args(\@_) if main::DEBUG && @_;
  
          my $addr = do { no overloading; pack 'J', $self; };
+        my $write_as_invlist = $write_as_invlist{$addr};
  
          # Start with the header
          my @HEADER = $self->header;
@@ -5511,14 +5606,10 @@ END
          # affect what gets output before it, therefore pre_body() isn't called
          # until after all other processing of the table is done.
  
-        # The main body looks like a 'here' document.  If annotating, get rid
-        # of the comments before passing to the caller, as some callers, such
-        # as charnames.pm, can't cope with them.  (Outputting range counts
-        # also introduces comments, but these don't show up in the tables that
-        # can't cope with comments, and there aren't that many of them that
-        # it's worth the extra real time to get rid of them).
+        # The main body looks like a 'here' document.  If there are comments,
+        # get rid of them when processing it.
          my @OUT;
-        if ($annotate) {
+        if ($annotate || $output_range_counts) {
              # Use the line below in Perls that don't have /r
              #push @OUT, 'return join "\n",  map { s/\s*#.*//mg; $_ } split "\n", <<\'END\';' . "\n";
              push @OUT, "return <<'END' =~ s/\\s*#.*//mgr;\n";
@@ -5530,9 +5621,9 @@ END
  
              # This is a kludge for empty tables to silence a warning in
              # utf8.c, which can't really deal with empty tables, but it can
-            # deal with a table that matches nothing, as the inverse of 'Any'
+            # deal with a table that matches nothing, as the inverse of 'All'
              # does.
-            push @OUT, "!utf8::Any\n";
+            push @OUT, "!utf8::All\n";
          }
          elsif ($self->name eq 'N'
  
@@ -5554,16 +5645,24 @@ END
          }
          else {
              my $range_size_1 = $range_size_1{$addr};
-            my $format;            # Used only in $annotate option
-            my $include_name;      # Used only in $annotate option
-
-            if ($annotate) {
-
-                # If annotating each code point, must print 1 per line.
-                # The variable could point to a subroutine, and we don't want
-                # to lose that fact, so only set if not set already
-                $range_size_1 = 1 if ! $range_size_1;
  
+            # To make it more readable, use a minimum indentation
+            my $comment_indent;
+
+            # These are used only in $annotate option
+            my $format;         # e.g. $HEX_ADJUST_FORMAT
+            my $include_name;   # ? Include the character's name in the
+                                # annotation?
+            my $include_cp;     # ? Include its code point
+
+            if (! $annotate) {
+                $comment_indent = ($self->isa('Map_Table'))
+                                  ? 24
+                                  : ($write_as_invlist)
+                                    ? 8
+                                    : 16;
+            }
+            else {
                  $format = $self->format;
  
                  # The name of the character is output only for tables that
@@ -5575,6 +5674,96 @@ END
                          || $property == main::property_ref('Name')
                          || $property == main::property_ref('Name_Alias')
                         );
+
+                # Don't include the code point in the annotation where all
+                # lines are a single code point, so it can be easily found in
+                # the first column
+                $include_cp = ! $range_size_1;
+
+                if (! $self->isa('Map_Table')) {
+                    $comment_indent = ($write_as_invlist) ? 8 : 16;
+                }
+                else {
+                    $comment_indent = 16;
+
+                    # There are just a few short ranges in this table, so no
+                    # need to include the code point in the annotation.
+                    $include_cp = 0 if $format eq $DECOMP_STRING_FORMAT;
+
+                    # We're trying to get this to look good, as the whole
+                    # point is to make human-readable tables.  It is easier to
+                    # read if almost all the annotation comments begin in the
+                    # same column.  Map tables have varying width maps, so can
+                    # create a jagged comment appearance.  This code does a
+                    # preliminary pass through these tables looking for the
+                    # maximum width map in each, and causing the comments to
+                    # begin just to the right of that.  However, if the
+                    # comments begin too far to the right of most lines, it's
+                    # hard to line them up horizontally with their real data.
+                    # Therefore we ignore the longest outliers
+                    my $ignore_longest_X_percent = 2;  # Discard longest X%
+
+                    # Each key in this hash is a width of at least one of the
+                    # maps in the table.  Its value is how many lines have
+                    # that width.
+                    my %widths;
+
+                    # We won't space things further left than one tab stop
+                    # after the rest of the line; initializing it to that
+                    # number saves some work.
+                    my $max_map_width = 8;
+
+                    # Fill in the %widths hash
+                    my $total = 0;
+                    for my $set ($range_list{$addr}->ranges) {
+                        my $value = $set->value;
+
+                        # These range types don't appear in the main table
+                        next if $set->type == 0
+                                && defined $suppress_value
+                                && $value eq $suppress_value;
+                        next if $set->type == $MULTI_CP
+                                || $set->type == $NULL;
+
+                        # Include 2 spaces before the beginning of the
+                        # comment
+                        my $this_width = length($value) + 2;
+
+                        # Ranges of the remaining non-zero types usually
+                        # occupy just one line (maybe occasionally two, but
+                        # this doesn't have to be dead accurate).  This is
+                        # because these ranges are like "unassigned code
+                        # points"
+                        my $count = ($set->type != 0)
+                                    ? 1
+                                    : $set->end - $set->start + 1;
+                        $widths{$this_width} += $count;
+                        $total += $count;
+                        $max_map_width = $this_width
+                                            if $max_map_width < $this_width;
+                    }
+
+                    # If the widest map gives us less than two tab stops
+                    # worth, just take it as-is.
+                    if ($max_map_width > 16) {
+
+                        # Otherwise go through %widths until we have included
+                        # the desired percentage of lines in the whole table.
+                        my $running_total = 0;
+                        foreach my $width (sort { $a <=> $b } keys %widths)
+                        {
+                            $running_total += $widths{$width};
+                            use integer;
+                            if ($running_total * 100 / $total
+                                            >= 100 - $ignore_longest_X_percent)
+                            {
+                                $max_map_width = $width;
+                                last;
+                            }
+                        }
+                    }
+                    $comment_indent += $max_map_width;
+                }
              }
  
              # Values for previous time through the loop.  Initialize to
@@ -5591,6 +5780,36 @@ END
              my $next_end;
              my $next_value;
              my $offset = 0;
+            my $invlist_count = 0;
+
+            my $output_value_in_hex = $self->isa('Map_Table')
+                                && ($self->format eq $HEX_ADJUST_FORMAT
+                                    || $self->to_output_map == $EXTERNAL_MAP);
+            # Use leading zeroes just for files whose format should not be
+            # changed from what it has been.  Otherwise, they just take up
+            # space and time to process.
+            my $hex_format = ($self->isa('Map_Table')
+                              && $self->to_output_map == $EXTERNAL_MAP)
+                             ? "%04X"
+                             : "%X";
+
+            # The values for some of these tables are stored in mktables as
+            # hex strings.  Normally, these are just output as strings without
+            # change, but when we are doing adjustments, we have to operate on
+            # these numerically, so we convert those to decimal to do that,
+            # and back to hex for output
+            my $convert_map_to_from_hex = 0;
+            my $output_map_in_hex = 0;
+            if ($self->isa('Map_Table')) {
+                $convert_map_to_from_hex
+                   = ($use_adjustments && $self->format eq $HEX_ADJUST_FORMAT)
+                      || ($annotate && $self->format eq $HEX_FORMAT);
+                $output_map_in_hex = $convert_map_to_from_hex
+                                 || $self->format eq $HEX_FORMAT;
+            }
+
+            # To store any annotations about the characters.
+            my @annotation;
  
              # Output each range as part of the here document.
              RANGE:
@@ -5607,33 +5826,53 @@ END
                  next RANGE if defined $suppress_value
                                && $value eq $suppress_value;
  
+                $value = CORE::hex $value if $convert_map_to_from_hex;
+
+
                  {   # This bare block encloses the scope where we may need to
-                    # split a range (when outputting adjusteds), and each time
-                    # through we handle the next portion of the original by
-                    # ending the block with a 'redo'.   The values to use for
-                    # that next time through are set up just below in the
-                    # scalars whose names begin with '$next_'.
-
-                    if ($use_adjustments) {
-
-                        # When converting to use adjustments, we can handle
-                        # only single element ranges.  Set up so that this
-                        # time through the loop, we look at the first element,
-                        # and the next time through, we start off with the
-                        # remainder.  Thus each time through we look at the
-                        # first element of the range
-                        if ($end != $start) {
-                            $next_start = $start + 1;
-                            $next_end = $end;
-                            $next_value = $value;
-                            $end = $start;
-                        }
+                    # 'redo' to.  Consider a table that is to be written out
+                    # using single item ranges.  This is given in the
+                    # $range_size_1 boolean.  To accomplish this, we split the
+                    # range each time through the loop into two portions, the
+                    # first item, and the rest.  We handle that first item
+                    # this time in the loop, and 'redo' to repeat the process
+                    # for the rest of the range.
+                    #
+                    # We may also have to do it, with other special handling,
+                    # if the table has adjustments.  Consider the table that
+                    # contains the lowercasing maps.  mktables stores the
+                    # ASCII range ones as 26 ranges:
+                    #       ord('A') => ord('a'), .. ord('Z') => ord('z')
+                    # For compactness, the table that gets written has this as
+                    # just one range
+                    #       ( ord('A') .. ord('Z') ) => ord('a')
+                    # and the software that reads the tables is smart enough
+                    # to "connect the dots".  This change is accomplished in
+                    # this loop by looking to see if the current iteration
+                    # fits the paradigm of the previous iteration, and if so,
+                    # we merge them by replacing the final output item with
+                    # the merged data.  Repeated 25 times, this gets A-Z.  But
+                    # we also have to make sure we don't screw up cases where
+                    # we have internally stored
+                    #       ( 0x1C4 .. 0x1C6 ) => 0x1C5
+                    # This single internal range has to be output as 3 ranges,
+                    # which is done by splitting, like we do for $range_size_1
+                    # tables.  (There are very few of such ranges that need to
+                    # be split, so the gain of doing the combining of other
+                    # ranges far outweighs the splitting of these.)  The
+                    # values to use for the redo at the end of this block are
+                    # set up just below in the scalars whose names begin with
+                    # '$next_'.
+
+                    if (($use_adjustments || $range_size_1) && $end != $start)
+                    {
+                        $next_start = $start + 1;
+                        $next_end = $end;
+                        $next_value = $value;
+                        $end = $start;
+                    }
  
-                        # The values for some of these tables are stored as
-                        # hex strings.  Convert those to decimal
-                        $value = hex($value)
-                                    if $self->default_map eq $CODE_POINT
-                                        && $value =~ / ^ [A-Fa-f0-9]+ $ /x;
+                    if ($use_adjustments && ! $range_size_1) {
  
                          # If this range is adjacent to the previous one, and
                          # the values in each are integers that are also
@@ -5656,7 +5895,15 @@ END
                          }
                          else {
                              $offset = 0;
+                            if (@annotation == 1) {
+                                $OUT[-1] = merge_single_annotation_line(
+                                    $OUT[-1], $annotation[0], $comment_indent);
+                            }
+                            else {
+                                push @OUT, @annotation;
+                            }
                          }
+                        undef @annotation;
  
                          # Save the current values for the next time through
                          # the loop.
@@ -5665,36 +5912,56 @@ END
                          $previous_value = $value;
                      }
  
-                    # If there is a range and doesn't need a single point range
-                    # output
-                    if ($start != $end && ! $range_size_1) {
-                        push @OUT, sprintf "%04X\t%04X", $start, $end;
-                        $OUT[-1] .= "\t$value" if $value ne "";
+                    if ($write_as_invlist) {
+
+                        # Inversion list format has a single number per line,
+                        # the starting code point of a range that matches the
+                        # property
+                        push @OUT, $start, "\n";
+                        $invlist_count++;
  
                          # Add a comment with the size of the range, if
-                        # requested.  Expand Tabs to make sure they all start
-                        # in the same column, and then unexpand to use mostly
-                        # tabs.
-                        if (! $output_range_counts{$addr}) {
-                            $OUT[-1] .= "\n";
+                        # requested.
+                        if ($output_range_counts{$addr}) {
+                            $OUT[-1] = merge_single_annotation_line(
+                                    $OUT[-1],
+                                    "# ["
+                                      . main::clarify_code_point_count($end - $start + 1)
+                                      . "]\n",
+                                    $comment_indent);
+                        }
+                    }
+                    elsif ($start != $end) { # If there is a range
+                        if ($end == $MAX_WORKING_CODEPOINT) {
+                            push @OUT, sprintf "$hex_format\t$hex_format",
+                                                $start,
+                                                $MAX_PLATFORM_CODEPOINT;
                          }
                          else {
-                            $OUT[-1] = Text::Tabs::expand($OUT[-1]);
-                            my $count = main::clarify_number($end - $start + 1);
-                            use integer;
+                            push @OUT, sprintf "$hex_format\t$hex_format",
+                                                $start,       $end;
+                        }
+                        if (length $value) {
+                            if ($convert_map_to_from_hex) {
+                                $OUT[-1] .= sprintf "\t$hex_format\n", $value;
+                            }
+                            else {
+                                $OUT[-1] .= "\t$value\n";
+                            }
+                        }
  
-                            my $width = $tab_stops * 8 - 1;
-                            $OUT[-1] = sprintf("%-*s # [%s]\n",
-                                                $width,
-                                                $OUT[-1],
-                                                $count);
-                            $OUT[-1] = Text::Tabs::unexpand($OUT[-1]);
+                        # Add a comment with the size of the range, if
+                        # requested.
+                        if ($output_range_counts{$addr}) {
+                            $OUT[-1] = merge_single_annotation_line(
+                                    $OUT[-1],
+                                    "# ["
+                                      . main::clarify_code_point_count($end - $start + 1)
+                                      . "]\n",
+                                    $comment_indent);
                          }
                      }
-
-                        # Here to output a single code point per line.
-                        # If not to annotate, use the simple formats
-                    elsif (! $annotate) {
+                    else { # Here to output a single code point per line.
  
                          # Use any passed in subroutine to output.
                          if (ref $range_size_1 eq 'CODE') {
@@ -5706,18 +5973,27 @@ END
  
                              # Here, caller is ok with default output.
                              for (my $i = $start; $i <= $end; $i++) {
-                                push @OUT, sprintf "%04X\t\t%s\n", $i, $value;
+                                if ($convert_map_to_from_hex) {
+                                    push @OUT,
+                                        sprintf "$hex_format\t\t$hex_format\n",
+                                                 $i,            $value;
+                                }
+                                else {
+                                    push @OUT, sprintf $hex_format, $i;
+                                    $OUT[-1] .= "\t\t$value" if $value ne "";
+                                    $OUT[-1] .= "\n";
+                                }
                              }
                          }
                      }
-                    else {
  
-                        # Here, wants annotation.
+                    if ($annotate) {
                          for (my $i = $start; $i <= $end; $i++) {
+                            my $annotation = "";
  
                              # Get character information if don't have it already
                              main::populate_char_info($i)
-                                                if ! defined $viacode[$i];
+                                                     if ! defined $viacode[$i];
                              my $type = $annotate_char_type[$i];
  
                              # Figure out if should output the next code points
@@ -5726,9 +6002,12 @@ END
                              # so returns $i.  Otherwise use the end of the
                              # annotation range, but no further than the
                              # maximum possible end point of the loop.
-                            my $range_end = main::min(
-                                        $annotate_ranges->value_of($i) || $i,
-                                        $end);
+                            my $range_end =
+                                        $range_size_1
+                                        ? $start
+                                        : main::min(
+                                          $annotate_ranges->value_of($i) || $i,
+                                          $end);
  
                              # Use a range if it is a range, and either is one
                              # of the special annotation ranges, or the range
@@ -5742,9 +6021,6 @@ END
                                  # Here is to output a range.  We don't allow a
                                  # caller-specified output format--just use the
                                  # standard one.
-                                push @OUT, sprintf "%04X\t%04X\t%s\t#", $i,
-                                                                $range_end,
-                                                                $value;
                                  my $range_name = $viacode[$i];
  
                                  # For the code points which end in their hex
@@ -5752,7 +6028,7 @@ END
                                  # annotation, and capitalize only the first
                                  # letter of each word.
                                  if ($type == $CP_IN_NAME) {
-                                    my $hex = sprintf "%04X", $i;
+                                    my $hex = sprintf $hex_format, $i;
                                      $range_name =~ s/-$hex$//;
                                      my @words = split " ", $range_name;
                                      for my $word (@words) {
@@ -5765,13 +6041,27 @@ END
                                      $range_name = "Hangul Syllable";
                                  }
  
-                                $OUT[-1] .= " $range_name" if $range_name;
+                                if ($i != $start || $range_end < $end) {
+                                    if ($range_end < $MAX_WORKING_CODEPOINT)
+                                    {
+                                        $annotation = sprintf "%04X..%04X",
+                                                              $i,   $range_end;
+                                    }
+                                    else {
+                                        $annotation = sprintf "%04X..INFINITY",
+                                                               $i;
+                                    }
+                                }
+                                else { # Indent if not displaying code points
+                                    $annotation = " " x 4;
+                                }
+                                $annotation .= " $range_name" if $range_name;
  
                                  # Include the number of code points in the
                                  # range
                                  my $count =
-                                    main::clarify_number($range_end - $i + 1);
-                                $OUT[-1] .= " [$count]\n";
+                                    main::clarify_code_point_count($range_end - $i + 1);
+                                $annotation .= " [$count]\n";
  
                                  # Skip to the end of the range
                                  $i = $range_end;
@@ -5781,12 +6071,10 @@ END
  
                                  # When outputting the names of each character,
                                  # use the character itself if printable
-                                $comment .= "'" . chr($i) . "' "
+                                $comment .= "'" . main::display_chr($i) . "' "
                                                              if $printable[$i];
  
-                                # To make it more readable, use a minimum
-                                # indentation
-                                my $comment_indent;
+                                my $output_value = $value;
  
                                  # Determine the annotation
                                  if ($format eq $DECOMP_STRING_FORMAT) {
@@ -5799,14 +6087,15 @@ END
                                      # strings, one of the printable
                                      # characters, and one of their official
                                      # names.
-                                    (my $map = $value) =~ s/ \ * < .*? > \ +//x;
+                                    (my $map = $output_value)
+                                                    =~ s/ \ * < .*? > \ +//x;
                                      my $tostr = "";
                                      my $to_name = "";
                                      my $to_chr = "";
                                      foreach my $to (split " ", $map) {
                                          $to = CORE::hex $to;
                                          $to_name .= " + " if $to_name;
-                                        $to_chr .= chr($to);
+                                        $to_chr .= main::display_chr($to);
                                          main::populate_char_info($to)
                                                      if ! defined $viacode[$to];
                                          $to_name .=  $viacode[$to];
@@ -5814,57 +6103,77 @@ END
  
                                      $comment .=
                                      "=> '$to_chr'; $viacode[$i] => $to_name";
-                                    $comment_indent = 25;   # Determined by
-                                                            # experiment
                                  }
                                  else {
-
-                                    # Assume that any table that has hex
-                                    # format is a mapping of one code point to
-                                    # another.
-                                    if ($format eq $HEX_FORMAT) {
-                                        my $decimal_value = CORE::hex $value;
-                                        main::populate_char_info($decimal_value)
-                                        if ! defined $viacode[$decimal_value];
-                                        $comment .= "=> '"
-                                        . chr($decimal_value)
-                                        . "'; " if $printable[$decimal_value];
+                                    $output_value += $i - $start
+                                                   if $use_adjustments
+                                                      # Don't try to adjust a
+                                                      # non-integer
+                                                   && $output_value !~ /[-\D]/;
+
+                                    if ($output_map_in_hex) {
+                                        main::populate_char_info($output_value)
+                                          if ! defined $viacode[$output_value];
+                                        $comment .= " => '"
+                                        . main::display_chr($output_value)
+                                        . "'; " if $printable[$output_value];
+                                    }
+                                    if ($include_name && $viacode[$i]) {
+                                        $comment .= " " if $comment;
+                                        $comment .= $viacode[$i];
                                      }
-                                    $comment .= $viacode[$i] if $include_name
-                                                            && $viacode[$i];
-                                    if ($format eq $HEX_FORMAT) {
-                                        my $decimal_value = CORE::hex $value;
+                                    if ($output_map_in_hex) {
                                          $comment .=
-                                            " => $viacode[$decimal_value]"
-                                                if $viacode[$decimal_value];
+                                                " => $viacode[$output_value]"
+                                                    if $viacode[$output_value];
+                                        $output_value = sprintf($hex_format,
+                                                                $output_value);
                                      }
-
-                                    # If including the name, no need to
-                                    # indent, as the name will already be way
-                                    # across the line.
-                                    $comment_indent = ($include_name) ? 0 : 60;
                                  }
  
-                                # Use any passed in routine to output the base
-                                # part of the line.
-                                if (ref $range_size_1 eq 'CODE') {
-                                    my $base_part=&{$range_size_1}($i, $value);
-                                    chomp $base_part;
-                                    push @OUT, $base_part;
+                                if ($include_cp) {
+                                    $annotation = sprintf "%04X", $i;
+                                    if ($use_adjustments) {
+                                        $annotation .= " => $output_value";
+                                    }
                                  }
-                                else {
-                                    push @OUT, sprintf "%04X\t\t%s", $i, $value;
+
+                                if ($comment ne "") {
+                                    $annotation .= " " if $annotation ne "";
+                                    $annotation .= $comment;
                                  }
+                                $annotation .= "\n" if $annotation ne "";
+                            }
  
-                                # And add the annotation.
-                                $OUT[-1] = sprintf "%-*s\t# %s",
-                                                   $comment_indent,
-                                                   $OUT[-1],
-                                                   $comment
-                                            if $comment;
-                                $OUT[-1] .= "\n";
+                            if ($annotation ne "") {
+                                push @annotation, (" " x $comment_indent)
+                                                  .  "# $annotation";
                              }
                          }
+
+                        # If not adjusting, we don't have to go through the
+                        # loop again to know that the annotation comes next
+                        # in the output.
+                        if (! $use_adjustments) {
+                            if (@annotation == 1) {
+                                $OUT[-1] = merge_single_annotation_line(
+                                    $OUT[-1], $annotation[0], $comment_indent);
+                            }
+                            else {
+                                push @OUT, map { Text::Tabs::unexpand $_ }
+                                               @annotation;
+                            }
+                            undef @annotation;
+                        }
+                    }
+
+                    # Add the beginning of the range that doesn't match the
+                    # property, except if the just added match range extends
+                    # to infinity.  We do this after any annotations for the
+                    # match range.
+                    if ($write_as_invlist && $end < $MAX_WORKING_CODEPOINT) {
+                        push @OUT, $end + 1, "\n";
+                        $invlist_count++;
                      }
  
                      # If we split the range, set up so the next time through
@@ -5878,6 +6187,10 @@ END
                      }
                  }
              } # End of loop through all the table's ranges
+
+            push @OUT, @annotation; # Add orphaned annotation, if any
+
+            splice @OUT, 1, 0, "V$invlist_count\n" if $invlist_count;
          }
  
          # Add anything that goes after the main body, but within the here
@@ -6097,13 +6410,22 @@ sub trace { return main::trace(@_); }
                      \%anomalous_entries,
                      'readable_array');
  
+    my %replacement_property;
+    # Certain files are unused by Perl itself, and are kept only for backwards
+    # compatibility for programs that used them before Unicode::UCD existed.
+    # These are termed legacy properties.  At some point they may be removed,
+    # but for now mark them as legacy.  If non empty, this is the name of the
+    # property to use instead (i.e., the modern equivalent).
+    main::set_access('replacement_property', \%replacement_property, 'r');
+
      my %to_output_map;
      # Enum as to whether or not to write out this map table, and how:
      #   0               don't output
      #   $EXTERNAL_MAP   means its existence is noted in the documentation, and
      #                   it should not be removed nor its format changed.  This
      #                   is done for those files that have traditionally been
-    #                   output.
+    #                   output.  Maps of legacy-only properties default to
+    #                   this.
      #   $INTERNAL_MAP   means Perl reserves the right to do anything it wants
      #                   with this file
      #   $OUTPUT_ADJUSTED means that it is an $INTERNAL_MAP, and instead of
@@ -6128,9 +6450,17 @@ sub trace { return main::trace(@_); }
          my $default_map = delete $args{'Default_Map'};
          my $property = delete $args{'_Property'};
          my $full_name = delete $args{'Full_Name'};
+        my $replacement_property = delete $args{'Replacement_Property'} // "";
          my $to_output_map = delete $args{'To_Output_Map'};
  
-        # Rest of parameters passed on
+        # Rest of parameters passed on; legacy properties have several common
+        # other attributes
+        if ($replacement_property) {
+            $args{"Fate"} = $LEGACY_ONLY;
+            $args{"Range_Size_1"} = 1;
+            $args{"Perl_Extension"} = 1;
+            $args{"UCD"} = 0;
+        }
  
          my $range_list = Range_Map->new(Owner => $property);
  
@@ -6140,12 +6470,16 @@ sub trace { return main::trace(@_); }
                                      Full_Name => $full_name,
                                      _Property => $property,
                                      _Range_List => $range_list,
+                                    Write_As_Invlist => 0,
                                      %args);
  
          my $addr = do { no overloading; pack 'J', $self; };
  
          $anomalous_entries{$addr} = [];
          $default_map{$addr} = $default_map;
+        $replacement_property{$addr} = $replacement_property;
+        $to_output_map = $EXTERNAL_MAP if ! defined $to_output_map
+                                          && $replacement_property;
          $to_output_map{$addr} = $to_output_map;
  
          $self->initialize($initialize) if defined $initialize;
@@ -6367,7 +6701,13 @@ sub trace { return main::trace(@_); }
              $return .= $INTERNAL_ONLY_HEADER;
          }
          else {
-            my $property_name = $self->property->full_name =~ s/Legacy_//r;
+            my $property_name = $self->property->replacement_property;
+
+            # The legacy-only properties were gotten above; but there are some
+            # other properties whose files are in current use that have fixed
+            # formats.
+            $property_name = $self->property->full_name unless $property_name;
+
              $return .= <<END;
  
  # !!!!!!!   IT IS DEPRECATED TO USE THIS FILE   !!!!!!!
@@ -6430,7 +6770,7 @@ END
  
          # Get a  string version of $count with underscores in large numbers,
          # for clarity.
-        my $string_count = main::clarify_number($count);
+        my $string_count = main::clarify_code_point_count($count);
  
          my $code_points = ($count == 1)
                          ? 'single code point'
@@ -6476,7 +6816,7 @@ END
          my $comment = "";
  
          my $status = $self->status;
-        if ($status && $status ne $PLACEHOLDER) {
+        if ($status ne $NORMAL) {
              my $warn = uc $status_past_participles{$status};
              $comment .= <<END;
  
@@ -6652,18 +6992,21 @@ END
                      $to = CORE::hex $to;
                      if ($annotate) {
                          $to_name .= " + " if $to_name;
-                        $to_chr .= chr($to);
+                        $to_chr .= main::display_chr($to);
                          main::populate_char_info($to)
                                              if ! defined $viacode[$to];
                          $to_name .=  $viacode[$to];
                      }
                  }
  
-                # I (khw) have never waded through this line to
-                # understand it well enough to comment it.
+                # The unpack yields a list of the bytes that comprise the
+                # UTF-8 of $code_point, which are each placed in \xZZ format
+                # and output in the %s to map to $tostr, so the result looks
+                # like:
+                # "\xC4\xB0" => "\x{0069}\x{0307}",
                  my $utf8 = sprintf(qq["%s" => "$tostr",],
                          join("", map { sprintf "\\x%02X", $_ }
-                            unpack("U0C*", pack("U", $code_point))));
+                            unpack("U0C*", chr $code_point)));
  
                  # Add a comment so that a human reader can more easily
                  # see what's going on.
@@ -6676,7 +7019,7 @@ END
                      main::populate_char_info($code_point)
                                      if ! defined $viacode[$code_point];
                      $multi_code_point_maps[-1] .= " '"
-                        . chr($code_point)
+                        . main::display_chr($code_point)
                          . "' => '$to_chr'; $viacode[$code_point] => $to_name";
                  }
              }
@@ -6881,20 +7224,22 @@ END
          }
  
          # If the output is to be adjusted, the format of the table that gets
-        # output is actually 'a' instead of whatever it is stored internally
-        # as.
+        # output is actually 'a' or 'ax' instead of whatever it is stored
+        # internally as.
          my $output_adjusted = ($self->to_output_map == $OUTPUT_ADJUSTED);
          if ($output_adjusted) {
-            $format = $ADJUST_FORMAT;
+            if ($default_map eq $CODE_POINT) {
+                $format = $HEX_ADJUST_FORMAT;
+            }
+            else {
+                $format = $ADJUST_FORMAT;
+            }
          }
  
          $self->_set_format($format);
  
          return $self->SUPER::write(
              $output_adjusted,
-            ($self->property == $block)
-                ? 7     # block file needs more tab stops
-                : 3,
              $default_map);   # don't write defaulteds
      }
  
@@ -7051,6 +7396,7 @@ sub trace { return main::trace(@_); }
                                        _Property => $property,
                                        _Range_List => $range_list,
                                        Format => $EMPTY_FORMAT,
+                                      Write_As_Invlist => 1,
                                        );
          my $addr = do { no overloading; pack 'J', $self; };
  
@@ -7426,7 +7772,7 @@ sub trace { return main::trace(@_); }
          my $self = shift;
          Carp::carp_extra_args(\@_) if main::DEBUG && @_;
  
-        return $self->SUPER::write(0, 2); # No adjustments; 2 tab stops
+        return $self->SUPER::write(0); # No adjustments
      }
  
      sub set_final_comment {
@@ -7457,7 +7803,18 @@ END
          # Get the number of code points matched by each of the tables in this
          # file, and add underscores for clarity.
          my $count = $leader->count;
-        my $string_count = main::clarify_number($count);
+        my $unicode_count;
+        my $non_unicode_string;
+        if ($count > $MAX_UNICODE_CODEPOINTS) {
+            $unicode_count = $count - ($MAX_WORKING_CODEPOINT
+                                       - $MAX_UNICODE_CODEPOINT);
+            $non_unicode_string = "All above-Unicode code points match as well, and are also returned";
+        }
+        else {
+            $unicode_count = $count;
+            $non_unicode_string = "";
+        }
+        my $string_count = main::clarify_code_point_count($unicode_count);
  
          my $loose_count = 0;        # how many aliases loosely matched
          my $compound_name = "";     # ? Are any names compound?, and if so, an
@@ -7655,11 +8012,13 @@ END
              }
          } # End of looping through all tables
  
+        $matches_comment .= "\n$non_unicode_string\n" if $non_unicode_string;
+
  
          my $code_points;
          my $match;
          my $any_of_these;
-        if ($count == 1) {
+        if ($unicode_count == 1) {
              $match = 'matches';
              $code_points = 'single code point';
          }
@@ -7710,14 +8069,16 @@ END
          if ($total_entries == 0) {
              Carp::my_carp("No regular expression construct can match $leader, as all names for it are the null string.  Creating file anyway.");
              $comment .= <<END;
-This file returns the $code_points in Unicode Version $string_version for
+This file returns the $code_points in Unicode Version
+$string_version for
  $leader, but it is inaccessible through Perl regular expressions, as
  "\\p{prop=}" is not recognized.
  END
  
          } else {
              $comment .= <<END;
-This file returns the $code_points in Unicode Version $string_version that
+This file returns the $code_points in Unicode Version
+$string_version that
  $match$synonyms:
  
  $matches_comment
@@ -7757,14 +8118,25 @@ END
  
          if ($count) {   # The format differs if no code points, and needs no
                          # explanation in that case
+            if ($leader->write_as_invlist) {
                  $comment.= <<END;
  
-The format of the lines of this file is:
+The first data line of this file begins with the letter V to indicate it is in
+inversion list format.  The number following the V gives the number of lines
+remaining.  Each of those remaining lines is a single number representing the
+starting code point of a range which goes up to but not including the number
+on the next line; The 0th, 2nd, 4th... ranges are for code points that match
+the property; the 1st, 3rd, 5th... are ranges of code points that don't match
+the property.  The final line's range extends to the platform's infinity.
  END
-            $comment.= <<END;
+            }
+            else {
+                $comment.= <<END;
+The format of the lines of this file is:
  START\\tSTOP\\twhere START is the starting code point of the range, in hex;
  STOP is the ending point, or if omitted, the range has just one code point.
  END
+            }
              if ($leader->output_range_counts) {
                  $comment .= <<END;
  Numbers in comments in [brackets] indicate how many code points are in the
@@ -8060,8 +8432,12 @@ sub trace { return main::trace(@_) if main::DEBUG && $to_trace }
                                  _Alias_Hash => $table_ref{$addr},
                                  _Property => $self,
  
-                                # gets property's fate and status by default
-                                Fate => $self->fate,
+                                # gets property's fate and status by default,
+                                # except if the name begind with an
+                                # underscore, default it to internal
+                                Fate => ($name =~ /^_/)
+                                         ? $INTERNAL_ONLY
+                                         : $self->fate,
                                  Status => $self->status,
                                  _Status_Info => $self->status_info,
                                  %args);
@@ -8084,10 +8460,10 @@ sub trace { return main::trace(@_) if main::DEBUG && $to_trace }
              $type{$addr} = $NON_STRING;
          }
          elsif ($type{$addr} != $ENUM && $type{$addr} != $FORCED_BINARY) {
-            if (scalar main::uniques(values %{$table_ref{$addr}}) > 2
-                && $type{$addr} == $BINARY)
-            {
-                Carp::my_carp("$self now has more than 2 tables (with the addition of '$name'), and so is no longer binary.  Changing its type to 'enum'.  Bad News.");
+            if (scalar main::uniques(values %{$table_ref{$addr}}) > 2) {
+                if ($type{$addr} == $BINARY) {
+                    Carp::my_carp("$self now has more than 2 tables (with the addition of '$name'), and so is no longer binary.  Changing its type to 'enum'.  Bad News.");
+                }
                  $type{$addr} = $ENUM;
              }
          }
@@ -8173,10 +8549,13 @@ sub trace { return main::trace(@_) if main::DEBUG && $to_trace }
  
          my $addr = do { no overloading; pack 'J', $self; };
  
-        # Swash names are used only on regular map tables; otherwise there
-        # should be no access to the property map table from other parts of
-        # Perl.
-        return if $map{$addr}->fate != $ORDINARY;
+        # Swash names are used only on either
+        # 1) legacy-only properties, because the formats for these are
+        #    unchangeable, and they have had these lines in them; or
+        # 2) regular map tables; otherwise there should be no access to the
+        #    property map table from other parts of Perl.
+        return if $map{$addr}->fate != $ORDINARY
+                  && $map{$addr}->fate != $LEGACY_ONLY;
  
          return $file{$addr} if defined $file{$addr};
          return $map{$addr}->external_name;
@@ -8399,8 +8778,8 @@ sub trace { return main::trace(@_) if main::DEBUG && $to_trace }
              {
                  my $tables = $self->tables;
                  my $count = $self->count;
-                if ($verbosity && $count > 500 && $tables/$count > .1) {
-                    Carp::my_carp_bug("It appears that $self should be a \$STRING property, not an \$ENUM because it has too many match tables: $count\n");
+                if ($verbosity && $tables > 500 && $tables/$count > .1) {
+                    Carp::my_carp_bug("It appears that $self should be a \$STRING property, not an \$ENUM because it has too many match tables: $tables\n");
                  }
                  $self->set_type($ENUM);
              }
@@ -8461,6 +8840,7 @@ sub trace { return main::trace(@_) if main::DEBUG && $to_trace }
                      initialize
                      inverse_list
                      is_empty
+                    replacement_property
                      name
                      note
                      perl_extension
@@ -8504,6 +8884,16 @@ sub trace { return main::trace(@_) if main::DEBUG && $to_trace }
  
  package main;
  
+    sub display_chr {
+        # Converts an ordinal character value to a displayable string, using a
+        # NBSP to hold combining characters.
+
+        my $ord = shift;
+        my $chr = chr $ord;
+        return $chr if $ccc->table(0)->contains($ord);
+        return chr(utf8::unicode_to_native(0xA0)) . $chr;
+    }
+
  sub join_lines($) {
      # Returns lines of the input joined together, so that they can be folded
      # properly.
@@ -9194,7 +9584,7 @@ sub finish_property_setup {
      $gc->add_alias('Category');
  
      # Unicode::Normalize expects this file with this name and directory.
-    my $ccc = property_ref('Canonical_Combining_Class');
+    $ccc = property_ref('Canonical_Combining_Class');
      if (defined $ccc) {
          $ccc->set_file('CombiningClass');
          $ccc->set_directory(File::Spec->curdir());
@@ -11422,9 +11812,12 @@ sub filter_old_style_arabic_shaping {
          $uc = property_ref('uc');
  
          # For each of the case change mappings...
-        foreach my $full_table ($lc, $tc, $uc) {
-            my $full_name = $full_table->name;
-            unless (defined $full_table && ! $full_table->is_empty) {
+        foreach my $full_casing_table ($lc, $tc, $uc) {
+            my $full_casing_name = $full_casing_table->name;
+            my $full_casing_full_name = $full_casing_table->full_name;
+            unless (defined $full_casing_table
+                    && ! $full_casing_table->is_empty)
+            {
                  Carp::my_carp_bug("Need to process UnicodeData before SpecialCasing.  Only special casing will be generated.");
              }
  
@@ -11439,18 +11832,16 @@ sub filter_old_style_arabic_shaping {
              # full maps so that as we handle those, we can override the simple
              # maps for them in the legacy table, and merely add them in the
              # new-style one.
-            my $legacy = Property->new("Legacy_" . $full_table->full_name,
-                                        File => $full_table->full_name =~
-                                                            s/case_Mapping//r,
-                                        Range_Size_1 => 1,
-                                        Format => $HEX_FORMAT,
-                                        Default_Map => $CODE_POINT,
-                                        UCD => 0,
-                                        Initialize => $full_table,
-                                        To_Output_Map => $EXTERNAL_MAP,
+            my $legacy = Property->new("Legacy_" . $full_casing_full_name,
+                                File => $full_casing_full_name
+                                                          =~ s/case_Mapping//r,
+                                Format => $HEX_FORMAT,
+                                Default_Map => $CODE_POINT,
+                                Initialize => $full_casing_table,
+                                Replacement_Property => $full_casing_full_name,
              );
  
-            $full_table->add_comment(join_lines( <<END
+            $full_casing_table->add_comment(join_lines( <<END
  This file includes both the simple and full case changing maps.  The simple
  ones are in the main body of the table below, and the full ones adding to or
  overriding them are in the hash.
@@ -11459,9 +11850,9 @@ END
  
              # The simple version's name in each mapping merely has an 's' in
              # front of the full one's
-            my $simple_name = 's' . $full_name;
+            my $simple_name = 's' . $full_casing_name;
              my $simple = property_ref($simple_name);
-            $simple->initialize($full_table) if $simple->to_output_map();
+            $simple->initialize($full_casing_table) if $simple->to_output_map();
          }
  
          return;
@@ -11574,7 +11965,7 @@ END
                                               . $object->full_name
                                               . "; $fields[$i]");
  
-                # ... and, the The regular table, in which it is additional,
+                # ... and the regular table, in which it is additional,
                  # beyond the simple mapping.
                  $file->insert_adjusted_lines("$fields[0]; "
                                               . $object->name
@@ -12605,6 +12996,19 @@ END
                              Case_Folding
                          } )
      {
+        my $comment = <<END;
+
+Note that although the Perl core uses this file, it has the standard values
+for code points from U+0000 to U+00FF compiled in, so changing this table will
+not change the core's behavior with respect to these code points.  Use
+Unicode::Casing to override this table.
+END
+        if ($map eq 'Case_Folding') {
+            $comment .= <<END;
+(/i regex matching is not overridable except by using a custom regex engine)
+END
+        }
+        property_ref($map)->add_comment(join_lines($comment));
          my $simple = property_ref("Simple_$map");
          next if ! $simple->is_empty;
          if ($simple->to_output_map) {
@@ -12678,6 +13082,7 @@ END
          }
  
          # We should have enough data now to compute the type of the property.
+        my $property_name = $property->name;
          $property->compute_type;
          my $property_type = $property->type;
  
@@ -12688,8 +13093,14 @@ END
          # The Unicode db always (so far, and they claim into the future) have
          # the default for missing entries in binary properties be 'N' (unless
          # there is a '@missing' line that specifies otherwise)
-        if ($property_type == $BINARY && ! defined $property->default_map) {
-            $property->set_default_map('N');
+        if (! defined $property->default_map) {
+            if ($property_type == $BINARY) {
+                $property->set_default_map('N');
+            }
+            elsif ($property_type == $ENUM) {
+                Carp::my_carp("Property '$property_name doesn't have a default mapping.  Using a fake one");
+                $property->set_default_map('XXX This makes sure there is a default map');
+            }
          }
  
          # Add any remaining code points to the mapping, using the default for
@@ -12719,13 +13130,12 @@ END
                  # This fills in any missing values with the default.  It's not
                  # necessary to do this with binary properties, as the default
                  # is defined completely in terms of the Y table.
-                $property->add_map(0, $MAX_UNICODE_CODEPOINT,
+                $property->add_map(0, $MAX_WORKING_CODEPOINT,
                                     $default_map, Replace => $NO);
              }
          }
  
          # Have all we need to populate the match tables.
-        my $property_name = $property->name;
          my $maps_should_be_defined = $property->pre_declared_maps;
          foreach my $range ($property->ranges) {
              my $map = $range->value;
@@ -12835,13 +13245,10 @@ END
      # backwards compatibility with applications that read them directly.
      my $Digit = Property->new("Legacy_Perl_Decimal_Digit",
                                Default_Map => "",
-                              Perl_Extension => 1,
                                File => 'Digit',    # Trad. location
                                Directory => $map_directory,
-                              UCD => 0,
                                Type => $STRING,
-                              To_Output_Map => $EXTERNAL_MAP,
-                              Range_Size_1 => 1,
+                              Replacement_Property => "Perl_Decimal_Digit",
                                Initialize => property_ref('Perl_Decimal_Digit'),
                              );
      $Digit->add_comment(join_lines(<<END
@@ -12857,10 +13264,8 @@ END
                      File => "Fold",
                      Directory => $map_directory,
                      Default_Map => $CODE_POINT,
-                    UCD => 0,
-                    Range_Size_1 => 1,
                      Type => $STRING,
-                    To_Output_Map => $EXTERNAL_MAP,
+                    Replacement_Property => "Case_Folding",
                      Format => $HEX_FORMAT,
                      Initialize => property_ref('cf'),
      );
@@ -12934,30 +13339,35 @@ sub compile_perl() {
      # just isn't any Posix equivalent.  'XPosix' are the Posix tables extended
      # to the full Unicode range, by our guesses as to what is appropriate.
  
-    # 'Any' is all code points.  As an error check, instead of just setting it
+    # 'All' is all code points.  As an error check, instead of just setting it
      # to be that, construct it to be the union of all the major categories
-    $Any = $perl->add_match_table('Any',
-            Description  => "[\\x{0000}-\\x{$MAX_UNICODE_CODEPOINT_STRING}]",
-            Matches_All => 1);
+    $All = $perl->add_match_table('All',
+      Description
+        => "All code points, including those above Unicode.  Same as qr/./s",
+      Matches_All => 1);
  
      foreach my $major_table ($gc->tables) {
  
          # Major categories are the ones with single letter names.
          next if length($major_table->name) != 1;
  
-        $Any += $major_table;
+        $All += $major_table;
      }
  
-    if ($Any->max != $MAX_UNICODE_CODEPOINT) {
+    if ($All->max != $MAX_WORKING_CODEPOINT) {
          Carp::my_carp_bug("Generated highest code point ("
-           . sprintf("%X", $Any->max)
-           . ") doesn't match expected value $MAX_UNICODE_CODEPOINT_STRING.")
+           . sprintf("%X", $All->max)
+           . ") doesn't match expected value $MAX_WORKING_CODEPOINT_STRING.")
      }
-    if ($Any->range_count != 1 || $Any->min != 0) {
-     Carp::my_carp_bug("Generated table 'Any' doesn't match all code points.")
+    if ($All->range_count != 1 || $All->min != 0) {
+     Carp::my_carp_bug("Generated table 'All' doesn't match all code points.")
      }
  
-    $Any->add_alias('All');
+    my $Any = $perl->add_match_table('Any',
+                                     Description  => "All Unicode code points: [\\x{0000}-\\x{10FFFF}]",
+                                     );
+    $Any->add_range(0, 0x10FFFF);
+    $Any->add_alias('Unicode');
  
      # Assigned is the opposite of gc=unassigned
      my $Assigned = $perl->add_match_table('Assigned',
@@ -13457,12 +13867,90 @@ sub compile_perl() {
      my $any_folds = $perl->add_match_table("_Perl_Any_Folds",
                      Description => "Code points that particpate in some fold",
                      );
-    #
-    foreach my $range (property_ref('Case_Folding')->ranges) {
-        $any_folds->add_range($range->start, $range->end);
-        foreach my $hex_code_point (split " ", $range->value) {
-            my $code_point = hex $hex_code_point;
-            $any_folds->add_range($code_point, $code_point);
+    my $loc_problem_folds = $perl->add_match_table(
+               "_Perl_Problematic_Locale_Folds",
+               Description =>
+                   "Code points that are in some way problematic under locale",
+    );
+
+    # This allows regexec.c to skip some work when appropriate.  Some of the
+    # entries in _Perl_Problematic_Locale_Folds are multi-character folds,
+    my $loc_problem_folds_start = $perl->add_match_table(
+               "_Perl_Problematic_Locale_Foldeds_Start",
+               Description =>
+                   "The first character of every sequence in _Perl_Problematic_Locale_Folds",
+    );
+
+    my $cf = property_ref('Case_Folding');
+
+    # Every character 0-255 is problematic because what each folds to depends
+    # on the current locale
+    $loc_problem_folds->add_range(0, 255);
+    $loc_problem_folds_start += $loc_problem_folds;
+
+    # Also problematic are anything these fold to outside the range.  Likely
+    # forever the only thing folded to by these outside the 0-255 range is the
+    # GREEK SMALL MU (from the MICRO SIGN), but it's easy to make the code
+    # completely general, which should catch any unexpected changes or errors.
+    # We look at each code point 0-255, and add its fold (including each part
+    # of a multi-char fold) to the list.  See commit message
+    # 31f05a37c4e9c37a7263491f2fc0237d836e1a80 for a more complete description
+    # of the MU issue.
+    foreach my $range ($loc_problem_folds->ranges) {
+        foreach my $code_point($range->start .. $range->end) {
+            my $fold_range = $cf->containing_range($code_point);
+            next unless defined $fold_range;
+
+            my @hex_folds = split " ", $fold_range->value;
+            my $start_cp = hex $hex_folds[0];
+            foreach my $i (0 .. @hex_folds - 1) {
+                my $cp = hex $hex_folds[$i];
+                next unless $cp > 255;    # Already have the < 256 ones
+
+                $loc_problem_folds->add_range($cp, $cp);
+                $loc_problem_folds_start->add_range($start_cp, $start_cp);
+            }
+        }
+    }
+
+    my $folds_to_multi_char = $perl->add_match_table(
+         "_Perl_Folds_To_Multi_Char",
+         Description =>
+              "Code points whose fold is a string of more than one character",
+    );
+
+    # Look through all the known folds to populate these tables.
+    foreach my $range ($cf->ranges) {
+        my $start = $range->start;
+        my $end = $range->end;
+        $any_folds->add_range($start, $end);
+
+        my @hex_folds = split " ", $range->value;
+        if (@hex_folds > 1) {   # Is multi-char fold
+            $folds_to_multi_char->add_range($start, $end);
+        }
+
+        my $found_locale_problematic = 0;
+
+        # Look at each of the folded-to characters...
+        foreach my $i (0 .. @hex_folds - 1) {
+            my $cp = hex $hex_folds[$i];
+            $any_folds->add_range($cp, $cp);
+
+            # The fold is problematic if any of the folded-to characters is
+            # already considered problematic.
+            if ($loc_problem_folds->contains($cp)) {
+                $loc_problem_folds->add_range($start, $end);
+                $found_locale_problematic = 1;
+            }
+        }
+
+        # If this is a problematic fold, add to the start chars the
+        # folding-from characters and first folded-to character.
+        if ($found_locale_problematic) {
+            $loc_problem_folds_start->add_range($start, $end);
+            my $cp = hex $hex_folds[0];
+            $loc_problem_folds_start->add_range($cp, $cp);
          }
      }
  
@@ -14006,8 +14494,9 @@ END
              $unassigned_sans_noncharacters &= $nonchars->table('N');
          }
  
-        for (my $i = 0; $i <= $MAX_UNICODE_CODEPOINT; $i++ ) {
+        for (my $i = 0; $i <= $MAX_UNICODE_CODEPOINT + 1; $i++ ) {
              $i = populate_char_info($i);    # Note sets $i so may cause skips
+
          }
      }
  
@@ -14347,8 +14836,9 @@ sub register_file_for_name($$$) {
      # Join all the file path components together, using slashes.
      my $full_filename = join('/', @$directory_ref, $file);
  
-    # All go in the same subdirectory of unicore
-    if ($directory_ref->[0] ne $matches_directory) {
+    # All go in the same subdirectory of unicore, or the special
+    # pseudo-directory '#'
+    if ($directory_ref->[0] !~ / ^ $matches_directory | \# $ /x) {
          Carp::my_carp("Unexpected directory in "
                  .  join('/', @{$directory_ref}, $file));
      }
@@ -14659,7 +15149,18 @@ sub make_re_pod_entries($) {
      my $full_name = $property->full_name;
  
      my $count = $input_table->count;
-    my $string_count = clarify_number($count);
+    my $unicode_count;
+    my $non_unicode_string;
+    if ($count > $MAX_UNICODE_CODEPOINTS) {
+        $unicode_count = $count - ($MAX_WORKING_CODEPOINT
+                                    - $MAX_UNICODE_CODEPOINT);
+        $non_unicode_string = " plus all above-Unicode code points";
+    }
+    else {
+        $unicode_count = $count;
+        $non_unicode_string = "";
+    }
+    my $string_count = clarify_number($unicode_count) . $non_unicode_string;
      my $status = $input_table->status;
      my $status_info = $input_table->status_info;
      my $caseless_equivalent = $input_table->caseless_equivalent;
@@ -14726,6 +15227,8 @@ sub make_re_pod_entries($) {
              # property always use the single form.
              if ($table_property == $perl) {
                  $entry = "\\p{$name}";
+                $entry .= " \\p$name" if length $name == 1; # Show non-braced
+                                                            # form too
                  $entry_ref = "\\p{$name}";
              }
              else {    # Compound form.
@@ -15475,7 +15978,7 @@ B<Compound forms> consist of two components, separated by an equals sign or a
  colon.  The first component is the property name, and the second component is
  the particular value of the property to match against, for example,
  C<\\p{Script: Greek}> and C<\\p{Script=Greek}> both mean to match characters
-whose Script property is Greek.
+whose Script property value is Greek.
  
  B<Single forms>, like C<\\p{Greek}>, are mostly Perl-defined shortcuts for
  their equivalent compound forms.  The table shows these equivalences.  (In our
@@ -15493,11 +15996,13 @@ for improved legibility.
  Also, white space, hyphens, and underscores are normally ignored
  everywhere between the {braces}, and hence can be freely added or removed
  even if the C</x> modifier hasn't been specified on the regular expression.
-But $a_bold_stricter at the beginning of an entry in the table below
+But in the table below $a_bold_stricter at the beginning of an entry
  means that tighter (stricter) rules are used for that entry:
  
  =over 4
  
+=over 4
+
  =item Single form (C<\\p{name}>) tighter rules:
  
  White space, hyphens, and underscores ARE significant
@@ -15525,11 +16030,15 @@ adjacent to (but within) the braces and the colon or equal sign.
  
  =back
  
+=back
+
  Some properties are considered obsolete by Unicode, but still available.
  There are several varieties of obsolescence:
  
  =over 4
  
+=over 4
+
  =item Stabilized
  
  A property may be stabilized.  Such a determination does not indicate
@@ -15573,16 +16082,21 @@ some of these extensions to be removed without warning, replaced by another
  property with the same name that means something different.  Use the
  equivalent shown instead.
  
+=back
+
  @block_warning
  
  The table below has two columns.  The left column contains the C<\\p{}>
  constructs to look up, possibly preceded by the flags mentioned above; and
  the right column contains information about them, like a description, or
-synonyms.  It shows both the single and compound forms for each property that
-has them.  If the left column is a short name for a property, the right column
-will give its longer, more descriptive name; and if the left column is the
-longest name, the right column will show any equivalent shortest name, in both
-single and compound forms if applicable.
+synonyms.  The table shows both the single and compound forms for each
+property that has them.  If the left column is a short name for a property,
+the right column will give its longer, more descriptive name; and if the left
+column is the longest name, the right column will show any equivalent shortest
+name, in both single and compound forms if applicable.
+
+If braces are not needed to specify a property (e.g., C<\\pL>), the left
+column contains both forms, with and without braces.
  
  The right column will also caution you if a property means something different
  than what might normally be expected.
@@ -15590,18 +16104,15 @@ than what might normally be expected.
  All single forms are Perl extensions; a few compound forms are as well, and
  are noted as such.
  
-Numbers in (parentheses) indicate the total number of code points matched by
-the property.  For emphasis, those properties that match no code points at all
-are listed as well in a separate section following the table.
+Numbers in (parentheses) indicate the total number of Unicode code points
+matched by the property.  For emphasis, those properties that match no code
+points at all are listed as well in a separate section following the table.
  
  Most properties match the same code points regardless of whether C<"/i">
  case-insensitive matching is specified or not.  But a few properties are
-affected.  These are shown with the notation
-
- (/i= other_property)
-
+affected.  These are shown with the notation S<C<(/i= I<other_property>)>>
  in the second column.  Under case-insensitive matching they match the
-same code pode points as the property "other_property".
+same code pode points as the property I<other_property>.
  
  There is no description given for most non-Perl defined properties (See
  L<$unicode_reference_url> for that).
@@ -15644,8 +16155,8 @@ B<*> is a wild-card
  
  =item *
  
-B<(\\d+)> in the info column gives the number of code points matched by
-this property.
+B<(\\d+)> in the info column gives the number of Unicode code points matched
+by this property.
  
  =item *
  
@@ -15808,6 +16319,9 @@ sub make_Heavy () {
      my $stricter_to_file_of = simple_dumper(\%stricter_to_file_of, ' ' x 4);
      chomp $stricter_to_file_of;
  
+    my $inline_definitions = simple_dumper(\@inline_definitions, " " x 4);
+    chomp $inline_definitions;
+
      my $loose_to_file_of = simple_dumper(\%loose_to_file_of, ' ' x 4);
      chomp $loose_to_file_of;
  
@@ -15823,7 +16337,13 @@ sub make_Heavy () {
      # for the alternate table figured out at that time.
      foreach my $cased (keys %caseless_equivalent_to) {
          my @path = $caseless_equivalent_to{$cased}->file_path;
-        my $path = join '/', @path[1, -1];
+        my $path;
+        if ($path[0] eq "#") {  # Pseudo-directory '#'
+            $path = join '/', @path;
+        }
+        else {  # Gets rid of lib/
+            $path = join '/', @path[1, -1];
+        }
          $caseless_equivalent_to{$cased} = $path;
      }
      my $caseless_equivalent_to
@@ -15849,12 +16369,22 @@ $INTERNAL_ONLY_HEADER
  $loose_property_name_of
  );
  
-# Maps property, table to file for those using stricter matching
+# Gives the definitions (in the form of inversion lists) for those properties
+# whose definitions aren't kept in files
+\@utf8::inline_definitions = (
+$inline_definitions
+);
+
+# Maps property, table to file for those using stricter matching.  For paths
+# whose directory is '#', the file is in the form of a numeric index into
+# \@inline_definitions
  \%utf8::stricter_to_file_of = (
  $stricter_to_file_of
  );
  
-# Maps property, table to file for those using loose matching
+# Maps property, table to file for those using loose matching.  For paths
+# whose directory is '#', the file is in the form of a numeric index into
+# \@inline_definitions
  \%utf8::loose_to_file_of = (
  $loose_to_file_of
  );
@@ -16406,9 +16936,9 @@ sub write_all_tables() {
      # don't care if both defined, as they had better be different anyway.)
      # The property named 'Perl' needs to be first (it doesn't have any
      # immutable file name) because empty properties are defined in terms of
-    # it's table named 'Any'.)   We also sort by the property's name.  This is
-    # just for repeatability of the outputs between runs of this program, but
-    # does not affect correctness.
+    # its table named 'All' under the -annotate option.)   We also sort by
+    # the property's name.  This is just for repeatability of the outputs
+    # between runs of this program, but does not affect correctness.
      PROPERTY:
      foreach my $property ($perl,
                            sort { return -1 if defined $a->file;
@@ -16524,8 +17054,12 @@ sub write_all_tables() {
                      push @unhandled_properties, "$table";
                  }
  
-                # An empty table is just the complement of everything.
-                $table->set_complement($Any) if $table != $property;
+                # The old way of expressing an empty match list was to
+                # complement the list that matches everything.  The new way is
+                # to create an empty inversion list, but this doesn't work for
+                # annotating, so use the old way then.
+                $table->set_complement($All) if $annotate
+                                                && $table != $property;
              }
              elsif ($expected_empty) {
                  my $because = "";
@@ -16557,13 +17091,13 @@ sub write_all_tables() {
  
              my $count = $table->count;
              if ($expected_full) {
-                if ($count != $MAX_UNICODE_CODEPOINTS) {
+                if ($count != $MAX_WORKING_CODEPOINTS) {
                      Carp::my_carp("$table matches only "
                      . clarify_number($count)
                      . " Unicode code points but should match "
-                    . clarify_number($MAX_UNICODE_CODEPOINTS)
+                    . clarify_number($MAX_WORKING_CODEPOINTS)
                      . " (off by "
-                    .  clarify_number(abs($MAX_UNICODE_CODEPOINTS - $count))
+                    .  clarify_number(abs($MAX_WORKING_CODEPOINTS - $count))
                      . ").  Proceeding anyway.");
                  }
  
@@ -16581,8 +17115,9 @@ sub write_all_tables() {
                  }
              }
              elsif ($count == $MAX_UNICODE_CODEPOINTS
+                   && $name ne "Any"
                     && ($table == $property || $table->leader == $table)
-                   && $table->property->status ne $PLACEHOLDER)
+                   && $table->property->status ne $NORMAL)
              {
                      Carp::my_carp("$table unexpectedly matches all Unicode code points.  Proceeding anyway.");
              }
@@ -16669,7 +17204,8 @@ sub write_all_tables() {
  
                  # We also create for Unicode::UCD a list of aliases for
                  # the property.  The list starts with the property name;
-                # then its full name.
+                # then its full name.  Legacy properties are not listed in
+                # Unicode::UCD.
                  my @property_list;
                  my @standard_list;
                  if ( $property->fate <= $MAP_PROXIED) {
@@ -16811,8 +17347,56 @@ sub write_all_tables() {
          my $filename;
          my $property = $table->property;
          my $is_property = ($table == $property);
-        if (! $is_property) {
  
+        # For very short tables, instead of writing them out to actual files,
+        # we in-line their inversion list definitions into Heavy.pl.  The
+        # definition replaces the file name, and the special pseudo-directory
+        # '#' is used to signal this.  This significantly cuts down the number
+        # of files written at little extra cost to the hashes in Heavy.pl.
+        # And it means, no run-time files to read to get the definitions.
+        if (! $is_property
+            && ! $annotate  # For annotation, we want to explicitly show
+                            # everything, so keep in files
+            && $table->ranges <= 3)
+        {
+            my @ranges = $table->ranges;
+            my $count = @ranges;
+            if ($count == 0) {  # 0th index reserved for 0-length lists
+                $filename = 0;
+            }
+            elsif ($table->leader != $table) {
+
+                # Here, is a table that is equivalent to another; code
+                # in register_file_for_name() causes its leader's definition
+                # to be used
+
+                next;
+            }
+            else {  # No equivalent table so far.
+
+                # Build up its definition range-by-range.
+                my $definition = "";
+                while (defined (my $range = shift @ranges)) {
+                    my $end = $range->end;
+                    if ($end < $MAX_WORKING_CODEPOINT) {
+                        $count++;
+                        $end = "\n" . ($end + 1);
+                    }
+                    else {  # Extends to infinity, hence no 'end'
+                        $end = "";
+                    }
+                    $definition .= "\n" . $range->start . $end;
+                }
+                $definition = "V$count" . $definition;
+                $filename = @inline_definitions;
+                push @inline_definitions, $definition;
+            }
+            @directory = "#";
+            register_file_for_name($table, \@directory, $filename);
+            next;
+        }
+
+        if (! $is_property) {
              # Match tables for the property go in lib/$subdirectory, which is
              # the property's name.  Don't use the standard file name for this,
              # as may get an unfamiliar alias
@@ -17303,10 +17887,12 @@ sub make_property_test_script() {
  
      &write($t_path,
             0,           # Not utf8;
-           [<DATA>,
+           [$HEADER,
+            <DATA>,
              @output,
              (map {"Test_X('$_');\n"} @backslash_X_tests),
-            "Finished();\n"]);
+            "Finished();\n"
+           ]);
      return;
  }
  
@@ -17695,6 +18281,13 @@ my @input_file_objects = (
                      Has_Missings_Defaults => $NOT_IGNORED,
                      Skip => "Provisional; for the analysis and processing of Indic scripts",
                      ),
+    Input_file->new('BidiBrackets.txt', v6.3.0,
+                    Properties => [ 'Bidi_Paired_Bracket', 'Bidi_Paired_Bracket_Type' ],
+                    Has_Missings_Defaults => $NO_DEFAULTS,
+                    ),
+    Input_file->new("BidiCharacterTest.txt", v6.3.0,
+                    Skip => 'Validation Tests',
+                    ),
  );
  
  # End of all the preliminaries.
@@ -17725,6 +18318,7 @@ File::Find::find({
  
  my @mktables_list_output_files;
  my $old_start_time = 0;
+my $old_options = "";
  
  if (! -e $file_list) {
      print "'$file_list' doesn't exist, so forcing rebuild.\n" if $verbosity >= $VERBOSE;
@@ -17747,8 +18341,13 @@ else {
          for my $list ( \@input, \@mktables_list_output_files ) {
              while (<$file_handle>) {
                  s/^ \s+ | \s+ $//xg;
-                if (/^ \s* \# .* Autogenerated\ starting\ on\ (\d+)/x) {
+                if (/^ \s* \# \s* Autogenerated\ starting\ on\ (\d+)/x) {
                      $old_start_time = $1;
+                    next;
+                }
+                if (/^ \s* \# \s* From\ options\ (.+) /x) {
+                    $old_options = $1;
+                    next;
                  }
                  next if /^ \s* (?: \# .* )? $/x;
                  last if /^ =+ $/x;
@@ -17881,7 +18480,9 @@ foreach my $in (@input_files) {
  
  my $rebuild = $write_unchanged_files    # Rebuild: if unconditional rebuild
                || ! scalar @mktables_list_output_files  # or if no outputs known
-              || $old_start_time < $most_recent;       # or out-of-date
+              || $old_start_time < $most_recent        # or out-of-date
+              || $old_options ne $command_line_arguments; # or with different
+                                                          # options
  
  # Now we check to see if any output files are older than youngest, if
  # they are, we need to continue on, otherwise we can presumably bail.
@@ -17952,6 +18553,7 @@ if ( $file_list and $make_list ) {
  # $file_list -- File list for $0.
  #
  #   Autogenerated starting on $start_time ($localtime)
+#   From options $command_line_arguments
  #
  # - First section is input files
  #   ($0 itself is not listed but is automatically considered an input)
@@ -18006,8 +18608,8 @@ use warnings;
  
  # If run outside the normal test suite on an ASCII platform, you can
  # just create a latin1_to_native() function that just returns its
-# inputs, because that's the only function used from test.pl
-require "test.pl";
+# inputs, because that's the only function used from charset_tools.pl
+require "charset_tools.pl";
  
  # Test qr/\X/ and the \p{} regular expression constructs.  This file is
  # constructed by mktables from the tables it generates, so if mktables is