mktables: Put off removing the \N{BELL} conflict

[perl5.git] / lib / unicore / mktables
diff --git a/lib/unicore/mktables b/lib/unicore/mktables

index b2ca9cc..e15a37e 100644 (file)
--- a/lib/unicore/mktables
+++ b/lib/unicore/mktables
@@ -16,16 +16,25 @@
  # that instituted the change to main::objaddr, and subsequent commits that
  # changed 0+$self to pack 'J', $self.)
  
+my $start_time;
+BEGIN { # Get the time the script started running; do it at compilation to
+        # get it as close as possible
+    $start_time= time;
+}
+
+
  require 5.010_001;
  use strict;
  use warnings;
  use Carp;
+use Config;
  use File::Find;
  use File::Path;
  use File::Spec;
  use Text::Tabs;
  
  sub DEBUG () { 0 }  # Set to 0 for production; 1 for development
+my $debugging_build = $Config{"ccflags"} =~ /-DDEBUGGING/;
  
  ##########################################################################
  #
@@ -43,7 +52,7 @@ sub DEBUG () { 0 }  # Set to 0 for production; 1 for development
  #   the small actual loop to process the input files and finish up; then
  #   a __DATA__ section, for the .t tests
  #
-# This program works on all releases of Unicode through at least 5.2.  The
+# This program works on all releases of Unicode through at least 6.0.  The
  # outputs have been scrutinized most intently for release 5.1.  The others
  # have been checked for somewhat more than just sanity.  It can handle all
  # existing Unicode character properties in those releases.
@@ -155,7 +164,10 @@ my $map_directory = 'To';        # Where map files go.
  # out.  But all the ones which can be used in regular expression \p{} and \P{}
  # constructs will.  Generally a property will have either its map table or its
  # match tables written but not both.  Again, what gets written is controlled
-# by lists which can easily be changed.
+# by lists which can easily be changed.  Properties have a 'Type', like
+# binary, or string, or enum depending on how many match tables there are and
+# the content of the maps.  This 'Type' is different than a range 'Type', so
+# don't get confused by the two concepts having the same name.
  #
  # For information about the Unicode properties, see Unicode's UAX44 document:
  
@@ -176,11 +188,11 @@ my $unicode_reference_url = 'http://www.unicode.org/reports/tr44/';
  # More information on Unicode version glitches is further down in these
  # introductory comments.
  #
-# This program works on all properties as of 5.2, though the files for some
-# are suppressed from apparent lack of demand for them.  You can change which
-# are output by changing lists in this program.
+# This program works on all non-provisional properties as of 6.0, though the
+# files for some are suppressed from apparent lack of demand for them.  You
+# can change which are output by changing lists in this program.
  #
-# The old version of mktables emphasized the term "Fuzzy" to mean Unocde's
+# The old version of mktables emphasized the term "Fuzzy" to mean Unicode's
  # loose matching rules (from Unicode TR18):
  #
  #    The recommended names for UCD properties and property values are in
@@ -350,6 +362,18 @@ my $unicode_reference_url = 'http://www.unicode.org/reports/tr44/';
  # to 1, and every file whose object is in @input_file_objects and doesn't have
  # a, 'non_skip => 1,' in its constructor will be skipped.
  #
+# To compare the output tables, it may be useful to specify the -annotate
+# flag.  This causes the tables to expand so there is one entry for each
+# non-algorithmically named code point giving, currently, its name and its
+# graphic representation if printable (and you have a font that knows about
+# it).  This makes it easier to see what the particular code points are in
+# each output table.  The tables are usable, but because they don't have
+# ranges (for the most part), a Perl using them will run slower.  Non-named
+# code points are annotated with a description of their status, and contiguous
+# ones with the same description will be output as a range rather than
+# individually.  Algorithmically named characters are also output as ranges,
+# except when there are just a few contiguous ones.
+#
  # FUTURE ISSUES
  #
  # The program would break if Unicode were to change its names so that
@@ -399,7 +423,7 @@ my $unicode_reference_url = 'http://www.unicode.org/reports/tr44/';
  # Unicode_Radical_Stroke was listed in those files, so if the Unihan database
  # is present in the directory, a table will be generated for that property.
  # In 5.2, several more properties were added.  For your convenience, the two
-# arrays are initialized with all the 5.2 listed properties that are also in
+# arrays are initialized with all the 6.0 listed properties that are also in
  # earlier releases.  But these are commented out.  You can just uncomment the
  # ones you want, or use them as a template for adding entries for other
  # properties.
@@ -454,7 +478,7 @@ my $unicode_reference_url = 'http://www.unicode.org/reports/tr44/';
  #
  # Here are some observations about some of the issues in early versions:
  #
-# The number of code points in \p{alpha} halve in 2.1.9.  It turns out that
+# The number of code points in \p{alpha} halved in 2.1.9.  It turns out that
  # the reason is that the CJK block starting at 4E00 was removed from PropList,
  # and was not put back in until 3.1.0
  #
@@ -614,11 +638,10 @@ my $make_list = 1;             # ? Should we write $file_list.  Set to always
                                 # special things
  my $glob_list = 0;             # ? Should we try to include unknown .txt files
                                 # in the input.
-my $output_range_counts = 1;   # ? Should we include the number of code points
-                               # in ranges in the output
-my $output_names = 0;          # ? Should character names be in the output
-my @viacode;                   # Contains the 1 million character names, if
-                               # $output_names is true
+my $output_range_counts = $debugging_build;   # ? Should we include the number
+                                              # of code points in ranges in
+                                              # the output
+my $annotate = 0;              # ? Should character names be in the output
  
  # Verbosity levels; 0 is quiet
  my $NORMAL_VERBOSITY = 1;
@@ -675,8 +698,10 @@ while (@ARGV) {
      elsif ($arg eq '-c') {
          $output_range_counts = ! $output_range_counts
      }
-    elsif ($arg eq '-output_names') {
-        $output_names = 1;
+    elsif ($arg eq '-annotate') {
+        $annotate = 1;
+        $debugging_build = 1;
+        $output_range_counts = 1;
      }
      else {
          my $with_c = 'with';
@@ -702,9 +727,10 @@ usage: $0 [-c|-p|-q|-v|-w] [-C dir] [-L filelist] [ -P pod_dir ]
    -maketest   : Make test script 'TestProp.pl' in current (or -C directory),
                  overrides -T
    -makelist   : Rewrite the file list $file_list based on current setup
-  -output_names : Output each character's name in the table files; useful for
-                doing what-ifs, looking at diffs; is slow, memory intensive,
-                resulting tables are usable but very large.
+  -annotate   : Output an annotation for each character in the table files;
+                useful for debugging mktables, looking at diffs; but is slow,
+                memory intensive; resulting tables are usable but slow and
+                very large.
    -check A B  : Executes $0 only if A and B are the same
  END
      }
@@ -712,7 +738,7 @@ END
  
  # Stores the most-recently changed file.  If none have changed, can skip the
  # build
-my $youngest = -M $0;   # Do this before the chdir!
+my $most_recent = (stat $0)[9];   # Do this before the chdir!
  
  # Change directories now, because need to read 'version' early.
  if ($use_directory) {
@@ -787,7 +813,7 @@ if ($v_version gt v3.2.0) {
                                  'Canonical_Combining_Class=Attached_Below_Left'
  }
  
-# These are listed in the Property aliases file in 5.2, but Unihan is ignored
+# These are listed in the Property aliases file in 6.0, but Unihan is ignored
  # unless explicitly added.
  if ($v_version ge v5.2.0) {
      my $unihan = 'Unihan; remove from list if using Unihan';
@@ -813,6 +839,26 @@ if ($v_version ge v5.2.0) {
      }
  }
  
+# Enum values for to_output_map() method in the Map_Table package.
+my $EXTERNAL_MAP = 1;
+my $INTERNAL_MAP = 2;
+
+# To override computed values for writing the map tables for these properties.
+# The default for enum map tables is to write them out, so that the Unicode
+# .txt files can be removed, but all the data to compute any property value
+# for any code point is available in a more compact form.
+my %global_to_output_map = (
+    # Needed by UCD.pm, but don't want to publicize that it exists, so won't
+    # get stuck supporting it if things change.  Since it is a STRING property,
+    # it normally would be listed in the pod, but INTERNAL_MAP suppresses
+    # that.
+    Unicode_1_Name => $INTERNAL_MAP,
+
+    Present_In => 0,                # Suppress, as easily computed from Age
+    Canonical_Combining_Class => 0, # Duplicate of CombiningClass.pl
+    Block => 0,                     # Suppress, as Blocks.txt is retained.
+);
+
  # Properties that this program ignores.
  my @unimplemented_properties = (
  'Unicode_Radical_Stroke'    # Remove if changing to handle this one.
@@ -830,10 +876,10 @@ my %why_obsolete;    # Documentation only
  
      my $other_properties = 'other properties';
      my $contributory = "Used by Unicode internally for generating $other_properties and not intended to be used stand-alone";
-    my $why_no_expand  = "Easily computed, and yet doesn't cover the common encoding forms (UTF-16/8)",
+    my $why_no_expand  = "Deprecated by Unicode.  These are characters that expand to more than one character in the specified normalization form, but whether they actually take up more bytes or not depends on the encoding being used.  For example, a UTF-8 encoded character may expand to a different number of bytes than a UTF-32 encoded character.";
  
      %why_deprecated = (
-        'Grapheme_Link' => 'Deprecated by Unicode.  Use ccc=vr (Canonical_Combining_Class=Virama) instead',
+        'Grapheme_Link' => 'Deprecated by Unicode:  Duplicates ccc=vr (Canonical_Combining_Class=Virama)',
          'Jamo_Short_Name' => $contributory,
          'Line_Break=Surrogate' => 'Deprecated by Unicode because surrogates should never appear in well-formed text, and therefore shouldn\'t be the basis for line breaking',
          'Other_Alphabetic' => $contributory,
@@ -847,14 +893,13 @@ my %why_obsolete;    # Documentation only
      );
  
      %why_suppressed = (
-        # There is a lib/unicore/Decomposition.pl (used by normalize.pm) which
+        # There is a lib/unicore/Decomposition.pl (used by Normalize.pm) which
          # contains the same information, but without the algorithmically
          # determinable Hangul syllables'.  This file is not published, so its
          # existence is not noted in the comment.
          'Decomposition_Mapping' => 'Accessible via Unicode::Normalize',
  
          'ISO_Comment' => 'Apparently no demand for it, but can access it through Unicode::UCD::charinfo.  Obsoleted, and code points for it removed in Unicode 5.2',
-        'Unicode_1_Name' => "$simple, and no apparent demand for it, but can access it through Unicode::UCD::charinfo.  If there is no later name for a code point, then this one is used instead in charnames",
  
          'Simple_Case_Folding' => "$simple.  Can access this through Unicode::UCD::casefold",
          'Simple_Lowercase_Mapping' => "$simple.  Can access this through Unicode::UCD::charinfo",
@@ -864,10 +909,7 @@ my %why_obsolete;    # Documentation only
          'Name' => "Accessible via 'use charnames;'",
          'Name_Alias' => "Accessible via 'use charnames;'",
  
-        # These are sort of jumping the gun; deprecation is proposed for
-        # Unicode version 6.0, but they have never been exposed by Perl, and
-        # likely are soon to be deprecated, so best not to expose them.
-        FC_NFKC_Closure => 'Use NFKC_Casefold instead',
+        FC_NFKC_Closure => 'Supplanted in usage by NFKC_Casefold; otherwise not useful',
          Expands_On_NFC => $why_no_expand,
          Expands_On_NFD => $why_no_expand,
          Expands_On_NFKC => $why_no_expand,
@@ -889,9 +931,15 @@ my %why_obsolete;    # Documentation only
  
  if ($v_version ge 4.0.0) {
      $why_stabilized{'Hyphen'} = 'Use the Line_Break property instead; see www.unicode.org/reports/tr14';
+    if ($v_version ge 6.0.0) {
+        $why_deprecated{'Hyphen'} = 'Supplanted by Line_Break property values; see www.unicode.org/reports/tr14';
+    }
  }
-if ($v_version ge 5.2.0) {
+# In 5.2 the code points for ISO_Comment were removed, making it obsolete;
+# starting with 6.0 it is instead marked as deprecated.  The two version
+# tests have to be siblings: a test for 6.0 or later can never be true when
+# nested inside a test that requires the version to be earlier than 6.0.
+if ($v_version ge 5.2.0 && $v_version lt 6.0.0) {
      $why_obsolete{'ISO_Comment'} = 'Code points for it have been removed';
  }
+elsif ($v_version ge 6.0.0) {
+    $why_deprecated{'ISO_Comment'} = 'No longer needed for chart generation; otherwise not useful, and code points for it have been removed';
+}
  
  # Probably obsolete forever
@@ -910,7 +958,7 @@ END
  
  # If you are using the Unihan database, you need to add the properties that
  # you want to extract from it to this table.  For your convenience, the
-# properties in the 5.2 PropertyAliases.txt file are listed, commented out
+# properties in the 6.0 PropertyAliases.txt file are listed, commented out
  my @cjk_properties = split "\n", <<'END';
  #cjkAccountingNumeric; kAccountingNumeric
  #cjkOtherNumeric; kOtherNumeric
@@ -929,7 +977,7 @@ my @cjk_properties = split "\n", <<'END';
  END
  
  # Similarly for the property values.  For your convenience, the lines in the
-# 5.2 PropertyAliases.txt file are listed.  Just remove the first BUT NOT both
+# 6.0 PropertyAliases.txt file are listed.  Just remove the first BUT NOT both
  # '#' marks
  my @cjk_property_values = split "\n", <<'END';
  ## @missing: 0000..10FFFF; cjkAccountingNumeric; NaN
@@ -1012,6 +1060,10 @@ my %ignored_files = (
      'ReadMe.txt' => 'Just comments',
      'README.TXT' => 'Just comments',
      'StandardizedVariants.txt' => 'Only for glyph changes, not a Unicode character property.  Does not fit into current scheme where one code point is mapped',
+    'EmojiSources.txt' => 'Not of general utility: for Japanese legacy cell-phone applications',
+    'IndicMatraCategory.txt' => 'Provisional',
+    'IndicSyllabicCategory.txt' => 'Provisional',
+    'ScriptExtensions.txt' => 'Provisional',
  );
  
  ### End of externally interesting definitions, except for @input_file_objects
@@ -1103,6 +1155,7 @@ my $IF_NOT_EQUIVALENT = 1; # Replace only under certain conditions; details in
  my $UNCONDITIONALLY = 2;   # Replace without conditions.
  my $MULTIPLE = 4;          # Don't replace, but add a duplicate record if
                             # already there
+my $CROAK = 5;             # Die with an error if it is already there
  
  # Flags to give property statuses.  The phrases are to remind maintainers that
  # if the flag is changed, the indefinite article referring to it in the
@@ -1135,7 +1188,8 @@ my %status_past_participles = (
      $DEPRECATED => 'deprecated',
  );
  
-# The format of the values of the map tables:
+# The format of the values of the tables:
+my $EMPTY_FORMAT = "";
  my $BINARY_FORMAT = 'b';
  my $DECIMAL_FORMAT = 'd';
  my $FLOAT_FORMAT = 'f';
@@ -1143,6 +1197,7 @@ my $INTEGER_FORMAT = 'i';
  my $HEX_FORMAT = 'x';
  my $RATIONAL_FORMAT = 'r';
  my $STRING_FORMAT = 's';
+my $DECOMP_STRING_FORMAT = 'c';
  
  my %map_table_formats = (
      $BINARY_FORMAT => 'binary',
@@ -1151,7 +1206,8 @@ my %map_table_formats = (
      $INTEGER_FORMAT => 'integer',
      $HEX_FORMAT => 'positive hex whole number; a code point',
      $RATIONAL_FORMAT => 'rational: an integer or a fraction',
-    $STRING_FORMAT => 'arbitrary string',
+    $STRING_FORMAT => 'string',
+    $DECOMP_STRING_FORMAT => 'Perl\'s internal (Normalize.pm) decomposition mapping',
  );
  
  # Unicode didn't put such derived files in a separate directory at first.
@@ -1167,6 +1223,18 @@ my %nv_floating_to_rational; # maps numeric values floating point numbers to
                               # their rational equivalent
  my %loose_property_name_of; # Loosely maps property names to standard form
  
+# Most properties are immune to caseless matching, otherwise you would get
+# nonsensical results, as properties are a function of a code point, not
+# everything that is caselessly equivalent to that code point.  For example,
+# Changes_When_Case_Folded('s') should be false, whereas caselessly it would
+# be true because 's' and 'S' are equivalent caselessly.  However,
+# traditionally, [:upper:] and [:lower:] are equivalent caselessly, so we
+# extend that concept to those very few properties that are like this.  Each
+# such property will match the full range caselessly.  They are hard-coded in
+# the program; it's not worth trying to make it general as it's extremely
+# unlikely that they will ever change.
+my %caseless_equivalent_to;
+
  # These constants names and values were taken from the Unicode standard,
  # version 5.1, section 3.12.  They are used in conjunction with Hangul
  # syllables.  The '_string' versions are so generated tables can retain the
@@ -1214,6 +1282,8 @@ my $MAX_FLOATING_SLOP = 10 ** - $MIN_FRACTION_LENGTH; # And in floating terms
  my $gc;
  my $perl;
  my $block;
+my $perl_charname;
+my $print;
  
  # Are there conflicting names because of beginning with 'In_', or 'Is_'
  my $has_In_conflicts = 0;
@@ -1248,6 +1318,142 @@ sub objaddr($) {
      return pack 'J', $_[0];
  }
  
+# These are used only if $annotate is true.
+# The entire range of Unicode characters is examined to populate these
+# after all the input has been processed.  But most can be skipped, as they
+# have the same descriptive phrases, such as being unassigned
+my @viacode;            # Contains the 1 million character names
+my @printable;          # boolean: And are those characters printable?
+my @annotate_char_type; # Contains a type of those characters, specifically
+                        # for the purposes of annotation.
+my $annotate_ranges;    # A map of ranges of code points that have the same
+                        # name for the purposes of annotation.  They map to the
+                        # upper edge of the range, so that the end point can
+                        # be immediately found.  This is used to skip ahead to
+                        # the end of a range, and avoid processing each
+                        # individual code point in it.
+my $unassigned_sans_noncharacters; # A Range_List of the unassigned
+                                   # characters, but excluding those which are
+                                   # also noncharacter code points
+
+# The annotation types are an extension of the regular range types, though
+# some of the latter are folded into one.  Make the new types negative to
+# avoid conflicting with the regular types
+my $SURROGATE_TYPE = -1;
+my $UNASSIGNED_TYPE = -2;
+my $PRIVATE_USE_TYPE = -3;
+my $NONCHARACTER_TYPE = -4;
+my $CONTROL_TYPE = -5;
+my $UNKNOWN_TYPE = -6;  # Used only if there is a bug in this program
+
+sub populate_char_info ($) {
+    # Used only with the $annotate option.  Records, for the input code
+    # point, the information needed for outputting more detailed comments:
+    #   $viacode[$i]             its name, or a generic description if it
+    #                            has no name of its own
+    #   $printable[$i]           whether it is considered printable
+    #   $annotate_char_type[$i]  its type, for the purposes of annotation
+    #
+    # In void context nothing is returned.  Otherwise the return is the end
+    # point of the contiguous range of code points, beginning with the input
+    # one, that share essentially the same info; that is the input code
+    # point itself when no such range was determined.  When a range is
+    # found, it is also saved in $annotate_ranges, mapped to its end point.
+
+    my $i = shift;
+    Carp::carp_extra_args(\@_) if main::DEBUG && @_;
+
+    $viacode[$i] = $perl_charname->value_of($i) || "";
+
+    # A character is generally printable if Unicode says it is,
+    # but below we make sure that most Unicode general category 'C' types
+    # aren't.
+    $printable[$i] = $print->contains($i);
+
+    $annotate_char_type[$i] = $perl_charname->type_of($i) || 0;
+
+    # Only these two regular types are treated specially for annotation
+    # purposes; any other type is folded into 0.
+    $annotate_char_type[$i] = 0 if $annotate_char_type[$i] != $CP_IN_NAME
+                                && $annotate_char_type[$i] != $HANGUL_SYLLABLE;
+
+    # Give a generic name to all code points that don't have a real name.
+    # We output ranges, if applicable, for these.  Also calculate the end
+    # point of the range.
+    my $end;
+    if (! $viacode[$i]) {
+        if ($gc->table('Surrogate')->contains($i)) {
+            $viacode[$i] = 'Surrogate';
+            $annotate_char_type[$i] = $SURROGATE_TYPE;
+            $printable[$i] = 0;
+            $end = $gc->table('Surrogate')->containing_range($i)->end;
+        }
+        elsif ($gc->table('Private_Use')->contains($i)) {
+            $viacode[$i] = 'Private Use';
+            $annotate_char_type[$i] = $PRIVATE_USE_TYPE;
+            $printable[$i] = 0;
+            $end = $gc->table('Private_Use')->containing_range($i)->end;
+        }
+        elsif (Property::property_ref('Noncharacter_Code_Point')->table('Y')->
+                                                                contains($i))
+        {
+            $viacode[$i] = 'Noncharacter';
+            $annotate_char_type[$i] = $NONCHARACTER_TYPE;
+            $printable[$i] = 0;
+
+            # Use the same fully qualified accessor as in the test above, so
+            # that this doesn't depend on an alias existing in this package.
+            $end = Property::property_ref('Noncharacter_Code_Point')->
+                                    table('Y')->containing_range($i)->end;
+        }
+        elsif ($gc->table('Control')->contains($i)) {
+            $viacode[$i] = 'Control';
+            $annotate_char_type[$i] = $CONTROL_TYPE;
+            $printable[$i] = 0;
+            $end = 0x81 if $i == 0x80;  # Hard-code this one known case
+        }
+        elsif ($gc->table('Unassigned')->contains($i)) {
+            $viacode[$i] = 'Unassigned, block=' . $block->value_of($i);
+            $annotate_char_type[$i] = $UNASSIGNED_TYPE;
+            $printable[$i] = 0;
+
+            # Because we name the unassigned by the blocks they are in, it
+            # can't go past the end of that block, and it also can't go past
+            # the unassigned range it is in.  The special table makes sure
+            # that the non-characters, which are unassigned, are separated
+            # out.
+            $end = min($block->containing_range($i)->end,
+                       $unassigned_sans_noncharacters->containing_range($i)->
+                                                                         end);
+        }
+        else {
+            Carp::my_carp_bug("Can't figure out how to annotate "
+                              . sprintf("U+%04X", $i)
+                              . ".  Proceeding anyway.");
+            $viacode[$i] = 'UNKNOWN';
+            $annotate_char_type[$i] = $UNKNOWN_TYPE;
+            $printable[$i] = 0;
+        }
+    }
+
+    # Here, has a name, but if it's one in which the code point number is
+    # appended to the name, do that.
+    elsif ($annotate_char_type[$i] == $CP_IN_NAME) {
+        $viacode[$i] .= sprintf("-%04X", $i);
+        $end = $perl_charname->containing_range($i)->end;
+    }
+
+    # And here, has a name, but if it's a hangul syllable one, replace it with
+    # the correct name from the Unicode algorithm
+    elsif ($annotate_char_type[$i] == $HANGUL_SYLLABLE) {
+        use integer;
+        my $SIndex = $i - $SBase;
+        my $L = $LBase + $SIndex / $NCount;
+        my $V = $VBase + ($SIndex % $NCount) / $TCount;
+        my $T = $TBase + $SIndex % $TCount;
+        $viacode[$i] = "HANGUL SYLLABLE $Jamo{$L}$Jamo{$V}";
+        $viacode[$i] .= $Jamo{$T} if $T != $TBase;
+        $end = $perl_charname->containing_range($i)->end;
+    }
+
+    # The caller doesn't want a return, so there is no point in computing
+    # and saving the range.
+    return if ! defined wantarray;
+    return $i if ! defined $end;    # If not a range, return the input
+
+    # Save this whole range so can find the end point quickly
+    $annotate_ranges->add_map($i, $end, $end);
+
+    return $end;
+}
+
  # Commented code below should work on Perl 5.8.
  ## This 'require' doesn't necessarily work in miniperl, and even if it does,
  ## the native perl version of it (which is what would operate under miniperl)
@@ -2845,8 +3051,8 @@ sub trace { return main::trace(@_); }
          return $i + 1;
      }
  
-    sub value_of {
-        # Returns the value associated with the code point, undef if none
+    sub containing_range {
+        # Returns the range object that contains the code point, undef if none
  
          my $self = shift;
          my $codepoint = shift;
@@ -2857,7 +3063,34 @@ sub trace { return main::trace(@_); }
  
          # contains() returns 1 beyond where we should look
          no overloading;
-        return $ranges{pack 'J', $self}->[$i-1]->value;
+        return $ranges{pack 'J', $self}->[$i-1];
+    }
+
+    sub value_of {
+        # Finds the range holding the given code point and hands back the
+        # value stored on that range.  Returns nothing (undef in scalar
+        # context) when containing_range() yields no range for it.
+
+        my ($self, $code_point) = splice @_, 0, 2;
+        Carp::carp_extra_args(\@_) if main::DEBUG && @_;
+
+        my $holder = $self->containing_range($code_point);
+        return defined $holder ? $holder->value : ();
+    }
+
+    sub type_of {
+        # Finds the range holding the given code point and hands back the
+        # type of that range.  Returns nothing (undef in scalar context) when
+        # containing_range() yields no range for it.
+
+        my ($self, $code_point) = splice @_, 0, 2;
+        Carp::carp_extra_args(\@_) if main::DEBUG && @_;
+
+        my $holder = $self->containing_range($code_point);
+        return defined $holder ? $holder->type : ();
      }
  
      sub _search_ranges {
@@ -3019,7 +3252,9 @@ sub trace { return main::trace(@_); }
          #                         existing one, but has a different value,
          #                         don't replace the existing one, but insert
          #                         this one, so that the same range can occur
-        #                         multiple times.
+        #                         multiple times.  They are stored LIFO, so
+        #                         that the final one inserted is the first one
+        #                         returned in an ordered search of the table.
          #       => anything else  is the same as => $IF_NOT_EQUIVALENT
          #
          # "same value" means identical for non-type-0 ranges, and it means
@@ -3248,23 +3483,60 @@ sub trace { return main::trace(@_); }
              return;
          }
  
-        # Here, we have taken care of the case where $replace is $NO, which
-        # means that whatever action we now take is done unconditionally.  It
-        # still could be that this call will result in a no-op, if duplicates
-        # aren't allowed, and we are inserting a range that merely duplicates
-        # data already in the range list; or also if deleting a non-existent
-        # range.
-        # $i still points to the first potential affected range.  Now find the
-        # highest range affected, which will determine the length parameter to
-        # splice.  (The input range can span multiple existing ones.)  While
-        # we are looking through the range list, see also if this is an
-        # insertion that will change the values of at least one of the
-        # affected ranges.  We don't need to do this check unless this is an
-        # insertion of non-multiples, and also since this is a boolean, we
-        # don't need to do it if have already determined that it will make a
-        # change; just unconditionally change them.  $cdm is created to be 1
-        # if either of these is true. (The 'c' in the name comes from below)
-        my $cdm = ($operation eq '-' || $replace == $MULTIPLE);
+        # Here, we have taken care of the case where $replace is $NO.
+        # Remember that here, r[$i-1]->end < $start <= r[$i]->end
+        # If inserting a multiple record, this is where it goes, before the
+        # first (if any) existing one.  This implies an insertion, and no
+        # change to any existing ranges.  Note that $i can be -1 if this new
+        # range doesn't actually duplicate any existing, and comes at the
+        # beginning of the list.
+        if ($replace == $MULTIPLE) {
+
+            if ($start != $end) {
+                Carp::my_carp_bug("$owner_name_of{$addr}Can't cope with adding a multiple record when the range ($start..$end) contains more than one code point.  No action taken.");
+                return;
+            }
+
+            # Don't add an exact duplicate, as it isn't really a multiple
+            if ($end >= $r->[$i]->start) {
+                if ($r->[$i]->start != $r->[$i]->end) {
+                    Carp::my_carp_bug("$owner_name_of{$addr}Can't cope with adding a multiple record when the other range ($r->[$i]) contains more than one code point.  No action taken.");
+                    return;
+                }
+                return if $value eq $r->[$i]->value && $type eq $r->[$i]->type;
+            }
+
+            trace "Adding multiple record at $i with $start..$end, $value" if main::DEBUG && $to_trace;
+            my @return = splice @$r,
+                                $i,
+                                0,
+                                Range->new($start,
+                                           $end,
+                                           Value => $value,
+                                           Type => $type);
+            if (main::DEBUG && $to_trace) {
+                trace "After splice:";
+                trace 'i-2=[', $i-2, ']', $r->[$i-2] if $i >= 2;
+                trace 'i-1=[', $i-1, ']', $r->[$i-1] if $i >= 1;
+                trace "i  =[", $i, "]", $r->[$i] if $i >= 0;
+                trace 'i+1=[', $i+1, ']', $r->[$i+1] if $i < @$r - 1;
+                trace 'i+2=[', $i+2, ']', $r->[$i+2] if $i < @$r - 2;
+                trace 'i+3=[', $i+3, ']', $r->[$i+3] if $i < @$r - 3;
+            }
+            return @return;
+        }
+
+        # Here, we have taken care of $NO and $MULTIPLE replaces.  This leaves
+        # delete, insert, and replace either unconditionally or if not
+        # equivalent.  $i still points to the first potential affected range.
+        # Now find the highest range affected, which will determine the length
+        # parameter to splice.  (The input range can span multiple existing
+        # ones.)  If this isn't a deletion, while we are looking through the
+        # range list, see also if this is a replacement rather than a clean
+        # insertion; that is if it will change the values of at least one
+        # existing range.  Start off assuming it is an insert, until find it
+        # isn't.
+        my $clean_insert = $operation eq '+';
          my $j;        # This will point to the highest affected range
  
          # For non-zero types, the standard form is the value itself;
@@ -3277,12 +3549,19 @@ sub trace { return main::trace(@_); }
              # searching
              last if $end < $r->[$j]->start;
  
-            # Here, overlaps the range at $j.  If the value's don't match,
-            # and this is supposedly an insertion, it becomes a change
-            # instead.  This is what the 'c' stands for in $cdm.
-            if (! $cdm) {
+            # Here, overlaps the range at $j.  If the values don't match,
+            # and so far we think this is a clean insertion, it becomes a
+            # non-clean insertion, i.e., a 'change' or 'replace' instead.
+            if ($clean_insert) {
                  if ($r->[$j]->standard_form ne $standard_form) {
-                    $cdm = 1;
+                    $clean_insert = 0;
+                    if ($replace == $CROAK) {
+                        main::croak("The range to add "
+                        . sprintf("%04X", $start)
+                        . '-'
+                        . sprintf("%04X", $end)
+                        . " with value '$value' overlaps an existing range $r->[$j]");
+                    }
                  }
                  else {
  
@@ -3296,7 +3575,7 @@ sub trace { return main::trace(@_); }
                          # same, but the non-standardized values aren't.  If
                          # replacing unconditionally, then replace
                          if( $replace == $UNCONDITIONALLY) {
-                            $cdm = 1;
+                            $clean_insert = 0;
                          }
                          else {
  
@@ -3310,13 +3589,13 @@ sub trace { return main::trace(@_); }
                                              && $pre_existing =~ /[a-z]/;
  
                              if ($old_mixed != $new_mixed) {
-                                $cdm = 1 if $new_mixed;
+                                $clean_insert = 0 if $new_mixed;
                                  if (main::DEBUG && $to_trace) {
-                                    if ($cdm) {
-                                        trace "Replacing $pre_existing with $value";
+                                    if ($clean_insert) {
+                                        trace "Retaining $pre_existing over $value";
                                      }
                                      else {
-                                        trace "Retaining $pre_existing over $value";
+                                        trace "Replacing $pre_existing with $value";
                                      }
                                  }
                              }
@@ -3330,13 +3609,13 @@ sub trace { return main::trace(@_); }
                                  my $old_punct = $pre_existing =~ /[-_]/;
  
                                  if ($old_punct != $new_punct) {
-                                    $cdm = 1 if $new_punct;
+                                    $clean_insert = 0 if $new_punct;
                                      if (main::DEBUG && $to_trace) {
-                                        if ($cdm) {
-                                            trace "Replacing $pre_existing with $value";
+                                        if ($clean_insert) {
+                                            trace "Retaining $pre_existing over $value";
                                          }
                                          else {
-                                            trace "Retaining $pre_existing over $value";
+                                            trace "Replacing $pre_existing with $value";
                                          }
                                      }
                                  }   # else existing one is just as "good";
@@ -3359,44 +3638,6 @@ sub trace { return main::trace(@_); }
          $j--;        # $j now points to the highest affected range.
          trace "Final affected range is $j: $r->[$j]" if main::DEBUG && $to_trace;
  
-        # If inserting a multiple record, this is where it goes, after all the
-        # existing ones for this range.  This implies an insertion, and no
-        # change to any existing ranges.  Note that $j can be -1 if this new
-        # range doesn't actually duplicate any existing, and comes at the
-        # beginning of the list, in which case we can handle it like any other
-        # insertion, and is easier to do so.
-        if ($replace == $MULTIPLE && $j >= 0) {
-
-            # This restriction could be remedied with a little extra work, but
-            # it won't hopefully ever be necessary
-            if ($r->[$j]->start != $r->[$j]->end) {
-                Carp::my_carp_bug("$owner_name_of{$addr}Can't cope with adding a multiple when the other range ($r->[$j]) contains more than one code point.  No action taken.");
-                return;
-            }
-
-            # Don't add an exact duplicate, as it isn't really a multiple
-            return if $value eq $r->[$j]->value && $type eq $r->[$j]->type;
-
-            trace "Adding multiple record at $j+1 with $start..$end, $value" if main::DEBUG && $to_trace;
-            my @return = splice @$r,
-                                $j+1,
-                                0,
-                                Range->new($start,
-                                           $end,
-                                           Value => $value,
-                                           Type => $type);
-            if (main::DEBUG && $to_trace) {
-                trace "After splice:";
-                trace 'j-2=[', $j-2, ']', $r->[$j-2] if $j >= 2;
-                trace 'j-1=[', $j-1, ']', $r->[$j-1] if $j >= 1;
-                trace "j  =[", $j, "]", $r->[$j] if $j >= 0;
-                trace 'j+1=[', $j+1, ']', $r->[$j+1] if $j < @$r - 1;
-                trace 'j+2=[', $j+2, ']', $r->[$j+2] if $j < @$r - 2;
-                trace 'j+3=[', $j+3, ']', $r->[$j+3] if $j < @$r - 3;
-            }
-            return @return;
-        }
-
          # Here, have taken care of $NO and $MULTIPLE replaces.
          # $j points to the highest affected range.  But it can be < $i or even
          # -1.  These happen only if the insertion is entirely in the gap
@@ -3422,8 +3663,9 @@ sub trace { return main::trace(@_); }
          }
          else {
  
-            # Here the entire input range is not in the gap before $i.  There
-            # is an affected one, and $j points to the highest such one.
+            # Here part of the input range is not in the gap before $i.  Thus,
+            # there is at least one affected one, and $j points to the highest
+            # such one.
  
              # At this point, here is the situation:
              # This is not an insertion of a multiple, nor of tentative ($NO)
@@ -3439,21 +3681,21 @@ sub trace { return main::trace(@_); }
              #   r[$i-1]->end < $start <= $end <= r[$j]->end
              #
              # Also:
-            #   $cdm is a boolean which is set true if and only if this is a
-            #        change or deletion (multiple was handled above).  In
-            #        other words, it could be renamed to be just $cd.
+            #   $clean_insert is a boolean which is set true if and only if
+            #        this is a "clean insertion", i.e., not a change nor a
+            #        deletion (multiple was handled above).
  
              # We now have enough information to decide if this call is a no-op
-            # or not.  It is a no-op if it is a deletion of a non-existent
-            # range, or an insertion of already existing data.
+            # or not.  It is a no-op if this is an insertion of already
+            # existing data.
  
-            if (main::DEBUG && $to_trace && ! $cdm
+            if (main::DEBUG && $to_trace && $clean_insert
                                           && $i == $j
                                           && $start >= $r->[$i]->start)
              {
                      trace "no-op";
              }
-            return if ! $cdm      # change or delete => not no-op
+            return if $clean_insert
                        && $i == $j # more than one affected range => not no-op
  
                        # Here, r[$i-1]->end < $start <= $end <= r[$i]->end
@@ -3490,7 +3732,7 @@ sub trace { return main::trace(@_); }
              $extends_above = ($j+1 < $range_list_size
                              && $r->[$j+1]->start == $end +1
                              && $r->[$j+1]->standard_form eq $standard_form
-                            && $r->[$j-1]->type == $type);
+                            && $r->[$j+1]->type == $type);
          }
          if ($extends_below && $extends_above) { # Adds to both
              $splice_start--;     # start replace at element below
@@ -3515,7 +3757,7 @@ sub trace { return main::trace(@_); }
                  # Here the new element adds to the one below, but not to the
                  # one above.  If inserting, and only to that one range,  can
                  # just change its ending to include the new one.
-                if ($length == 0 && ! $cdm) {
+                if ($length == 0 && $clean_insert) {
                      $r->[$i-1]->set_end($end);
                      trace "inserted range extends range to below so it is now $r->[$i-1]" if main::DEBUG && $to_trace;
                      return;
@@ -3531,7 +3773,7 @@ sub trace { return main::trace(@_); }
  
                  # Here the new element adds to the one above, but not below.
                  # Mirror the code above
-                if ($length == 0 && ! $cdm) {
+                if ($length == 0 && $clean_insert) {
                      $r->[$j+1]->set_start($start);
                      trace "inserted range extends range to above so it is now $r->[$j+1]" if main::DEBUG && $to_trace;
                      return;
@@ -3612,7 +3854,7 @@ sub trace { return main::trace(@_); }
              trace "i  =[", $i, "]", $r->[$i];
              trace 'i+1=[', $i+1, ']', $r->[$i+1] if $i < @$r - 1;
              trace 'i+2=[', $i+2, ']', $r->[$i+2] if $i < @$r - 2;
-            trace "removed @return";
+            trace "removed ", @return if @return;
          }
  
          # An actual deletion could have changed the maximum in the list.
@@ -3965,6 +4207,36 @@ sub trace { return main::trace(@_); }
          return $self->_add_delete('+', $start, $end, "");
      }
  
+    sub matches_identically_to {
+        # Return a boolean as to whether or not two Range_Lists match identical
+        # sets of code points.  The sole parameter is the other list to
+        # compare against.  Only the boundaries of the ranges are compared
+        # here, not the values associated with them.
+
+        my $self = shift;
+        my $other = shift;
+        Carp::carp_extra_args(\@_) if main::DEBUG && @_;
+
+        # These are ordered in increasing real time to figure out (at least
+        # until a patch changes that and doesn't change this)
+        return 0 if $self->max != $other->max;
+        return 0 if $self->min != $other->min;
+        return 0 if $self->range_count != $other->range_count;
+        return 0 if $self->count != $other->count;
+
+        # Here they could be identical because all the tests above passed.
+        # The loop below is somewhat simpler since we know they have the same
+        # number of elements.  Compare range by range, until reach the end or
+        # find something that differs.  The lexicals deliberately are not
+        # named $a and $b, as those are special to sort() and declaring them
+        # with 'my' would break any sort block later added to this scope.
+        my @self_ranges = $self->ranges;
+        my @other_ranges = $other->ranges;
+        for my $i (0 .. $#self_ranges) {
+            my $self_range = $self_ranges[$i];
+            my $other_range = $other_ranges[$i];
+            trace "self $self_range; other $other_range"
+                                                if main::DEBUG && $to_trace;
+            return 0 if $self_range->start != $other_range->start
+                        || $self_range->end != $other_range->end;
+        }
+        return 1;
+    }
+
      sub is_code_point_usable {
          # This used only for making the test script.  See if the input
          # proposed trial code point is one that Perl will handle.  If second
@@ -3981,8 +4253,6 @@ sub trace { return main::trace(@_); }
          # the character very frequently used.
          return $try_hard if $code == 0x0000;
  
-        return 0 if $try_hard;  # XXX Temporary until fix utf8.c
-
          # shun non-character code points.
          return $try_hard if $code >= 0xFDD0 && $code <= 0xFDEF;
          return $try_hard if ($code & 0xFFFE) == 0xFFFE; # includes FFFF
@@ -4165,6 +4435,10 @@ sub trace { return main::trace(@_); }
      # A comment about its being obsolete, or whatever non normal status it has
      main::set_access('status_info', \%status_info, 'r');
  
+    my %caseless_equivalent;
+    # The table this is equivalent to under /i matching, if any.
+    main::set_access('caseless_equivalent', \%caseless_equivalent, 'r', 's');
+
      my %range_size_1;
      # Is the table to be output with each range only a single code point?
      # This is done to avoid breaking existing code that may have come to rely
@@ -4182,6 +4456,12 @@ sub trace { return main::trace(@_); }
      # The constructor can override the global flag of the same name.
      main::set_access('output_range_counts', \%output_range_counts, 'r');
  
+    my %format;
+    # The format of the entries of the table.  This is calculated from the
+    # data in the table (or passed in the constructor).  This is an enum e.g.,
+    # $STRING_FORMAT
+    main::set_access('format', \%format, 'r', 'p_s');
+
      sub new {
          # All arguments are key => value pairs, which you can see below, most
          # of which match fields documented above.  Otherwise: Pod_Entry,
@@ -4202,6 +4482,7 @@ sub trace { return main::trace(@_); }
          $full_name{$addr} = delete $args{'Full_Name'};
          my $complete_name = $complete_name{$addr}
                            = delete $args{'Complete_Name'};
+        $format{$addr} = delete $args{'Format'};
          $internal_only{$addr} = delete $args{'Internal_Only_Warning'} || 0;
          $output_range_counts{$addr} = delete $args{'Output_Range_Counts'};
          $property{$addr} = delete $args{'_Property'};
@@ -4209,7 +4490,7 @@ sub trace { return main::trace(@_); }
          $status{$addr} = delete $args{'Status'} || $NORMAL;
          $status_info{$addr} = delete $args{'_Status_Info'} || "";
          $range_size_1{$addr} = delete $args{'Range_Size_1'} || 0;
-        $range_size_1{$addr} = 1 if $output_names;  # Make sure 1 name per line
+        $caseless_equivalent{$addr} = delete $args{'Caseless_Equivalent'} || 0;
  
          my $description = delete $args{'Description'};
          my $externally_ok = delete $args{'Externally_Ok'};
@@ -4252,7 +4533,7 @@ sub trace { return main::trace(@_); }
              # not, is normal.  The lists are prioritized so the most serious
              # ones are checked first
              if (exists $why_suppressed{$complete_name}
-                # Don't suppress if overriden
+                # Don't suppress if overridden
                  && ! grep { $_ eq $complete_name{$addr} }
                                                      @output_mapped_properties)
              {
@@ -4319,12 +4600,18 @@ sub trace { return main::trace(@_); }
      # Here are the methods that are required to be defined by any derived
      # class
      for my $sub (qw(
+                    handle_special_range
                      append_to_body
                      pre_body
                  ))
-                # append_to_body and pre_body are called in the write() method
-                # to add stuff after the main body of the table, but before
-                # its close; and to prepend stuff before the beginning of the
+                # write() knows how to write out normal ranges, but it calls
+                # handle_special_range() when it encounters a non-normal one.
+                # append_to_body() is called by it after it has handled all
+                # ranges to add anything after the main portion of the table.
+                # And finally, pre_body() is called after all this to build up
+                # anything that should appear before the main portion of the
+                # table.  Doing it this way allows things in the middle to
+                # affect what should appear before the main portion of the
                  # table.
      {
          no strict "refs";
@@ -4564,6 +4851,8 @@ sub trace { return main::trace(@_); }
  
      sub add_comment { # Adds the parameter as a comment.
  
+        return unless $debugging_build;
+
          my $self = shift;
          my $comment = shift;
          Carp::carp_extra_args(\@_) if main::DEBUG && @_;
@@ -4631,7 +4920,10 @@ sub trace { return main::trace(@_); }
      }
  
      sub write {
-        # Write a representation of the table to its file.
+        # Write a representation of the table to its file.  It calls several
+        # functions furnished by sub-classes of this abstract base class to
+        # handle non-normal ranges, to add stuff before the table, and at its
+        # end.
  
          my $self = shift;
          my $tab_stops = shift;       # The number of tab stops over to put any
@@ -4644,18 +4936,30 @@ sub trace { return main::trace(@_); }
          my $addr = do { no overloading; pack 'J', $self; };
  
          # Start with the header
-        my @OUT = $self->header;
+        my @HEADER = $self->header;
  
          # Then the comments
-        push @OUT, "\n", main::simple_fold($comment{$addr}, '# '), "\n"
+        push @HEADER, "\n", main::simple_fold($comment{$addr}, '# '), "\n"
                                                          if $comment{$addr};
  
-        # Then any pre-body stuff.
-        my $pre_body = $self->pre_body;
-        push @OUT, $pre_body, "\n" if $pre_body;
-
-        # The main body looks like a 'here' document
-        push @OUT, "return <<'END';\n";
+        # Things discovered processing the main body of the document may
+        # affect what gets output before it, therefore pre_body() isn't called
+        # until after all other processing of the table is done.
+
+        # The main body looks like a 'here' document.  If annotating, get rid
+        # of the comments before passing to the caller, as some callers, such
+        # as charnames.pm, can't cope with them.  (Outputting range counts
+        # also introduces comments, but these don't show up in the tables that
+        # can't cope with comments, and there aren't that many of them that
+        # it's worth the extra real time to get rid of them).
+        my @OUT;
+        if ($annotate) {
+            # Use the line below in Perls that don't have /r
+            #push @OUT, 'return join "\n",  map { s/\s*#.*//mg; $_ } split "\n", <<\'END\';' . "\n";
+            push @OUT, "return <<'END' =~ s/\\s*#.*//mgr;\n";
+        } else {
+            push @OUT, "return <<'END';\n";
+        }
  
          if ($range_list{$addr}->is_empty) {
  
@@ -4667,40 +4971,49 @@ sub trace { return main::trace(@_); }
          }
          else {
              my $range_size_1 = $range_size_1{$addr};
+            my $format;            # Used only in $annotate option
+            my $include_name;      # Used only in $annotate option
+
+            if ($annotate) {
+
+                # if annotating each code point, must print 1 per line.
+                # The variable could point to a subroutine, and we don't want
+                # to lose that fact, so only set if not set already
+                $range_size_1 = 1 if ! $range_size_1;
+
+                $format = $self->format;
+
+                # The name of the character is output only for tables that
+                # don't already include the name in the output.
+                my $property = $self->property;
+                $include_name =
+                    !  ($property == $perl_charname
+                        || $property == main::property_ref('Unicode_1_Name')
+                        || $property == main::property_ref('Name')
+                        || $property == main::property_ref('Name_Alias')
+                       );
+            }
  
              # Output each range as part of the here document.
+            RANGE:
              for my $set ($range_list{$addr}->ranges) {
+                if ($set->type != 0) {
+                    $self->handle_special_range($set);
+                    next RANGE;
+                }
                  my $start = $set->start;
                  my $end   = $set->end;
                  my $value  = $set->value;
  
                  # Don't output ranges whose value is the one to suppress
-                next if defined $suppress_value && $value eq $suppress_value;
+                next RANGE if defined $suppress_value
+                              && $value eq $suppress_value;
  
-                # If has or wants a single point range output
-                if ($start == $end || $range_size_1) {
-                    if (ref $range_size_1 eq 'CODE') {
-                        for my $i ($start .. $end) {
-                            push @OUT, &$range_size_1($i, $value);
-                        }
-                    }
-                    else {
-                        for my $i ($start .. $end) {
-                            push @OUT, sprintf "%04X\t\t%s\n", $i, $value;
-                            if ($output_names) {
-                                if (! defined $viacode[$i]) {
-                                    $viacode[$i] =
-                                        Property::property_ref('Perl_Charnames')
-                                                                    ->value_of($i)
-                                        || "";
-                                }
-                                $OUT[-1] =~ s/\n/\t# $viacode[$i]\n/;
-                            }
-                        }
-                    }
-                }
-                else  {
-                    push @OUT, sprintf "%04X\t%04X\t%s", $start, $end, $value;
+                # If there is a range and doesn't need a single point range
+                # output
+                if ($start != $end && ! $range_size_1) {
+                    push @OUT, sprintf "%04X\t%04X", $start, $end;
+                    $OUT[-1] .= "\t$value" if $value ne "";
  
                      # Add a comment with the size of the range, if requested.
                      # Expand Tabs to make sure they all start in the same
@@ -4720,6 +5033,166 @@ sub trace { return main::trace(@_); }
                                              $count);
                          $OUT[-1] = Text::Tabs::unexpand($OUT[-1]);
                      }
+                    next RANGE;
+                }
+
+                # Here to output a single code point per line
+
+                # If not to annotate, use the simple formats
+                if (! $annotate) {
+
+                    # Use any passed in subroutine to output.
+                    if (ref $range_size_1 eq 'CODE') {
+                        for my $i ($start .. $end) {
+                            push @OUT, &{$range_size_1}($i, $value);
+                        }
+                    }
+                    else {
+
+                        # Here, caller is ok with default output.
+                        for (my $i = $start; $i <= $end; $i++) {
+                            push @OUT, sprintf "%04X\t\t%s\n", $i, $value;
+                        }
+                    }
+                    next RANGE;
+                }
+
+                # Here, wants annotation.
+                for (my $i = $start; $i <= $end; $i++) {
+
+                    # Get character information if don't have it already
+                    main::populate_char_info($i)
+                                        if ! defined $viacode[$i];
+                    my $type = $annotate_char_type[$i];
+
+                    # Figure out if should output the next code points as part
+                    # of a range or not.  If this is not in an annotation
+                    # range, then won't output as a range, so returns $i.
+                    # Otherwise use the end of the annotation range, but no
+                    # further than the maximum possible end point of the loop.
+                    my $range_end = main::min($annotate_ranges->value_of($i)
+                                                                        || $i,
+                                               $end);
+
+                    # Use a range if it is a range, and either is one of the
+                    # special annotation ranges, or the range is more than 3
+                    # long.  This last case causes the algorithmically named
+                    # code points to be output individually in spans of at
+                    # most 3, as they are the ones whose $type is > 0.
+                    if ($range_end != $i
+                        && ( $type < 0 || $range_end - $i > 2))
+                    {
+                        # Here is to output a range.  We don't allow a
+                        # caller-specified output format--just use the
+                        # standard one.
+                        push @OUT, sprintf "%04X\t%04X\t%s\t#", $i,
+                                                                $range_end,
+                                                                $value;
+                        my $range_name = $viacode[$i];
+
+                        # For the code points which end in their hex value, we
+                        # eliminate that from the output annotation, and
+                        # capitalize only the first letter of each word.
+                        if ($type == $CP_IN_NAME) {
+                            my $hex = sprintf "%04X", $i;
+                            $range_name =~ s/-$hex$//;
+                            my @words = split " ", $range_name;
+                            for my $word (@words) {
+                                $word = ucfirst(lc($word)) if $word ne 'CJK';
+                            }
+                            $range_name = join " ", @words;
+                        }
+                        elsif ($type == $HANGUL_SYLLABLE) {
+                            $range_name = "Hangul Syllable";
+                        }
+
+                        $OUT[-1] .= " $range_name" if $range_name;
+
+                        # Include the number of code points in the range
+                        my $count = main::clarify_number($range_end - $i + 1);
+                        $OUT[-1] .= " [$count]\n";
+
+                        # Skip to the end of the range
+                        $i = $range_end;
+                    }
+                    else { # Not in a range.
+                        my $comment = "";
+
+                        # When outputting the names of each character, use
+                        # the character itself if printable
+                        $comment .= "'" . chr($i) . "' " if $printable[$i];
+
+                        # To make it more readable, use a minimum indentation
+                        my $comment_indent;
+
+                        # Determine the annotation
+                        if ($format eq $DECOMP_STRING_FORMAT) {
+
+                            # This is very specialized, with the type of
+                            # decomposition beginning the line enclosed in
+                            # <...>, and the code points that the code point
+                            # decomposes to separated by blanks.  Create two
+                            # strings, one of the printable characters, and
+                            # one of their official names.
+                            (my $map = $value) =~ s/ \ * < .*? > \ +//x;
+                            my $tostr = "";
+                            my $to_name = "";
+                            my $to_chr = "";
+                            foreach my $to (split " ", $map) {
+                                $to = CORE::hex $to;
+                                $to_name .= " + " if $to_name;
+                                $to_chr .= chr($to);
+                                main::populate_char_info($to)
+                                                    if ! defined $viacode[$to];
+                                $to_name .=  $viacode[$to];
+                            }
+
+                            $comment .=
+                                    "=> '$to_chr'; $viacode[$i] => $to_name";
+                            $comment_indent = 25;   # Determined by experiment
+                        }
+                        else {
+
+                            # Assume that any table that has hex format is a
+                            # mapping of one code point to another.
+                            if ($format eq $HEX_FORMAT) {
+                                my $decimal_value = CORE::hex $value;
+                                main::populate_char_info($decimal_value)
+                                        if ! defined $viacode[$decimal_value];
+                                $comment .= "=> '"
+                                         . chr($decimal_value)
+                                         . "'; " if $printable[$decimal_value];
+                            }
+                            $comment .= $viacode[$i] if $include_name
+                                                        && $viacode[$i];
+                            if ($format eq $HEX_FORMAT) {
+                                my $decimal_value = CORE::hex $value;
+                                $comment .= " => $viacode[$decimal_value]"
+                                                    if $viacode[$decimal_value];
+                            }
+
+                            # If including the name, no need to indent, as the
+                            # name will already be way across the line.
+                            $comment_indent = ($include_name) ? 0 : 60;
+                        }
+
+                        # Use any passed in routine to output the base part of
+                        # the line.
+                        if (ref $range_size_1 eq 'CODE') {
+                            my $base_part = &{$range_size_1}($i, $value);
+                            chomp $base_part;
+                            push @OUT, $base_part;
+                        }
+                        else {
+                            push @OUT, sprintf "%04X\t\t%s", $i, $value;
+                        }
+
+                        # And add the annotation.
+                        $OUT[-1] = sprintf "%-*s\t# %s", $comment_indent,
+                                                         $OUT[-1],
+                                                         $comment if $comment;
+                        $OUT[-1] .= "\n";
+                    }
                  }
              } # End of loop through all the table's ranges
          }
@@ -4732,10 +5205,18 @@ sub trace { return main::trace(@_); }
          # And finish the here document.
          push @OUT, "END\n";
  
+        # Done with the main portion of the body.  Can now figure out what
+        # should appear before it in the file.
+        my $pre_body = $self->pre_body;
+        push @HEADER, $pre_body, "\n" if $pre_body;
+
          # All these files have a .pl suffix
          $file_path{$addr}->[-1] .= '.pl';
  
-        main::write($file_path{$addr}, \@OUT);
+        main::write($file_path{$addr},
+                    $annotate,      # utf8 iff annotating
+                    \@HEADER,
+                    \@OUT);
          return;
      }
  
@@ -4807,15 +5288,18 @@ sub trace { return main::trace(@_); }
      # Accessors for the range list stored in this table.  First for
      # unconditional
      for my $sub (qw(
+                    containing_range
                      contains
                      count
                      each_range
                      hash
                      is_empty
+                    matches_identically_to
                      max
                      min
                      range_count
                      reset_each_range
+                    type_of
                      value_of
                  ))
      {
@@ -4895,28 +5379,19 @@ sub trace { return main::trace(@_); }
                      \%anomalous_entries,
                      'readable_array');
  
-    my %format;
-    # The format of the entries of the table.  This is calculated from the
-    # data in the table (or passed in the constructor).  This is an enum e.g.,
-    # $STRING_FORMAT
-    main::set_access('format', \%format);
-
      my %core_access;
      # This is a string, solely for documentation, indicating how one can get
      # access to this property via the Perl core.
      main::set_access('core_access', \%core_access, 'r', 's');
  
-    my %has_specials;
-    # Boolean set when non-zero map-type ranges are added to this table,
-    # which happens in only a few tables.  This is purely for performance, to
-    # avoid having to search through every table upon output, so if all the
-    # non-zero maps got deleted before output, this would remain set, and the
-    # only penalty would be performance.  Currently, most map tables that get
-    # output have specials in them, so this doesn't help that much anyway.
-    main::set_access('has_specials', \%has_specials);
-
      my %to_output_map;
-    # Boolean as to whether or not to write out this map table
+    # Enum as to whether or not to write out this map table:
+    #   $EXTERNAL_MAP   means its existence is noted in the documentation, and
+    #                   it should not be removed nor its format changed.  This
+    #                   is done for those files that have traditionally been
+    #                   output.
+    #   $INTERNAL_MAP   means Perl reserves the right to do anything it wants
+    #                   with this file
      main::set_access('to_output_map', \%to_output_map, 's');
  
  
@@ -4931,9 +5406,9 @@ sub trace { return main::trace(@_); }
  
          my $core_access = delete $args{'Core_Access'};
          my $default_map = delete $args{'Default_Map'};
-        my $format = delete $args{'Format'};
          my $property = delete $args{'_Property'};
          my $full_name = delete $args{'Full_Name'};
+
          # Rest of parameters passed on
  
          my $range_list = Range_Map->new(Owner => $property);
@@ -4951,7 +5426,6 @@ sub trace { return main::trace(@_); }
          $anomalous_entries{$addr} = [];
          $core_access{$addr} = $core_access;
          $default_map{$addr} = $default_map;
-        $format{$addr} = $format;
  
          $self->initialize($initialize) if defined $initialize;
  
@@ -5000,8 +5474,6 @@ sub trace { return main::trace(@_); }
  
          my $addr = do { no overloading; pack 'J', $self; };
  
-        $has_specials{$addr} = 1 if $type;
-
          $self->_range_list->add_map($lower, $upper,
                                      $string,
                                      @_,
@@ -5061,11 +5533,6 @@ sub trace { return main::trace(@_); }
                                            Replace => $UNCONDITIONALLY);
          }
  
-        # Copy the specials information from the other table to $self
-        if ($has_specials{$other_addr}) {
-            $has_specials{$addr} = 1;
-        }
-
          return;
      }
  
@@ -5128,9 +5595,11 @@ sub trace { return main::trace(@_); }
          return $to_output_map{$addr} if defined $to_output_map{$addr};
  
          my $full_name = $self->full_name;
+        return $global_to_output_map{$full_name}
+                                if defined $global_to_output_map{$full_name};
  
-        # If table says to output, do so; if says to suppress it, do do.
-        return 1 if grep { $_ eq $full_name } @output_mapped_properties;
+        # If table says to output, do so; if says to suppress it, do so.
+        return $EXTERNAL_MAP if grep { $_ eq $full_name } @output_mapped_properties;
          return 0 if $self->status eq $SUPPRESSED;
  
          my $type = $self->property->type;
@@ -5139,10 +5608,10 @@ sub trace { return main::trace(@_); }
          return 0 if $type == $BINARY;
  
          # But do want to output string ones.
-        return 1 if $type == $STRING;
+        return $EXTERNAL_MAP if $type == $STRING;
  
-        # Otherwise is an $ENUM, don't output it
-        return 0;
+        # Otherwise is an $ENUM, do output it, for Perl's purposes
+        return $INTERNAL_MAP;
      }
  
      sub inverse_list {
@@ -5157,10 +5626,22 @@ sub trace { return main::trace(@_); }
          return ~ $current;
      }
  
+    sub header {
+        # Returns the parent class's header text, with $INTERNAL_ONLY appended
+        # when this table's map is output as an $INTERNAL_MAP.
+        my $self = shift;
+        Carp::carp_extra_args(\@_) if main::DEBUG && @_;
+        my $header_text = $self->SUPER::header();
+        $header_text .= $INTERNAL_ONLY if $self->to_output_map == $INTERNAL_MAP;
+        return $header_text;
+    }
+
      sub set_final_comment {
          # Just before output, create the comment that heads the file
          # containing this table.
  
+        return unless $debugging_build;
+
          my $self = shift;
          Carp::carp_extra_args(\@_) if main::DEBUG && @_;
  
@@ -5329,12 +5810,133 @@ END
  
      my %swash_keys; # Makes sure don't duplicate swash names.
  
+    # The remaining variables are temporaries used while writing each table,
+    # to output special ranges.
+    my $has_hangul_syllables;
+    my @multi_code_point_maps;  # Map is to more than one code point.
+
+    # The key is the base name of the code point, and the value is an
+    # array giving all the ranges that use this base name.  Each range
+    # is actually a hash giving the 'low' and 'high' values of it.
+    my %names_ending_in_code_point;
+
+    # Inverse mapping.  The list of ranges that have these kinds of
+    # names.  Each element contains the low, high, and base names in a
+    # hash.
+    my @code_points_ending_in_code_point;
+
+    sub handle_special_range {
+        # Called in the middle of write() for each range it doesn't know how to
+        # handle; saves in the temporaries above what pre_body() will output.
+
+        my $self = shift;
+        my $range = shift;
+        Carp::carp_extra_args(\@_) if main::DEBUG && @_;
+
+        my $addr = do { no overloading; pack 'J', $self; };
+
+        my $type = $range->type;
+
+        my $low = $range->start;
+        my $high = $range->end;
+        my $map = $range->value;
+
+        # No need to output the range if it maps to the default.
+        return if $map eq $default_map{$addr};
+
+        # Switch based on the map type...
+        if ($type == $HANGUL_SYLLABLE) {
+
+            # These are entirely algorithmically determinable based on
+            # some constants furnished by Unicode; for now, just set a
+            # flag to indicate that we have them.  After everything is figured
+            # out, we will output the code that does the algorithm.
+            $has_hangul_syllables = 1;
+        }
+        elsif ($type == $CP_IN_NAME) {
+
+            # Code points whose name ends in their code point are also
+            # algorithmically determinable, but need information about the map
+            # to do so.  Both the map and its inverse are stored in data
+            # structures output in the file.
+            push @{$names_ending_in_code_point{$map}->{'low'}}, $low;
+            push @{$names_ending_in_code_point{$map}->{'high'}}, $high;
+
+            push @code_points_ending_in_code_point, { low => $low,
+                                                        high => $high,
+                                                        name => $map
+                                                    };
+        }
+        elsif ($type == $MULTI_CP || $type == $NULL) {
+
+            # Multi-code point maps and null string maps have an entry
+            # for each code point in the range.  They use the same
+            # output format.
+            for my $code_point ($low .. $high) {
+
+                # The pack() below can't cope with surrogates.
+                if ($code_point >= 0xD800 && $code_point <= 0xDFFF) {
+                    Carp::my_carp("Surrogate code point '$code_point' in mapping to '$map' in $self.  No map created");
+                    next;
+                }
+
+                # Generate the hash entries for these in the form that
+                # utf8.c understands.
+                my $tostr = "";
+                my $to_name = "";
+                my $to_chr = "";
+                foreach my $to (split " ", $map) {
+                    if ($to !~ /^$code_point_re$/) {
+                        Carp::my_carp("Illegal code point '$to' in mapping '$map' from $code_point in $self.  No map created");
+                        next;
+                    }
+                    $tostr .= sprintf "\\x{%s}", $to;
+                    $to = CORE::hex $to;
+                    if ($annotate) {
+                        $to_name .= " + " if $to_name;
+                        $to_chr .= chr($to);
+                        main::populate_char_info($to)
+                                            if ! defined $viacode[$to];
+                        $to_name .=  $viacode[$to];
+                    }
+                }
+
+                # The key is the code point's UTF-8 bytes, each written as a
+                # \xHH escape (U0 unpacks the UTF-8 form); the value is $tostr.
+                my $utf8 = sprintf(qq["%s" => "$tostr",],
+                        join("", map { sprintf "\\x%02X", $_ }
+                            unpack("U0C*", pack("U", $code_point))));
+
+                # Add a comment so that a human reader can more easily
+                # see what's going on.
+                push @multi_code_point_maps,
+                        sprintf("%-45s # U+%04X", $utf8, $code_point);
+                if (! $annotate) {
+                    $multi_code_point_maps[-1] .= " => $map";
+                }
+                else {
+                    main::populate_char_info($code_point)
+                                    if ! defined $viacode[$code_point];
+                    $multi_code_point_maps[-1] .= " '"
+                        . chr($code_point)
+                        . "' => '$to_chr'; $viacode[$code_point] => $to_name";
+                }
+            }
+        }
+        else {
+            Carp::my_carp("Unrecognized map type '$type' in '$range' in $self.  Not written");
+        }
+
+        return;
+    }
+
      sub pre_body {
          # Returns the string that should be output in the file before the main
-        # body of this table.  This includes some hash entries identifying the
-        # format of the body, and what the single value should be for all
-        # ranges missing from it.  It also includes any code points which have
-        # map_types that don't go in the main table.
+        # body of this table.  It isn't called until the main body is
+        # calculated, saving a pass.  The string includes some hash entries
+        # identifying the format of the body, and what the single value should
+        # be for all ranges missing from it.  It also includes any code points
+        # which have map_types that don't go in the main table.
  
          my $self = shift;
          Carp::carp_extra_args(\@_) if main::DEBUG && @_;
@@ -5353,119 +5955,13 @@ END
          }
          $swash_keys{$name} = "$self";
  
-        my $default_map = $default_map{$addr};
-
          my $pre_body = "";
-        if ($has_specials{$addr}) {
-
-            # Here, some maps with non-zero type have been added to the table.
-            # Go through the table and handle each of them.  None will appear
-            # in the body of the table, so delete each one as we go.  The
-            # code point count has already been calculated, so ok to delete
-            # now.
-
-            my @multi_code_point_maps;
-            my $has_hangul_syllables = 0;
-
-            # The key is the base name of the code point, and the value is an
-            # array giving all the ranges that use this base name.  Each range
-            # is actually a hash giving the 'low' and 'high' values of it.
-            my %names_ending_in_code_point;
-
-            # Inverse mapping.  The list of ranges that have these kinds of
-            # names.  Each element contains the low, high, and base names in a
-            # hash.
-            my @code_points_ending_in_code_point;
-
-            my $range_map = $self->_range_list;
-            foreach my $range ($range_map->ranges) {
-                next unless $range->type != 0;
-                my $low = $range->start;
-                my $high = $range->end;
-                my $map = $range->value;
-                my $type = $range->type;
-
-                # No need to output the range if it maps to the default.  And
-                # the write method won't output it either, so no need to
-                # delete it to keep it from being output, and is faster to
-                # skip than to delete anyway.
-                next if $map eq $default_map;
-
-                # Delete the range to keep write() from trying to output it
-                $range_map->delete_range($low, $high);
-
-                # Switch based on the map type...
-                if ($type == $HANGUL_SYLLABLE) {
-
-                    # These are entirely algorithmically determinable based on
-                    # some constants furnished by Unicode; for now, just set a
-                    # flag to indicate that have them.  Below we will output
-                    # the code that does the algorithm.
-                    $has_hangul_syllables = 1;
-                }
-                elsif ($type == $CP_IN_NAME) {
-
-                    # If the name ends in the code point it represents, are
-                    # also algorithmically determinable, but need information
-                    # about the map to do so.  Both the map and its inverse
-                    # are stored in data structures output in the file.
-                    push @{$names_ending_in_code_point{$map}->{'low'}}, $low;
-                    push @{$names_ending_in_code_point{$map}->{'high'}}, $high;
-
-                    push @code_points_ending_in_code_point, { low => $low,
-                                                              high => $high,
-                                                              name => $map
-                                                            };
-                }
-                elsif ($range->type == $MULTI_CP || $range->type == $NULL) {
-
-                    # Multi-code point maps and null string maps have an entry
-                    # for each code point in the range.  They use the same
-                    # output format.
-                    for my $code_point ($low .. $high) {
-
-                        # The pack() below can't cope with surrogates.
-                        if ($code_point >= 0xD800 && $code_point <= 0xDFFF) {
-                            Carp::my_carp("Surrogage code point '$code_point' in mapping to '$map' in $self.  No map created");
-                            next;
-                        }
-
-                        # Generate the hash entries for these in the form that
-                        # utf8.c understands.
-                        my $tostr = "";
-                        foreach my $to (split " ", $map) {
-                            if ($to !~ /^$code_point_re$/) {
-                                Carp::my_carp("Illegal code point '$to' in mapping '$map' from $code_point in $self.  No map created");
-                                next;
-                            }
-                            $tostr .= sprintf "\\x{%s}", $to;
-                        }
  
-                        # I (khw) have never waded through this line to
-                        # understand it well enough to comment it.
-                        my $utf8 = sprintf(qq["%s" => "$tostr",],
-                                join("", map { sprintf "\\x%02X", $_ }
-                                    unpack("U0C*", pack("U", $code_point))));
-
-                        # Add a comment so that a human reader can more easily
-                        # see what's going on.
-                        push @multi_code_point_maps,
-                                sprintf("%-45s # U+%04X => %s", $utf8,
-                                                                $code_point,
-                                                                $map);
-                    }
-                }
-                else {
-                    Carp::my_carp("Unrecognized map type '$range->type' in '$range' in $self.  Using type 0 instead");
-                    $range_map->add_map($low, $high, $map, Replace => $UNCONDITIONALLY, Type => 0);
-                }
-            } # End of loop through all ranges
-
-            # Here have gone through the whole file.  If actually generated
-            # anything for each map type, add its respective header and
-            # trailer
-            if (@multi_code_point_maps) {
-                $pre_body .= <<END;
+        # Here we assume we were called after having gone through the whole
+        # file.  For each map type for which we actually generated anything,
+        # add its respective header and trailer
+        if (@multi_code_point_maps) {
+            $pre_body .= <<END;
  
  # Some code points require special handling because their mappings are each to
  # multiple code points.  These do not appear in the main body, but are defined
@@ -5476,59 +5972,59 @@ END
  # under "use bytes").  Each value is the UTF-8 of the translation, for speed.
  %utf8::ToSpec$name = (
  END
-                $pre_body .= join("\n", @multi_code_point_maps) . "\n);\n";
-            }
-
-            if ($has_hangul_syllables || @code_points_ending_in_code_point) {
-
-                # Convert these structures to output format.
-                my $code_points_ending_in_code_point =
-                    main::simple_dumper(\@code_points_ending_in_code_point,
-                                        ' ' x 8);
-                my $names = main::simple_dumper(\%names_ending_in_code_point,
-                                                ' ' x 8);
-
-                # Do the same with the Hangul names,
-                my $jamo;
-                my $jamo_l;
-                my $jamo_v;
-                my $jamo_t;
-                my $jamo_re;
-                if ($has_hangul_syllables) {
-
-                    # Construct a regular expression of all the possible
-                    # combinations of the Hangul syllables.
-                    my @L_re;   # Leading consonants
-                    for my $i ($LBase .. $LBase + $LCount - 1) {
-                        push @L_re, $Jamo{$i}
-                    }
-                    my @V_re;   # Middle vowels
-                    for my $i ($VBase .. $VBase + $VCount - 1) {
-                        push @V_re, $Jamo{$i}
-                    }
-                    my @T_re;   # Trailing consonants
-                    for my $i ($TBase + 1 .. $TBase + $TCount - 1) {
-                        push @T_re, $Jamo{$i}
-                    }
-
-                    # The whole re is made up of the L V T combination.
-                    $jamo_re = '('
-                               . join ('|', sort @L_re)
-                               . ')('
-                               . join ('|', sort @V_re)
-                               . ')('
-                               . join ('|', sort @T_re)
-                               . ')?';
-
-                    # These hashes needed by the algorithm were generated
-                    # during reading of the Jamo.txt file
-                    $jamo = main::simple_dumper(\%Jamo, ' ' x 8);
-                    $jamo_l = main::simple_dumper(\%Jamo_L, ' ' x 8);
-                    $jamo_v = main::simple_dumper(\%Jamo_V, ' ' x 8);
-                    $jamo_t = main::simple_dumper(\%Jamo_T, ' ' x 8);
+            $pre_body .= join("\n", @multi_code_point_maps) . "\n);\n";
+        }
+
+        if ($has_hangul_syllables || @code_points_ending_in_code_point) {
+
+            # Convert these structures to output format.
+            my $code_points_ending_in_code_point =
+                main::simple_dumper(\@code_points_ending_in_code_point,
+                                    ' ' x 8);
+            my $names = main::simple_dumper(\%names_ending_in_code_point,
+                                            ' ' x 8);
+
+            # Do the same with the Hangul names,
+            my $jamo;
+            my $jamo_l;
+            my $jamo_v;
+            my $jamo_t;
+            my $jamo_re;
+            if ($has_hangul_syllables) {
+
+                # Construct a regular expression of all the possible
+                # combinations of the Hangul syllables.
+                my @L_re;   # Leading consonants
+                for my $i ($LBase .. $LBase + $LCount - 1) {
+                    push @L_re, $Jamo{$i}
+                }
+                my @V_re;   # Middle vowels
+                for my $i ($VBase .. $VBase + $VCount - 1) {
+                    push @V_re, $Jamo{$i}
                  }
+                my @T_re;   # Trailing consonants
+                for my $i ($TBase + 1 .. $TBase + $TCount - 1) {
+                    push @T_re, $Jamo{$i}
+                }
+
+                # The whole re is made up of the L V T combination.
+                $jamo_re = '('
+                            . join ('|', sort @L_re)
+                            . ')('
+                            . join ('|', sort @V_re)
+                            . ')('
+                            . join ('|', sort @T_re)
+                            . ')?';
+
+                # These hashes needed by the algorithm were generated
+                # during reading of the Jamo.txt file
+                $jamo = main::simple_dumper(\%Jamo, ' ' x 8);
+                $jamo_l = main::simple_dumper(\%Jamo_L, ' ' x 8);
+                $jamo_v = main::simple_dumper(\%Jamo_V, ' ' x 8);
+                $jamo_t = main::simple_dumper(\%Jamo_T, ' ' x 8);
+            }
  
-                $pre_body .= <<END;
+            $pre_body .= <<END;
  
  # To achieve significant memory savings when this file is read in,
  # algorithmically derivable code points are omitted from the main body below.
@@ -5555,10 +6051,10 @@ $names
  $code_points_ending_in_code_point
      );
  END
-                # Earlier releases didn't have Jamos.  No sense outputting
-                # them unless will be used.
-                if ($has_hangul_syllables) {
-                    $pre_body .= <<END;
+            # Earlier releases didn't have Jamos.  No sense outputting
+            # them unless will be used.
+            if ($has_hangul_syllables) {
+                $pre_body .= <<END;
  
      # Convert from code point to Jamo short name for use in composing Hangul
      # syllable names
@@ -5600,9 +6096,9 @@ $jamo_t
      my \$TCount = $TCount;
      my \$NCount = \$VCount * \$TCount;
  END
-                } # End of has Jamos
+            } # End of has Jamos
  
-                $pre_body .= << 'END';
+            $pre_body .= << 'END';
  
      sub name_to_code_point_special {
          my $name = shift;
@@ -5610,8 +6106,8 @@ END
          # Returns undef if not one of the specially handled names; otherwise
          # returns the code point equivalent to the input name
  END
-                if ($has_hangul_syllables) {
-                    $pre_body .= << 'END';
+            if ($has_hangul_syllables) {
+                $pre_body .= << 'END';
  
          if (substr($name, 0, $HANGUL_SYLLABLE_LENGTH) eq $HANGUL_SYLLABLE) {
              $name = substr($name, $HANGUL_SYLLABLE_LENGTH);
@@ -5622,8 +6118,8 @@ END
              return ($L * $VCount + $V) * $TCount + $T + $SBase;
          }
  END
-                }
-                $pre_body .= << 'END';
+            }
+            $pre_body .= << 'END';
  
          # Name must end in '-code_point' for this to handle.
          if ($name !~ /^ (.*) - ($code_point_re) $/x) {
@@ -5657,8 +6153,8 @@ END
          # Returns the name of a code point if algorithmically determinable;
          # undef if not
  END
-                if ($has_hangul_syllables) {
-                    $pre_body .= << 'END';
+            if ($has_hangul_syllables) {
+                $pre_body .= << 'END';
  
          # If in the Hangul range, calculate the name based on Unicode's
          # algorithm
@@ -5673,8 +6169,8 @@ END
              return $name;
          }
  END
-                }
-                $pre_body .= << 'END';
+            }
+            $pre_body .= << 'END';
  
          # Look through list of these code points for one in range.
          foreach my $hash (@code_points_ending_in_code_point) {
@@ -5688,13 +6184,50 @@ END
  } # End closure
  
  END
-            } # End of has hangul or code point in name maps.
-        } # End of has specials
+        } # End of has hangul or code point in name maps.
+
+        my $format = $self->format;
+
+        my $return = <<END;
+# The name this swash is to be known by, with the format of the mappings in
+# the main body of the table, and what all code points missing from this file
+# map to.
+\$utf8::SwashInfo{'To$name'}{'format'} = '$format'; # $map_table_formats{$format}
+END
+        my $default_map = $default_map{$addr};
+        $return .= "\$utf8::SwashInfo{'To$name'}{'missing'} = '$default_map';";
+
+        if ($default_map eq $CODE_POINT) {
+            $return .= ' # code point maps to itself';
+        }
+        elsif ($default_map eq "") {
+            $return .= ' # code point maps to the null string';
+        }
+        $return .= "\n";
+
+        $return .= $pre_body;
+
+        return $return;
+    }
+
+    sub write {
+        # Write the table to the file.
+
+        my $self = shift;
+        Carp::carp_extra_args(\@_) if main::DEBUG && @_;
+
+        my $addr = do { no overloading; pack 'J', $self; };
+
+        # Clear the temporaries
+        $has_hangul_syllables = 0;
+        undef @multi_code_point_maps;
+        undef %names_ending_in_code_point;
+        undef @code_points_ending_in_code_point;
  
          # Calculate the format of the table if not already done.
-        my $format = $format{$addr};
-        my $property = $self->property;
-        my $type = $property->type;
+        my $format = $self->format;
+        my $type = $self->property->type;
+        my $default_map = $self->default_map;
          if (! defined $format) {
              if ($type == $BINARY) {
  
@@ -5725,6 +6258,8 @@ END
                      # most restrictive, and so on.
                      $format = $DECIMAL_FORMAT;
                      foreach my $range (@ranges) {
+                        next if $range->type != 0;  # Non-normal ranges don't
+                                                    # affect the main body
                          my $map = $range->value;
                          if ($map ne $default_map) {
                              last if $format eq $STRING_FORMAT;  # already at
@@ -5750,47 +6285,27 @@ END
              }
          } # end of calculating format
  
-        my $return = <<END;
-# The name this swash is to be known by, with the format of the mappings in
-# the main body of the table, and what all code points missing from this file
-# map to.
-\$utf8::SwashInfo{'To$name'}{'format'} = '$format'; # $map_table_formats{$format}
-END
-        my $missing = $default_map;
-        if ($missing eq $CODE_POINT
+        if ($default_map eq $CODE_POINT
              && $format ne $HEX_FORMAT
-            && ! defined $format{$addr})    # Is expected if was manually set
+            && ! defined $self->format)    # manual settings are always
+                                           # considered ok
          {
              Carp::my_carp_bug("Expecting hex format for mapping table for $self, instead got '$format'")
          }
-        $format{$addr} = $format;
-        $return .= "\$utf8::SwashInfo{'To$name'}{'missing'} = '$missing';";
-        if ($missing eq $CODE_POINT) {
-            $return .= ' # code point maps to itself';
-        }
-        elsif ($missing eq "") {
-            $return .= ' # code point maps to the null string';
-        }
-        $return .= "\n";
-
-        $return .= $pre_body;
  
-        return $return;
-    }
-
-    sub write {
-        # Write the table to the file.
-
-        my $self = shift;
-        Carp::carp_extra_args(\@_) if main::DEBUG && @_;
+        $self->_set_format($format);
  
-        my $addr = do { no overloading; pack 'J', $self; };
+        # Core Perl has a different definition of mapping ranges than we do,
+        # that is applicable mainly to mapping code points, so for tables
+        # where it is possible that core Perl could be used to read it,
+        # make it range size 1 to prevent possible confusion
+        $self->set_range_size_1(1) if $format eq $HEX_FORMAT;
  
          return $self->SUPER::write(
              ($self->property == $block)
                  ? 7     # block file needs more tab stops
                  : 3,
-            $default_map{$addr});   # don't write defaulteds
+            $default_map);   # don't write defaulteds
      }
  
      # Accessors for the underlying list that should fail if locked.
@@ -5908,6 +6423,7 @@ sub trace { return main::trace(@_); }
          # Optional
          my $initialize = delete $args{'Initialize'};
          my $matches_all = delete $args{'Matches_All'} || 0;
+        my $format = delete $args{'Format'};
          # Rest of parameters passed on.
  
          my $range_list = Range_List->new(Initialize => $initialize,
@@ -5930,6 +6446,7 @@ sub trace { return main::trace(@_); }
                                        Full_Name => $full_name,
                                        _Property => $property,
                                        _Range_List => $range_list,
+                                      Format => $EMPTY_FORMAT,
                                        );
          my $addr = do { no overloading; pack 'J', $self; };
  
@@ -5940,6 +6457,10 @@ sub trace { return main::trace(@_); }
          $leader{$addr} = $self;
          $parent{$addr} = $self;
  
+        if (defined $format && $format ne $EMPTY_FORMAT) {
+            Carp::my_carp_bug("'Format' must be '$EMPTY_FORMAT' in a match table instead of '$format'.  Using '$EMPTY_FORMAT'");
+        }
+
          return $self;
      }
  
@@ -6068,7 +6589,7 @@ sub trace { return main::trace(@_); }
          return;
      }
  
-    sub is_equivalent_to {
+    sub is_set_equivalent_to {
          # Return boolean of whether or not the other object is a table of this
          # type and has been marked equivalent to this one.
  
@@ -6081,7 +6602,7 @@ sub trace { return main::trace(@_); }
          unless ($other->isa(__PACKAGE__)) {
              my $ref_other = ref $other;
              my $ref_self = ref $self;
-            Carp::my_carp_bug("Argument to 'is_equivalent_to' must be another $ref_self, not a '$ref_other'.  $other not set equivalent to $self.");
+            Carp::my_carp_bug("Argument to 'is_set_equivalent_to' must be another $ref_self, not a '$ref_other'.  $other not set equivalent to $self.");
              return 0;
          }
  
@@ -6091,43 +6612,6 @@ sub trace { return main::trace(@_); }
          return;
      }
  
-    sub matches_identically_to {
-        # Return a boolean as to whether or not two tables match identical
-        # sets of code points.
-
-        my $self = shift;
-        my $other = shift;
-        Carp::carp_extra_args(\@_) if main::DEBUG && @_;
-
-        unless ($other->isa(__PACKAGE__)) {
-            my $ref_other = ref $other;
-            my $ref_self = ref $self;
-            Carp::my_carp_bug("Argument to 'matches_identically_to' must be another $ref_self, not a '$ref_other'.  $other not set equivalent to $self.");
-            return 0;
-        }
-
-        # These are ordered in increasing real time to figure out (at least
-        # until a patch changes that and doesn't change this)
-        return 0 if $self->max != $other->max;
-        return 0 if $self->min != $other->min;
-        return 0 if $self->range_count != $other->range_count;
-        return 0 if $self->count != $other->count;
-
-        # Here they could be identical because all the tests above passed.
-        # The loop below is somewhat simpler since we know they have the same
-        # number of elements.  Compare range by range, until reach the end or
-        # find something that differs.
-        my @a_ranges = $self->_range_list->ranges;
-        my @b_ranges = $other->_range_list->ranges;
-        for my $i (0 .. @a_ranges - 1) {
-            my $a = $a_ranges[$i];
-            my $b = $b_ranges[$i];
-            trace "self $a; other $b" if main::DEBUG && $to_trace;
-            return 0 if $a->start != $b->start || $a->end != $b->end;
-        }
-        return 1;
-    }
-
      sub set_equivalent_to {
          # Set $self equivalent to the parameter table.
          # The required Related => 'x' parameter is a boolean indicating
@@ -6138,7 +6622,7 @@ sub trace { return main::trace(@_); }
          # not quite so many.
          # If they are related, one must be a perl extension.  This is because
          # we can't guarantee that Unicode won't change one or the other in a
-        # later release even if they are idential now.
+        # later release even if they are identical now.
  
          my $self = shift;
          my $other = shift;
@@ -6158,18 +6642,27 @@ sub trace { return main::trace(@_); }
  
          # If already are equivalent, no need to re-do it;  if subroutine
          # returns null, it found an error, also do nothing
-        my $are_equivalent = $self->is_equivalent_to($other);
+        my $are_equivalent = $self->is_set_equivalent_to($other);
          return if ! defined $are_equivalent || $are_equivalent;
  
          my $addr = do { no overloading; pack 'J', $self; };
          my $current_leader = ($related) ? $parent{$addr} : $leader{$addr};
  
-        if ($related &&
-            ! $other->perl_extension
-            && ! $current_leader->perl_extension)
-        {
-            Carp::my_carp_bug("set_equivalent_to should have 'Related => 0 for equivalencing two Unicode properties.  Assuming $self is not related to $other");
-            $related = 0;
+        if ($related) {
+            if ($current_leader->perl_extension) {
+                if ($other->perl_extension) {
+                    Carp::my_carp_bug("Use add_alias() to set two Perl tables '$self' and '$other', equivalent.");
+                    return;
+                }
+            } elsif (! $other->perl_extension) {
+                Carp::my_carp_bug("set_equivalent_to should have 'Related => 0 for equivalencing two Unicode properties.  Assuming $self is not related to $other");
+                $related = 0;
+            }
+        }
+
+        if (! $self->is_empty && ! $self->matches_identically_to($other)) {
+            Carp::my_carp_bug("$self should be empty or match identically to $other.  Not setting equivalent");
+            return;
          }
  
          my $leader = do { no overloading; pack 'J', $current_leader; };
@@ -6184,6 +6677,7 @@ sub trace { return main::trace(@_); }
          my $status = $other->status;
          my $status_info = $other->status_info;
          my $matches_all = $matches_all{other_addr};
+        my $caseless_equivalent = $other->caseless_equivalent;
          foreach my $table ($current_leader, @{$equivalents{$leader}}) {
              next if $table == $other;
              trace "setting $other to be the leader of $table, status=$status" if main::DEBUG && $to_trace;
@@ -6197,6 +6691,7 @@ sub trace { return main::trace(@_); }
                  $parent{$table_addr} = $other;
                  push @{$children{$other_addr}}, $table;
                  $table->set_status($status, $status_info);
+                $self->set_caseless_equivalent($caseless_equivalent);
              }
          }
  
@@ -6238,6 +6733,8 @@ sub trace { return main::trace(@_); }
          # ones that share the same file.  It lists all such tables, ordered so
          # that related ones are together.
  
+        return unless $debugging_build;
+
          my $leader = shift;   # Should only be called on the leader table of
                                # an equivalent group
          Carp::carp_extra_args(\@_) if main::DEBUG && @_;
@@ -6634,7 +7131,7 @@ sub trace { return main::trace(@_) if main::DEBUG && $to_trace }
          # each of them is stored in %alias_to_property_of as they are defined.
          # But it's possible that this subroutine will be called with some
          # variant, so if the initial lookup fails, it is repeated with the
-        # standarized form of the input name.  If found, besides returning the
+        # standardized form of the input name.  If found, besides returning the
          # result, the input name is added to the list so future calls won't
          # have to do the conversion again.
  
@@ -6788,7 +7285,7 @@ sub trace { return main::trace(@_) if main::DEBUG && $to_trace }
                          . " argument to '-='.  Subtraction ignored.");
              return $self;
          }
-        elsif ($reversed) {   # Shouldnt happen in a -=, but just in case
+        elsif ($reversed) {   # Shouldn't happen in a -=, but just in case
              Carp::my_carp_bug("Can't cope with a "
              .  __PACKAGE__
              . " being the first parameter in a '-='.  Subtraction ignored.");
@@ -7143,6 +7640,7 @@ sub trace { return main::trace(@_) if main::DEBUG && $to_trace }
                      aliases
                      comment
                      complete_name
+                    containing_range
                      core_access
                      count
                      default_map
@@ -7175,6 +7673,7 @@ sub trace { return main::trace(@_) if main::DEBUG && $to_trace }
                      status
                      status_info
                      to_output_map
+                    type_of
                      value_of
                      write
                  ))
@@ -7209,7 +7708,7 @@ sub join_lines($) {
      # A blank separates the joined lines except if there is a break; an extra
      # blank is inserted after a period ending a line.
  
-    # Intialize the return with the first line.
+    # Initialize the return with the first line.
      my ($return, @lines) = split "\n", shift;
  
      # If the first line is null, it was an empty line, add the \n back in
@@ -7451,19 +7950,15 @@ sub force_unlink ($) {
      return;
  }
  
-sub write ($\@) {
-    # Given a filename and a reference to an array of lines, write the lines
-    # to the file
+sub write ($$@) {
+    # Given a filename and references to arrays of lines, write the lines of
+    # each array to the file
      # Filename can be given as an arrayref of directory names
  
-    my $file  = shift;
-    my $lines_ref = shift;
-    Carp::carp_extra_args(\@_) if main::DEBUG && @_;
+    return Carp::carp_too_few_args(\@_, 3) if main::DEBUG && @_ < 3;
  
-    if (! defined $lines_ref) {
-        Carp::my_carp("Missing lines to write parameter for $file.  Writing skipped;");
-        return;
-    }
+    my $file  = shift;
+    my $use_utf8 = shift;
  
      # Get into a single string if an array, and get rid of, in Unix terms, any
      # leading '.'
@@ -7476,10 +7971,6 @@ sub write ($\@) {
  
      push @files_actually_output, $file;
  
-    unless (@$lines_ref) {
-        Carp::my_carp("Output file '$file' is empty; writing it anyway;");
-    }
-
      force_unlink ($file);
  
      my $OUT;
@@ -7488,7 +7979,15 @@ sub write ($\@) {
          return;
      }
  
-    print $OUT @$lines_ref or die Carp::my_carp("write to '$file' failed: $!");
+    binmode $OUT, ":utf8" if $use_utf8;
+
+    while (defined (my $lines_ref = shift)) {
+        unless (@$lines_ref) {
+            Carp::my_carp("An array of lines for writing to file '$file' is empty; writing it anyway;");
+        }
+
+        print $OUT @$lines_ref or die Carp::my_carp("write to '$file' failed: $!");
+    }
      close $OUT or die Carp::my_carp("close '$file' failed: $!");
  
      print "$file written.\n" if $verbosity >= $VERBOSE;
@@ -7511,7 +8010,7 @@ sub Standardize($) {
      $name =~ s/^\s+//g;
      $name =~ s/\s+$//g;
  
-    # Convert interior white space and hypens into underscores.
+    # Convert interior white space and hyphens into underscores.
      $name =~ s/ (?<= .) [ -]+ (.) /_$1/xg;
  
      # Capitalize the letter following an underscore, and convert a sequence of
@@ -7539,6 +8038,30 @@ sub standardize ($) {
      return lc $name;
  }
  
+sub utf8_heavy_name ($$) {
+    # Returns the name that utf8_heavy.pl will use to find a table, built
+    # from the table's property and the given alias.  XXX perhaps this should
+    # live somewhere shared, like Heavy.pl, so that utf8_heavy can use it
+    # directly instead of duplicating code that can get out of sync.
+
+    my $table = shift;
+    my $alias = shift;
+    Carp::carp_extra_args(\@_) if main::DEBUG && @_;
+
+    my $property = $table->property;
+    $property = ($property == $perl)
+                ? ""                # 'perl' is never explicitly stated
+                : standardize($property->name) . '=';
+    if ($alias->loose_match) {      # Loose aliases get standardized
+        return $property . standardize($alias->name);
+    }
+    else {
+        return lc ($property . $alias->name);   # Otherwise just lowercased
+    }
+
+    return;     # Not reached; both branches above return
+}
+
  {   # Closure
  
      my $indent_increment = " " x 2;
@@ -7822,7 +8345,7 @@ sub finish_property_setup {
          }
      }
  
-    # This entry is still missing as of 5.2, perhaps because no short name for
+    # This entry is still missing as of 6.0, perhaps because no short name for
      # it.
      if (-e 'NameAliases.txt') {
          my $aliases = property_ref('Name_Alias');
@@ -7854,9 +8377,9 @@ sub finish_property_setup {
      my $fold = property_ref('Case_Folding');
      $fold->set_file('Fold') if defined $fold;
  
-    # utf8.c can't currently cope with non range-size-1 for these, and even if
-    # it were changed to do so, someone else may be using them, expecting the
-    # old style
+    # utf8.c has a different meaning for non range-size-1 for map properties
+    # that this program doesn't currently handle; and even if it were changed
+    # to do so, some other code may be using them expecting range size 1.
      foreach my $property (qw {
                                  Case_Folding
                                  Lowercase_Mapping
@@ -8574,11 +9097,11 @@ sub output_perl_charnames_line ($$) {
          # 0374          ; NFD_QC; N
          # 003C..003E    ; Math
          #
-        # the fields are: "codepoint range ; property; map"
+        # the fields are: "codepoint-range ; property; map"
          #
          # meaning the codepoints in the range all have the value 'map' under
          # 'property'.
-        # Beginning and trailing white space in each field are not signficant.
+        # Beginning and trailing white space in each field are not significant.
          # Note there is not a trailing semi-colon in the above.  A trailing
          # semi-colon means the map is a null-string.  An omitted map, as
          # opposed to a null-string, is assumed to be 'Y', based on Unicode
@@ -8598,8 +9121,8 @@ sub output_perl_charnames_line ($$) {
          # file, in any order, interspersed in any way.  The first time a
          # property is seen, it gets information about that property and
          # caches it for quick retrieval later.  It also normalizes the maps
-        # so that only one of many synonym is stored.  The Unicode input files
-        # do use some multiple synonyms.
+        # so that only one of many synonyms is stored.  The Unicode input
+        # files do use some multiple synonyms.
  
          my $file = shift;
          Carp::carp_extra_args(\@_) if main::DEBUG && @_;
@@ -8908,19 +9431,17 @@ END
  
                  # If the map begins with a special command to us (enclosed in
                  # delimiters), extract the command(s).
-                if (substr($map, 0, 1) eq $CMD_DELIM) {
-                    while ($map =~ s/ ^ $CMD_DELIM (.*?) $CMD_DELIM //x) {
-                        my $command = $1;
-                        if ($command =~  / ^ $REPLACE_CMD= (.*) /x) {
-                            $replace = $1;
-                        }
-                        elsif ($command =~  / ^ $MAP_TYPE_CMD= (.*) /x) {
-                            $map_type = $1;
-                        }
-                        else {
-                           $file->carp_bad_line("Unknown command line: '$1'");
-                           next LINE;
-                        }
+                while ($map =~ s/ ^ $CMD_DELIM (.*?) $CMD_DELIM //x) {
+                    my $command = $1;
+                    if ($command =~  / ^ $REPLACE_CMD= (.*) /x) {
+                        $replace = $1;
+                    }
+                    elsif ($command =~  / ^ $MAP_TYPE_CMD= (.*) /x) {
+                        $map_type = $1;
+                    }
+                    else {
+                        $file->carp_bad_line("Unknown command line: '$1'");
+                        next LINE;
                      }
                  }
              }
@@ -9112,7 +9633,7 @@ END
      # the code point and name on each line.  This was actually the hardest
      # thing to design around.  The code points in those ranges may actually
      # have real maps not given by these two lines.  These maps will either
-    # be algorthimically determinable, or in the extracted files furnished
+    # be algorithmically determinable, or in the extracted files furnished
      # with the UCD.  In the event of conflicts between these extracted files,
      # and this one, Unicode says that this one prevails.  But it shouldn't
      # prevail for conflicts that occur in these ranges.  The data from the
@@ -9135,7 +9656,7 @@ END
          # Name_Alias properties.  (The final duplicates elements of the
          # first.)  A comment for it will later be constructed based on the
          # actual properties present and used
-        Property->new('Perl_Charnames',
+        $perl_charname = Property->new('Perl_Charnames',
                         Core_Access => '\N{...} and "use charnames"',
                         Default_Map => "",
                         Directory => File::Spec->curdir(),
@@ -9149,7 +9670,7 @@ END
          my $Perl_decomp = Property->new('Perl_Decomposition_Mapping',
                                          Directory => File::Spec->curdir(),
                                          File => 'Decomposition',
-                                        Format => $STRING_FORMAT,
+                                        Format => $DECOMP_STRING_FORMAT,
                                          Internal_Only_Warning => 1,
                                          Perl_Extension => 1,
                                          Default_Map => $CODE_POINT,
@@ -9585,7 +10106,6 @@ END
              # essentially be this code.)  This uses the algorithm published by
              # Unicode.
              if (property_ref('Decomposition_Mapping')->to_output_map) {
-        local $to_trace = 1 if main::DEBUG;
                  for (my $S = $SBase; $S < $SBase + $SCount; $S++) {
                      use integer;
                      my $SIndex = $S - $SBase;
@@ -9732,6 +10252,32 @@ END
          }
          return;
      }
+
+    sub filter_v6_ucd {
+
+        # Unicode 6.0 co-opted the name BELL for U+1F514, so change the input
+        # to pretend that U+0007 is ALERT instead, and for Perl 5.14, don't
+        # allow the BELL name for U+1F514, so that the old usage can be
+        # deprecated for one cycle.  Also sets U+070F's $BIDI field to AL.
+
+        return if $_ !~ /^(?:0007|1F514|070F);/;  # Other lines pass unchanged
+
+        my ($code_point, @fields) = split /\s*;\s*/, $_, -1;  # keep nulls
+        if ($code_point eq '0007') {
+            $fields[$CHARNAME] = "ALERT";
+        }
+        elsif ($code_point eq '070F') { # Unicode Corrigendum #8; see
+                            # http://www.unicode.org/versions/corrigendum8.html
+            $fields[$BIDI] = "AL";
+        }
+        elsif ($^V lt v5.17.0) { # For 5.18 will convert to use Unicode's name
+            $fields[$CHARNAME] = "";    # Here, the code point is U+1F514
+        }
+
+        $_ = join ';', $code_point, @fields;    # Reassemble the changed line
+
+        return;
+    }
  } # End closure for UnicodeData
  
  sub process_GCB_test {
@@ -9842,105 +10388,147 @@ sub filter_arabic_shaping_line {
      return;
  }
  
-sub setup_special_casing {
-    # SpecialCasing.txt contains the non-simple case change mappings.  The
-    # simple ones are in UnicodeData.txt, which should already have been read
-    # in to the full property data structures, so as to initialize these with
-    # the simple ones.  Then the SpecialCasing.txt entries overwrite the ones
-    # which have different full mappings.
-
-    # This routine sees if the simple mappings are to be output, and if so,
-    # copies what has already been put into the full mapping tables, while
-    # they still contain only the simple mappings.
-
-    # The reason it is done this way is that the simple mappings are probably
-    # not going to be output, so it saves work to initialize the full tables
-    # with the simple mappings, and then overwrite those relatively few
-    # entries in them that have different full mappings, and thus skip the
-    # simple mapping tables altogether.
-
-    my $file= shift;
-    Carp::carp_extra_args(\@_) if main::DEBUG && @_;
+{ # Closure
+    my $lc; # Table for lowercase mapping
+    my $tc;
+    my $uc;
+
+    sub setup_special_casing {
+        # SpecialCasing.txt contains the non-simple case change mappings.  The
+        # simple ones are in UnicodeData.txt, which should already have been
+        # read in to the full property data structures, so as to initialize
+        # these with the simple ones.  Then the SpecialCasing.txt entries
+        # overwrite the ones which have different full mappings.
+
+        # This routine sees if the simple mappings are to be output, and if
+        # so, copies what has already been put into the full mapping tables,
+        # while they still contain only the simple mappings.
+
+        # The reason it is done this way is that the simple mappings are
+        # probably not going to be output, so it saves work to initialize the
+        # full tables with the simple mappings, and then overwrite those
+        # relatively few entries in them that have different full mappings,
+        # and thus skip the simple mapping tables altogether.
+
+        my $file= shift;
+        Carp::carp_extra_args(\@_) if main::DEBUG && @_;
  
-    # For each of the case change mappings...
-    foreach my $case ('lc', 'tc', 'uc') {
-        my $full = property_ref($case);
-        unless (defined $full && ! $full->is_empty) {
-            Carp::my_carp_bug("Need to process UnicodeData before SpecialCasing.  Only special casing will be generated.");
+        $lc = property_ref('lc');
+        $tc = property_ref('tc');
+        $uc = property_ref('uc');
+
+        # For each of the case change mappings...
+        foreach my $case_table ($lc, $tc, $uc) {
+            my $case = $case_table->name;
+            my $full = property_ref($case);
+            unless (defined $full && ! $full->is_empty) {
+                Carp::my_carp_bug("Need to process UnicodeData before SpecialCasing.  Only special casing will be generated.");
+            }
+
+            # The simple version's name in each mapping merely has an 's' in
+            # front of the full one's
+            my $simple = property_ref('s' . $case);
+            $simple->initialize($full) if $simple->to_output_map();
+
+            my $simple_only = Property->new("_s$case",
+                    Type => $STRING,
+                    Default_Map => $CODE_POINT,
+                    Perl_Extension => 1,
+                    Description => "The simple mappings for $case for code points that have full mappings as well");
+            $simple_only->set_to_output_map($INTERNAL_MAP);
+            $simple_only->add_comment(join_lines( <<END
+This file is for UCD.pm so that it can construct simple mappings that would
+otherwise be lost because they are overridden by full mappings.
+END
+            ));
          }
  
-        # The simple version's name in each mapping merely has an 's' in front
-        # of the full one's
-        my $simple = property_ref('s' . $case);
-        $simple->initialize($case) if $simple->to_output_map();
+        return;
      }
  
-    return;
-}
+    sub filter_special_casing_line {
+        # Change the format of $_ from SpecialCasing.txt into something that
+        # the generic handler understands.  Each input line contains three
+        # case mappings.  This will generate three lines to pass to the
+        # generic handler for each of those.
  
-sub filter_special_casing_line {
-    # Change the format of $_ from SpecialCasing.txt into something that the
-    # generic handler understands.  Each input line contains three case
-    # mappings.  This will generate three lines to pass to the generic handler
-    # for each of those.
-
-    # The input syntax (after stripping comments and trailing white space is
-    # like one of the following (with the final two being entries that we
-    # ignore):
-    # 00DF; 00DF; 0053 0073; 0053 0053; # LATIN SMALL LETTER SHARP S
-    # 03A3; 03C2; 03A3; 03A3; Final_Sigma;
-    # 0307; ; 0307; 0307; tr After_I; # COMBINING DOT ABOVE
-    # Note the trailing semi-colon, unlike many of the input files.  That
-    # means that there will be an extra null field generated by the split
+        # The input syntax (after stripping comments and trailing white space)
+        # is like one of the following (with the final two being entries that
+        # we ignore):
+        # 00DF; 00DF; 0053 0073; 0053 0053; # LATIN SMALL LETTER SHARP S
+        # 03A3; 03C2; 03A3; 03A3; Final_Sigma;
+        # 0307; ; 0307; 0307; tr After_I; # COMBINING DOT ABOVE
+        # Note the trailing semi-colon, unlike many of the input files.  That
+        # means that there will be an extra null field generated by the split
  
-    my $file = shift;
-    Carp::carp_extra_args(\@_) if main::DEBUG && @_;
+        my $file = shift;
+        Carp::carp_extra_args(\@_) if main::DEBUG && @_;
  
-    my @fields = split /\s*;\s*/, $_, -1; # -1 => retain trailing null fields
+        my @fields = split /\s*;\s*/, $_, -1; # -1 => retain trailing null
+                                              # fields
+
+        # field #4 is when this mapping is conditional.  If any of these get
+        # implemented, it would be by hard-coding in the casing functions in
+        # the Perl core, not through tables.  But if there is a new condition
+        # we don't know about, output a warning.  We know about all the
+        # conditions through 6.0
+        if ($fields[4] ne "") {
+            my @conditions = split ' ', $fields[4];
+            if ($conditions[0] ne 'tr'  # We know that these languages have
+                                        # conditions, and some are multiple
+                && $conditions[0] ne 'az'
+                && $conditions[0] ne 'lt'
+
+                # And, we know about a single condition Final_Sigma, but
+                # nothing else.
+                && ($v_version gt v5.2.0
+                    && (@conditions > 1 || $conditions[0] ne 'Final_Sigma')))
+            {
+                $file->carp_bad_line("Unknown condition '$fields[4]'.  You should inspect it and either add code to handle it, or add to list of those that are to ignore");
+            }
+            elsif ($conditions[0] ne 'Final_Sigma') {
  
-    # field #4 is when this mapping is conditional.  If any of these get
-    # implemented, it would be by hard-coding in the casing functions in the
-    # Perl core, not through tables.  But if there is a new condition we don't
-    # know about, output a warning.  We know about all the conditions through
-    # 5.2
-    if ($fields[4] ne "") {
-        my @conditions = split ' ', $fields[4];
-        if ($conditions[0] ne 'tr'  # We know that these languages have
-                                    # conditions, and some are multiple
-            && $conditions[0] ne 'az'
-            && $conditions[0] ne 'lt'
-
-            # And, we know about a single condition Final_Sigma, but
-            # nothing else.
-            && ($v_version gt v5.2.0
-                && (@conditions > 1 || $conditions[0] ne 'Final_Sigma')))
-        {
-            $file->carp_bad_line("Unknown condition '$fields[4]'.  You should inspect it and either add code to handle it, or add to list of those that are to ignore");
+                    # Don't print out a message for Final_Sigma, because we
+                    # have hard-coded handling for it.  (But the standard
+                    # could change what the rule should be, but it wouldn't
+                    # show up here anyway.)
+
+                    print "# SKIPPING Special Casing: $_\n"
+                                                    if $verbosity >= $VERBOSE;
+            }
+            $_ = "";
+            return;
+        }
+        elsif (@fields > 6 || (@fields == 6 && $fields[5] ne "" )) {
+            $file->carp_bad_line('Extra fields');
+            $_ = "";
+            return;
          }
-        elsif ($conditions[0] ne 'Final_Sigma') {
  
-                # Don't print out a message for Final_Sigma, because we have
-                # hard-coded handling for it.  (But the standard could change
-                # what the rule should be, but it wouldn't show up here
-                # anyway.
+        $_ = "$fields[0]; lc; $fields[1]";
+        $file->insert_adjusted_lines("$fields[0]; tc; $fields[2]");
+        $file->insert_adjusted_lines("$fields[0]; uc; $fields[3]");
  
-                print "# SKIPPING Special Casing: $_\n"
-                                                    if $verbosity >= $VERBOSE;
+        # Copy any simple case change to the special tables constructed if
+        # being overridden by a multi-character case change.
+        if ($fields[1] ne $fields[0]
+            && (my $value = $lc->value_of(hex $fields[0])) ne $CODE_POINT)
+        {
+            $file->insert_adjusted_lines("$fields[0]; _slc; $value");
          }
-        $_ = "";
-        return;
-    }
-    elsif (@fields > 6 || (@fields == 6 && $fields[5] ne "" )) {
-        $file->carp_bad_line('Extra fields');
-        $_ = "";
+        if ($fields[2] ne $fields[0]
+            && (my $value = $tc->value_of(hex $fields[0])) ne $CODE_POINT)
+        {
+            $file->insert_adjusted_lines("$fields[0]; _stc; $value");
+        }
+        if ($fields[3] ne $fields[0]
+            && (my $value = $uc->value_of(hex $fields[0])) ne $CODE_POINT)
+        {
+            $file->insert_adjusted_lines("$fields[0]; _suc; $value");
+        }
+
          return;
      }
-
-    $_ = "$fields[0]; lc; $fields[1]";
-    $file->insert_adjusted_lines("$fields[0]; tc; $fields[2]");
-    $file->insert_adjusted_lines("$fields[0]; uc; $fields[3]");
-
-    return;
  }
  
  sub filter_old_style_case_folding {
@@ -9980,14 +10568,6 @@ sub filter_old_style_case_folding {
      # it takes no part in anything we do.
      my $to_output_simple;
  
-    # These are experimental, perhaps will need these to pass to regcomp.c to
-    # handle the cases where for example the Kelvin sign character folds to k,
-    # and in regcomp, we need to know which of the characters can have a
-    # non-latin1 char fold to it, so it doesn't do the optimizations it might
-    # otherwise.
-    my @latin1_singly_folded;
-    my @latin1_folded;
-
      sub setup_case_folding($) {
          # Read in the case foldings in CaseFolding.txt.  This handles both
          # simple and full case folding.
@@ -10030,16 +10610,18 @@ sub filter_old_style_case_folding {
          }
  
          # C: complete, F: full, or I: dotted uppercase I -> dotless lowercase
-        # I are all full foldings
-        if ($type eq 'C' || $type eq 'F' || $type eq 'I') {
-            $_ = "$range; Case_Folding; $map";
+        # I are all full foldings; S is single-char.  For S, there is always
+        # an F entry, so we must allow multiple values for the same code
+        # point.  Fortunately this table doesn't need further manipulation
+        # which would preclude using multiple-values.  The S is now included
+        # so that _swash_inversion_hash() is able to construct closures
+        # without having to worry about F mappings.
+        if ($type eq 'C' || $type eq 'F' || $type eq 'I' || $type eq 'S') {
+            $_ = "$range; Case_Folding; $CMD_DELIM$REPLACE_CMD=$MULTIPLE$CMD_DELIM$map";
          }
          else {
              $_ = "";
-            if ($type ne 'S') {
-               $file->carp_bad_line('Expecting C F I S or T in second field');
-               return;
-            }
+            $file->carp_bad_line('Expecting C F I S or T in second field');
          }
  
          # C and S are simple foldings, but simple case folding is not needed
@@ -10048,31 +10630,9 @@ sub filter_old_style_case_folding {
              $file->insert_adjusted_lines("$range; Simple_Case_Folding; $map");
          }
  
-        # Experimental, see comment above
-        if ($type ne 'S' && hex($range) >= 256) {   # assumes range is 1 point
-            my @folded = split ' ', $map;
-            if (hex $folded[0] < 256 && @folded == 1) {
-                push @latin1_singly_folded, hex $folded[0];
-            }
-            foreach my $folded (@folded) {
-                push @latin1_folded, hex $folded if hex $folded < 256;
-            }
-        }
-
          return;
      }
  
-    sub post_fold {
-        # Experimental, see comment above
-        return;
-
-        #local $to_trace = 1 if main::DEBUG;
-        @latin1_singly_folded = uniques(@latin1_singly_folded);
-        @latin1_folded = uniques(@latin1_folded);
-        trace "latin1 single folded:", map { chr $_ } sort { $a <=> $b } @latin1_singly_folded if main::DEBUG && $to_trace;
-        trace "latin1 folded:", map { chr $_ } sort { $a <=> $b } @latin1_folded if main::DEBUG && $to_trace;
-        return;
-    }
  } # End case fold closure
  
  sub filter_jamo_line {
@@ -10398,7 +10958,7 @@ sub filter_blocks_lines {
          #                                one.
          #   Titlecase                    duplicates UnicodeData.txt: gc=lt
          #   Unassigned Code Value        duplicates UnicodeData.txt: gc=cc
-        #   Zero-width                   never made into offical property;
+        #   Zero-width                   never made into official property;
          #                                subset of gc=cf
          # Most of the properties have the same names in this file as in later
          # versions, but a couple do not.
@@ -10539,7 +11099,8 @@ sub finish_Unicode() {
  
                  # Add mappings to the property for each code point in the list
                  foreach my $range ($list->ranges) {
-                    $property->add_map($range->start, $range->end, $default);
+                    $property->add_map($range->start, $range->end, $default,
+                    Replace => $CROAK);
                  }
              }
  
@@ -10566,11 +11127,12 @@ sub finish_Unicode() {
          }
  
          # Add any remaining code points to the mapping, using the default for
-        # missing code points
+        # missing code points.
          if (defined (my $default_map = $property->default_map)) {
-            foreach my $range ($property->inverse_list->ranges) {
-                $property->add_map($range->start, $range->end, $default_map);
-            }
+
+            # This fills in any missing values with the default.
+            $property->add_map(0, $LAST_UNICODE_CODEPOINT,
+                               $default_map, Replace => $NO);
  
              # Make sure there is a match table for the default
              if (! defined $property->table($default_map)) {
@@ -10663,19 +11225,17 @@ END
          $LC->initialize($gc->table('Ll') + $gc->table('Lu'));
  
          # Lt not in release 1.
-        $LC += $gc->table('Lt') if defined $gc->table('Lt');
+        if (defined $gc->table('Lt')) {
+            $LC += $gc->table('Lt');
+            $gc->table('Lt')->set_caseless_equivalent($LC);
+        }
      }
      $LC->add_description('[\p{Ll}\p{Lu}\p{Lt}]');
  
+    $gc->table('Ll')->set_caseless_equivalent($LC);
+    $gc->table('Lu')->set_caseless_equivalent($LC);
+
      my $Cs = $gc->table('Cs');
-    if (defined $Cs) {
-        $Cs->add_note('Mostly not usable in Perl.');
-        $Cs->add_comment(join_lines(<<END
-Surrogates are used exclusively for I/O in UTF-16, and should not appear in
-Unicode text, and hence their use will generate (usually fatal) messages
-END
-        ));
-    }
  
  
      # Folding information was introduced later into Unicode data.  To get
@@ -10723,7 +11283,8 @@ sub compile_perl() {
      # range, with their names prefaced by 'Posix', to signify that these match
      # what the Posix standard says they should match.  A couple are
      # effectively this, but the name doesn't have 'Posix' in it because there
-    # just isn't any Posix equivalent.
+    # just isn't any Posix equivalent.  'XPosix' are the Posix tables extended
+    # to the full Unicode range, by our guesses as to what is appropriate.
  
      # 'Any' is all code points.  As an error check, instead of just setting it
      # to be that, construct it to be the union of all the major categories
@@ -10783,12 +11344,17 @@ sub compile_perl() {
      my $Unicode_Lower = property_ref('Lowercase');
      if (defined $Unicode_Lower && ! $Unicode_Lower->is_empty) {
          $Lower->set_equivalent_to($Unicode_Lower->table('Y'), Related => 1);
+        $Unicode_Lower->table('Y')->set_caseless_equivalent(property_ref('Cased')->table('Y'));
+        $Unicode_Lower->table('N')->set_caseless_equivalent(property_ref('Cased')->table('N'));
+        $Lower->set_caseless_equivalent(property_ref('Cased')->table('Y'));
+
      }
      else {
          $Lower->set_equivalent_to($gc->table('Lowercase_Letter'),
                                                                  Related => 1);
      }
-    $perl->add_match_table("PosixLower",
+    $Lower->add_alias('XPosixLower');
+    my $Posix_Lower = $perl->add_match_table("PosixLower",
                              Description => "[a-z]",
                              Initialize => $Lower & $ASCII,
                              );
@@ -10797,12 +11363,16 @@ sub compile_perl() {
      my $Unicode_Upper = property_ref('Uppercase');
      if (defined $Unicode_Upper && ! $Unicode_Upper->is_empty) {
          $Upper->set_equivalent_to($Unicode_Upper->table('Y'), Related => 1);
+        $Unicode_Upper->table('Y')->set_caseless_equivalent(property_ref('Cased')->table('Y'));
+        $Unicode_Upper->table('N')->set_caseless_equivalent(property_ref('Cased')->table('N'));
+        $Upper->set_caseless_equivalent(property_ref('Cased')->table('Y'));
      }
      else {
          $Upper->set_equivalent_to($gc->table('Uppercase_Letter'),
                                                                  Related => 1);
      }
-    $perl->add_match_table("PosixUpper",
+    $Upper->add_alias('XPosixUpper');
+    my $Posix_Upper = $perl->add_match_table("PosixUpper",
                              Description => "[A-Z]",
                              Initialize => $Upper & $ASCII,
                              );
@@ -10810,21 +11380,27 @@ sub compile_perl() {
      # Earliest releases didn't have title case.  Initialize it to empty if not
      # otherwise present
      my $Title = $perl->add_match_table('Title');
+    $Title->add_alias('Titlecase');
      my $lt = $gc->table('Lt');
-    if (defined $lt) {
-        $Title->set_equivalent_to($lt, Related => 1);
-    }
+
+    # Earlier versions of mktables had this related to $lt since they have
+    # identical code points, but their casefolds are not equivalent, and so
+    # now must be kept as separate entities.
+    $Title += $lt if defined $lt;
  
      # If this Unicode version doesn't have Cased, set up our own.  From
      # Unicode 5.1: Definition D120: A character C is defined to be cased if
      # and only if C has the Lowercase or Uppercase property or has a
      # General_Category value of Titlecase_Letter.
-    unless (defined property_ref('Cased')) {
+    my $Unicode_Cased = property_ref('Cased');
+    unless (defined $Unicode_Cased) {
          my $cased = $perl->add_match_table('Cased',
                          Initialize => $Lower + $Upper + $Title,
                          Description => 'Uppercase or Lowercase or Titlecase',
                          );
+        $Unicode_Cased = $cased;
      }
+    $Title->set_caseless_equivalent($Unicode_Cased->table('Y'));
  
      # Similarly, set up our own Case_Ignorable property if this Unicode
      # version doesn't have it.  From Unicode 5.1: Definition D121: A character
@@ -10896,32 +11472,39 @@ sub compile_perl() {
          $Alpha += $gc->table('Nl') if defined $gc->table('Nl');
          $Alpha->add_description('Alphabetic');
      }
-    $perl->add_match_table("PosixAlpha",
+    $Alpha->add_alias('XPosixAlpha');
+    my $Posix_Alpha = $perl->add_match_table("PosixAlpha",
                              Description => "[A-Za-z]",
                              Initialize => $Alpha & $ASCII,
                              );
+    $Posix_Upper->set_caseless_equivalent($Posix_Alpha);
+    $Posix_Lower->set_caseless_equivalent($Posix_Alpha);
  
      my $Alnum = $perl->add_match_table('Alnum',
                          Description => 'Alphabetic and (Decimal) Numeric',
                          Initialize => $Alpha + $gc->table('Decimal_Number'),
                          );
+    $Alnum->add_alias('XPosixAlnum');
      $perl->add_match_table("PosixAlnum",
                              Description => "[A-Za-z0-9]",
                              Initialize => $Alnum & $ASCII,
                              );
  
      my $Word = $perl->add_match_table('Word',
-                                Description => '\w, including beyond ASCII',
+                                Description => '\w, including beyond ASCII;'
+                                            . ' = \p{Alnum} + \pM + \p{Pc}',
                                  Initialize => $Alnum + $gc->table('Mark'),
                                  );
+    $Word->add_alias('XPosixWord');
      my $Pc = $gc->table('Connector_Punctuation'); # 'Pc' Not in release 1
      $Word += $Pc if defined $Pc;
  
      # This is a Perl extension, so the name doesn't begin with Posix.
-    $perl->add_match_table('PerlWord',
+    my $PerlWord = $perl->add_match_table('PerlWord',
                      Description => '\w, restricted to ASCII = [A-Za-z0-9_]',
                      Initialize => $Word & $ASCII,
                      );
+    $PerlWord->add_alias('PosixWord');
  
      my $Blank = $perl->add_match_table('Blank',
                                  Description => '\h, Horizontal white space',
@@ -10934,6 +11517,7 @@ sub compile_perl() {
                                              -   0x200B, # ZWSP
                                  );
      $Blank->add_alias('HorizSpace');        # Another name for it.
+    $Blank->add_alias('XPosixBlank');
      $perl->add_match_table("PosixBlank",
                              Description => "\\t and ' '",
                              Initialize => $Blank & $ASCII,
@@ -10955,24 +11539,28 @@ sub compile_perl() {
                  Description => '\s including beyond ASCII plus vertical tab',
                  Initialize => $Blank + $VertSpace,
      );
+    $Space->add_alias('XPosixSpace');
      $perl->add_match_table("PosixSpace",
                              Description => "\\t, \\n, \\cK, \\f, \\r, and ' '.  (\\cK is vertical tab)",
                              Initialize => $Space & $ASCII,
                              );
  
      # Perl's traditional space doesn't include Vertical Tab
-    my $SpacePerl = $perl->add_match_table('SpacePerl',
+    my $XPerlSpace = $perl->add_match_table('XPerlSpace',
                                    Description => '\s, including beyond ASCII',
                                    Initialize => $Space - 0x000B,
                                  );
-    $perl->add_match_table('PerlSpace',
-                            Description => '\s, restricted to ASCII',
-                            Initialize => $SpacePerl & $ASCII,
+    $XPerlSpace->add_alias('SpacePerl');    # A pre-existing synonym
+    my $PerlSpace = $perl->add_match_table('PerlSpace',
+                        Description => '\s, restricted to ASCII = [ \f\n\r\t]',
+                        Initialize => $XPerlSpace & $ASCII,
                              );
  
+
      my $Cntrl = $perl->add_match_table('Cntrl',
                                          Description => 'Control characters');
      $Cntrl->set_equivalent_to($gc->table('Cc'), Related => 1);
+    $Cntrl->add_alias('XPosixCntrl');
      $perl->add_match_table("PosixCntrl",
                              Description => "ASCII control characters: NUL, SOH, STX, ETX, EOT, ENQ, ACK, BEL, BS, HT, LF, VT, FF, CR, SO, SI, DLE, DC1, DC2, DC3, DC4, NAK, SYN, ETB, CAN, EOM, SUB, ESC, FS, GS, RS, US, and DEL",
                              Initialize => $Cntrl & $ASCII,
@@ -10989,35 +11577,42 @@ sub compile_perl() {
                          Description => 'Characters that are graphical',
                          Initialize => ~ ($Space + $controls),
                          );
+    $Graph->add_alias('XPosixGraph');
      $perl->add_match_table("PosixGraph",
                              Description =>
                                  '[-!"#$%&\'()*+,./:;<>?@[\\\]^_`{|}~0-9A-Za-z]',
                              Initialize => $Graph & $ASCII,
                              );
  
-    my $Print = $perl->add_match_table('Print',
+    $print = $perl->add_match_table('Print',
                          Description => 'Characters that are graphical plus space characters (but no controls)',
                          Initialize => $Blank + $Graph - $gc->table('Control'),
                          );
+    $print->add_alias('XPosixPrint');
      $perl->add_match_table("PosixPrint",
                              Description =>
                                '[- 0-9A-Za-z!"#$%&\'()*+,./:;<>?@[\\\]^_`{|}~]',
-                            Initialize => $Print & $ASCII,
+                            Initialize => $print & $ASCII,
                              );
  
      my $Punct = $perl->add_match_table('Punct');
      $Punct->set_equivalent_to($gc->table('Punctuation'), Related => 1);
  
      # \p{punct} doesn't include the symbols, which posix does
+    my $XPosixPunct = $perl->add_match_table('XPosixPunct',
+                    Description => '\p{Punct} + ASCII-range \p{Symbol}',
+                    Initialize => $gc->table('Punctuation')
+                                + ($ASCII & $gc->table('Symbol')),
+        );
      $perl->add_match_table('PosixPunct',
          Description => '[-!"#$%&\'()*+,./:;<>?@[\\\]^_`{|}~]',
-        Initialize => $ASCII & ($gc->table('Punctuation')
-                                + $gc->table('Symbol')),
+        Initialize => $ASCII & $XPosixPunct,
          );
  
      my $Digit = $perl->add_match_table('Digit',
-                            Description => '\d, extended beyond just [0-9]');
+                            Description => '[0-9] + all other decimal digits');
      $Digit->set_equivalent_to($gc->table('Decimal_Number'), Related => 1);
+    $Digit->add_alias('XPosixDigit');
      my $PosixDigit = $perl->add_match_table("PosixDigit",
                                              Description => '[0-9]',
                                              Initialize => $Digit & $ASCII,
@@ -11025,6 +11620,7 @@ sub compile_perl() {
  
      # Hex_Digit was not present in first release
      my $Xdigit = $perl->add_match_table('XDigit');
+    $Xdigit->add_alias('XPosixXDigit');
      my $Hex = property_ref('Hex_Digit');
      if (defined $Hex && ! $Hex->is_empty) {
          $Xdigit->set_equivalent_to($Hex->table('Y'), Related => 1);
@@ -11037,6 +11633,17 @@ sub compile_perl() {
          $Xdigit->add_description('[0-9A-Fa-f] and corresponding fullwidth versions, like U+FF10: FULLWIDTH DIGIT ZERO');
      }
  
+    # AHex was not present in early releases
+    my $PosixXDigit = $perl->add_match_table('PosixXDigit');
+    my $AHex = property_ref('ASCII_Hex_Digit');
+    if (defined $AHex && ! $AHex->is_empty) {
+        $PosixXDigit->set_equivalent_to($AHex->table('Y'), Related => 1);
+    }
+    else {
+        $PosixXDigit->initialize($Xdigit & $ASCII);
+    }
+    $PosixXDigit->add_description('[0-9A-Fa-f]');
+
      my $dt = property_ref('Decomposition_Type');
      $dt->add_match_table('Non_Canon', Full_Name => 'Non_Canonical',
          Initialize => ~ ($dt->table('None') + $dt->table('Canonical')),
@@ -11142,7 +11749,6 @@ sub compile_perl() {
          $lv_lvt_v->add_comment('For use in \X; matches: HST=LV | HST=LVT | HST=V');
      }
  
-    my $perl_charname = property_ref('Perl_Charnames');
      # Was previously constructed to contain both Name and Unicode_1_Name
      my @composition = ('Name', 'Unicode_1_Name');
  
@@ -11168,7 +11774,7 @@ sub compile_perl() {
          $alias_sentence = <<END;
  The Name_Alias property adds duplicate code point entries with a corrected
  name.  The original (less correct, but still valid) name will be physically
-first.
+last.
  END
      }
      my $comment;
@@ -11180,27 +11786,6 @@ END
          $comment .= ", and $composition[-1]";
      }
  
-    # Wait for charnames to catch up
-#    foreach my $entry (@more_Names,
-#                        split "\n", <<"END"
-#000A; LF
-#000C; FF
-#000D; CR
-#0085; NEL
-#200C; ZWNJ
-#200D; ZWJ
-#FEFF; BOM
-#FEFF; BYTE ORDER MARK
-#END
-#    ) {
-#        #local $to_trace = 1 if main::DEBUG;
-#        trace $entry if main::DEBUG && $to_trace;
-#        my ($code_point, $name) = split /\s*;\s*/, $entry;
-#        $code_point = hex $code_point;
-#        trace $code_point, $name if main::DEBUG && $to_trace;
-#        $perl_charname->add_duplicate($code_point, $name);
-#    }
-#    #$perl_charname->add_comment("This file is for charnames.pm.  It is the union of the $comment properties, plus certain commonly used but unofficial names, such as 'FF' and 'ZWNJ'.  Unicode_1_Name entries are used only for otherwise nameless code points.$alias_sentence");
      $perl_charname->add_comment(join_lines( <<END
  This file is for charnames.pm.  It is the union of the $comment properties.
  Unicode_1_Name entries are used only for otherwise nameless code
@@ -11223,7 +11808,7 @@ END
                              File => 'CombiningClass',
                              Directory => File::Spec->curdir(),
                              );
-    $perl_ccc->set_to_output_map(1);
+    $perl_ccc->set_to_output_map($EXTERNAL_MAP);
      $perl_ccc->add_comment(join_lines(<<END
  This mapping is for normalize.pm.  It is currently identical to the Unicode
  Canonical_Combining_Class property.
@@ -11273,7 +11858,7 @@ END
          my $description_start = "Code point's usage introduced in version ";
          $first_age->add_description($description_start . $first_age->name);
  
-        # To construct the accumlated values, for each of the age tables
+        # To construct the accumulated values, for each of the age tables
          # starting with the 2nd earliest, merge the earliest with it, to get
          # all those code points existing in the 2nd earliest.  Repeat merging
          # the new 2nd earliest with the 3rd earliest to get all those existing
@@ -11327,6 +11912,24 @@ END
          }
      }
  
+    # Here done with all the basic stuff.  Ready to populate the information
+    # about each character if annotating them.
+    if ($annotate) {
+
+        # See comments at its declaration
+        $annotate_ranges = Range_Map->new;
+
+        # This separates out the non-characters from the other unassigned
+        # code points, so that each group can be given its own annotation.
+        $unassigned_sans_noncharacters = Range_List->new(
+         Initialize => $gc->table('Unassigned')
+                       & property_ref('Noncharacter_Code_Point')->table('N'));
+
+        for (my $i = 0; $i <= $LAST_UNICODE_CODEPOINT; $i++ ) {
+            $i = populate_char_info($i);    # Note sets $i so may cause skips
+        }
+    }
+
      return;
  }
  
@@ -11492,7 +12095,7 @@ sub add_perl_synonyms() {
                  # name.  We could be in trouble, but not if this is just a
                  # synonym for another table that we have already made a child
                  # of the pre-existing one.
-                if ($pre_existing->is_equivalent_to($actual)) {
+                if ($pre_existing->is_set_equivalent_to($actual)) {
                      trace "$pre_existing is already equivalent to $actual; adding alias perl=$proposed_name to it" if main::DEBUG && $to_trace;
                      $pre_existing->add_alias($proposed_name);
                      next;
@@ -11592,7 +12195,7 @@ END
  
  sub register_file_for_name($$$) {
      # Given info about a table and a datafile that it should be associated
-    # with, register that assocation
+    # with, register that association
  
      my $table = shift;
      my $directory_ref = shift;   # Array of the directory path for the file
@@ -11637,19 +12240,18 @@ sub register_file_for_name($$$) {
          my $deprecated = ($table->status eq $DEPRECATED)
                           ? $table->status_info
                           : "";
+        my $caseless_equivalent = $table->caseless_equivalent;
  
          # And for each of the table's aliases...  This inner loop eventually
          # goes through all aliases in the UCD that we generate regex match
          # files for
          foreach my $alias ($table->aliases) {
-            my $name = $alias->name;
+            my $standard = utf8_heavy_name($table, $alias);
  
              # Generate an entry in either the loose or strict hashes, which
              # will translate the property and alias names combination into the
              # file where the table for them is stored.
-            my $standard;
              if ($alias->loose_match) {
-                $standard = $property . standardize($alias->name);
                  if (exists $loose_to_file_of{$standard}) {
                      Carp::my_carp("Can't change file registered to $loose_to_file_of{$standard} to '$sub_filename'.");
                  }
@@ -11658,7 +12260,6 @@ sub register_file_for_name($$$) {
                  }
              }
              else {
-                $standard = lc ($property . $name);
                  if (exists $stricter_to_file_of{$standard}) {
                      Carp::my_carp("Can't change file registered to $stricter_to_file_of{$standard} to '$sub_filename'.");
                  }
@@ -11671,7 +12272,7 @@ sub register_file_for_name($$$) {
                      # will work.  Also note that this assumes that such a
                      # number is matched strictly; so if that were to change,
                      # this would be wrong.
-                    if ((my $integer_name = $name)
+                    if ((my $integer_name = $alias->name)
                              =~ s/^ ( -? \d+ ) \.0+ $ /$1/x)
                      {
                          $stricter_to_file_of{$property . $integer_name}
@@ -11684,6 +12285,11 @@ sub register_file_for_name($$$) {
              if ($deprecated) {
                  $utf8::why_deprecated{$sub_filename} = $deprecated;
              }
+
+            # And a substitute table, if any, for case-insensitive matching
+            if ($caseless_equivalent != 0) {
+                $caseless_equivalent_to{$standard} = $caseless_equivalent;
+            }
          }
      }
  
@@ -11884,6 +12490,7 @@ sub make_table_pod_entries($) {
      my $string_count = clarify_number($count);
      my $status = $input_table->status;
      my $status_info = $input_table->status_info;
+    my $caseless_equivalent = $input_table->caseless_equivalent;
  
      my $entry_for_first_table; # The entry for the first table output.
                             # Almost certainly, it is the parent.
@@ -12096,7 +12703,6 @@ sub make_table_pod_entries($) {
                  # expression, but with only one of 'Single', 'Short' if there
                  # are both items.
                  if ($short_name || $single_form || $table->conflicting) {
-                    $parenthesized .= '(';
                      $parenthesized .= "Short: $short_name" if $short_name;
                      if ($short_name && $single_form) {
                          $parenthesized .= ', ';
@@ -12108,6 +12714,11 @@ sub make_table_pod_entries($) {
                  }
              }
  
+            if ($caseless_equivalent != 0) {
+                $parenthesized .=  '; ' if $parenthesized ne "";
+                $parenthesized .= "/i= " . $caseless_equivalent->complete_name;
+            }
+
  
              # Warn if this property isn't the same as one that a
              # semi-casual user might expect.  The other components of this
@@ -12116,18 +12727,16 @@ sub make_table_pod_entries($) {
              # to go on every entry.
              my $conflicting = join " NOR ", $table->conflicting;
              if ($conflicting) {
-                $parenthesized .= '(' if ! $parenthesized;
-                $parenthesized .=  '; ' if $parenthesized ne '(';
+                $parenthesized .=  '; ' if $parenthesized ne "";
                  $parenthesized .= "NOT $conflicting";
              }
-            $parenthesized .= ')' if $parenthesized;
  
-            push @info, $parenthesized if $parenthesized;
+            push @info, "($parenthesized)" if $parenthesized;
  
              if ($table_property != $perl && $table->perl_extension) {
                  push @info, '(Perl extension)';
              }
-            push @info, "($string_count)" if $output_range_counts;
+            push @info, "($string_count)";
  
              # Now, we have both the entry and info so add them to the
              # list of all the properties.
@@ -12358,6 +12967,10 @@ END
          # directory.
          my @path = $property->file_path;
          next if $path[0] ne $map_directory;
+
+        # Don't mention map tables that are for internal-use only
+        next if $property->to_output_map == $INTERNAL_MAP;
+
          shift @path;    # Remove the standard name
  
          my $file = join '/', @path; # In case is in sub directory
@@ -12392,9 +13005,11 @@ END
      # Generate a list of the formats that can appear in the map tables.
      my @map_table_formats;
      foreach my $format (sort keys %map_table_formats) {
-        push @map_table_formats, " $format    $map_table_formats{$format}\n";
+        push @map_table_formats, "  $format    $map_table_formats{$format}\n";
      }
  
+    local $" = "";
+
      # Everything is ready to assemble.
      my @OUT = << "END";
  =begin comment
@@ -12424,7 +13039,7 @@ Perl extension.  There is some detail about Blocks, Scripts, General_Category,
  and Bidi_Class in L<perlunicode>, but to find out about the intricacies of the
  Unicode properties, refer to the Unicode standard.  A good starting place is
  L<$unicode_reference_url>.  More information on the Perl extensions is in
-L<perlrecharclass>.
+L<perlunicode/Other Properties>.
  
  Note that you can define your own properties; see
  L<perlunicode/"User-Defined Character Properties">.
@@ -12438,7 +13053,7 @@ both single and compound forms.
  B<Compound forms> consist of two components, separated by an equals sign or a
  colon.  The first component is the property name, and the second component is
  the particular value of the property to match against, for example,
-'\\p{Script: Greek}' or '\\p{Script=Greek}' both mean to match characters
+'\\p{Script: Greek}' and '\\p{Script=Greek}' both mean to match characters
  whose Script property is Greek.
  
  B<Single forms>, like '\\p{Greek}', are mostly Perl-defined shortcuts for
@@ -12489,30 +13104,29 @@ adjacent to (but within) the braces and the colon or equal sign.
  
  =back
  
-Some properties are considered obsolete, but still available.  There are
-several varieties of obsolesence:
+Some properties are considered obsolete by Unicode, but still available.
+There are several varieties of obsolescence:
  
  =over 4
  
  =item Obsolete
  
  Properties marked with $a_bold_obsolete in the table are considered
-obsolete.  At the time of this writing (Unicode version 5.2) there is no
-information in the Unicode standard about the implications of a property being
  obsolete.
  
  =item Stabilized
  
-Obsolete properties may be stabilized.  This means that they are not actively
-maintained by Unicode, and will not be extended as new characters are added to
-the standard.  Such properties are marked with $a_bold_stabilized in the
-table.  At the time of this writing (Unicode version 5.2) there is no further
-information in the Unicode standard about the implications of a property being
-stabilized.
+Obsolete properties may be stabilized.  Such a determination does not indicate
+that the property should or should not be used; instead it is a declaration
+that the property will not be maintained nor extended for newly encoded
+characters.  Such properties are marked with $a_bold_stabilized in the
+table.
  
  =item Deprecated
  
-Obsolete properties may be deprecated.  This means that their use is strongly
+An obsolete property may be deprecated, perhaps because its original intent
+has been replaced by another property, or because its specification was
+somehow defective.  This means that its use is strongly
  discouraged, so much so that a warning will be issued if used, unless the
  regular expression is in the scope of a C<S<no warnings 'deprecated'>>
  statement.  $A_bold_deprecated flags each such entry in the table, and
@@ -12535,7 +13149,7 @@ flags each such entry in the table.
  @block_warning
  
  The table below has two columns.  The left column contains the \\p{}
-constructs to look up, possibly preceeded by the flags mentioned above; and
+constructs to look up, possibly preceded by the flags mentioned above; and
  the right column contains information about them, like a description, or
  synonyms.  It shows both the single and compound forms for each property that
  has them.  If the left column is a short name for a property, the right column
@@ -12553,6 +13167,15 @@ Numbers in (parentheses) indicate the total number of code points matched by
  the property.  For emphasis, those properties that match no code points at all
  are listed as well in a separate section following the table.
  
+Most properties match the same code points regardless of whether C<"/i">
+case-insensitive matching is specified or not.  But a few properties are
+affected.  These are shown with the notation
+
+ (/i= other_property)
+
+in the second column.  Under case-insensitive matching they match the
+same code points as the property "other_property".
+
  There is no description given for most non-Perl defined properties (See
  $unicode_reference_url for that).
  
@@ -12612,6 +13235,7 @@ $zero_matches
  
  A few properties are accessible in Perl via various function calls only.
  These are:
+
   Lowercase_Mapping          lc() and lcfirst()
   Titlecase_Mapping          ucfirst()
   Uppercase_Mapping          uc()
@@ -12636,42 +13260,31 @@ the properties are listed enclosed in (parentheses).
  
  =back
  
-An installation can choose to allow any of these to be matched by changing the
-controlling lists contained in the program C<\$Config{privlib}>/F<unicore/$0>
-and then re-running F<$0>.  (C<\%Config> is available from the Config module).
+An installation can choose to allow any of these to be matched by downloading
+the Unicode database from L<http://www.unicode.org/Public/> to
+C<\$Config{privlib}>/F<unicore/> in the Perl source tree, changing the
+controlling lists contained in the program
+C<\$Config{privlib}>/F<unicore/mktables> and then re-compiling and installing.
+(C<\%Config> is available from the Config module).
  
  =head1 Files in the I<To> directory (for serious hackers only)
  
  All Unicode properties are really mappings (in the mathematical sense) from
  code points to their respective values.  As part of its build process,
  Perl constructs tables containing these mappings for all properties that it
-deals with.  But only a few of these are written out into files.
+deals with.  Some, but not all, of these are written out into files.
  Those written out are in the directory C<\$Config{privlib}>/F<unicore/To/>
  (%Config is available from the Config module).
  
-Those ones written are ones needed by Perl internally during execution, or for
-which there is some demand, and those for which there is no access through the
-Perl core.  Generally, properties that can be used in regular expression
-matching do not have their map tables written, like Script.  Nor are the
-simplistic properties that have a better, more complete version, such as
-Simple_Uppercase_Mapping  (Uppercase_Mapping is written instead).
-
-None of the properties in the I<To> directory are currently directly
-accessible through the Perl core, although some may be accessed indirectly.
-For example, the uc() function implements the Uppercase_Mapping property and
-uses the F<Upper.pl> file found in this directory.
-
-The available files with their properties (short names in parentheses),
-and any flags or comments about them, are:
+Perl reserves the right to change the format and even the existence of any of
+those files without notice, except the ones that were in existence prior to
+release 5.13.  If those change, a deprecation cycle will be done first.  These
+are:
  
  @map_tables_actually_output
  
-An installation can choose to change which files are generated by changing the
-controlling lists contained in the program C<\$Config{privlib}>/F<unicore/$0>
-and then re-running F<$0>.
-
-Each of these files defines two hash entries to help reading programs decipher
-it.  One of them looks like this:
+Each of the files in this directory defines two hash entries to help reading
+programs decipher it.  One of them looks like this:
  
      \$utf8::SwashInfo{'ToNAME'}{'format'} = 's';
  
@@ -12680,7 +13293,7 @@ this is not necessarily the property's official Unicode name.  (The 'To' is
  also for backwards compatibility.)  The hash entry gives the format of the
  mapping fields of the table, currently one of the following:
  
- @map_table_formats
+@map_table_formats
  
  This format applies only to the entries in the main body of the table.
  Entries defined in hashes or ones that are missing from the list can have a
@@ -12708,8 +13321,8 @@ L<perlunicode>
  
  END
  
-    # And write it.
-    main::write([ $pod_directory, "$pod_file.pod" ], @OUT);
+    # And write it.  The 0 means no utf8.
+    main::write([ $pod_directory, "$pod_file.pod" ], 0, \@OUT);
      return;
  }
  
@@ -12767,10 +13380,29 @@ END
      push @heavy, <<END;
  );
  
+# A few properties have different behavior under /i matching.  This maps
+# those to substitute files to use under /i.
+\%utf8::caseless_equivalent = (
+END
+
+
+    # We set the key to the file when we associated files with tables, but we
+    # couldn't do the same for the value then, as we might not have the file
+    # for the alternate table figured out at that time.
+    foreach my $cased (keys %caseless_equivalent_to) {
+        my @path = $caseless_equivalent_to{$cased}->file_path;
+        my $path = join '/', @path[1, -1];
+        $path =~ s/\.pl//;
+        $utf8::caseless_equivalent_to{$cased} = $path;
+    }
+    push @heavy, simple_dumper (\%utf8::caseless_equivalent_to, ' ' x 4);
+    push @heavy, <<END;
+);
+
  1;
  END
  
-    main::write("Heavy.pl", @heavy);
+    main::write("Heavy.pl", 0, \@heavy);  # The 0 means no utf8.
      return;
  }
  
@@ -13087,7 +13719,7 @@ sub write_all_tables() {
              $filename = $table->file;
          }
  
-        # Use specified filename if avaliable, or default to property's
+        # Use specified filename if available, or default to property's
          # shortest name.  We need an 8.3 safe filename (which means "an 8
          # safe" filename, since after the dot is only 'pl', which is < 3)
          # The 2nd parameter is if the filename shouldn't be changed, and
@@ -13543,10 +14175,12 @@ sub make_property_test_script() {
          }
      }
  
-    &write($t_path, [<DATA>,
-                    @output,
-                    (map {"Test_X('$_');\n"} @backslash_X_tests),
-                    "Finished();\n"]);
+    &write($t_path,
+           0,           # Not utf8;
+           [<DATA>,
+            @output,
+            (map {"Test_X('$_');\n"} @backslash_X_tests),
+            "Finished();\n"]);
      return;
  }
  
@@ -13636,7 +14270,12 @@ my @input_file_objects = (
                                              ? \&filter_v1_ucd
                                              : ($v_version eq v2.1.5)
                                                  ? \&filter_v2_1_5_ucd
-                                                : undef),
+
+                                                # And 5.14 Perls with Unicode
+                                                # 6.0 also have to make changes
+                                                : ($v_version ge v6.0.0)
+                                                    ? \&filter_v6_ucd
+                                                    : undef),
  
                                              # And the main filter
                                              \&filter_UnicodeData_line,
@@ -13705,7 +14344,6 @@ my @input_file_objects = (
                                   : undef,
                             \&filter_case_folding_line
                          ],
-                    Post_Handler => \&post_fold,
                      ),
      Input_file->new('DCoreProperties.txt', v3.1.0,
                      # 5.2 changed this file
@@ -13821,6 +14459,7 @@ File::Find::find({
  }, File::Spec->curdir());
  
  my @mktables_list_output_files;
+my $old_start_time = 0;
  
  if (! -e $file_list) {
      print "'$file_list' doesn't exist, so forcing rebuild.\n" if $verbosity >= $VERBOSE;
@@ -13843,6 +14482,9 @@ else {
          for my $list ( \@input, \@mktables_list_output_files ) {
              while (<$file_handle>) {
                  s/^ \s+ | \s+ $//xg;
+                if (/^ \s* \# .* Autogenerated\ starting\ on\ (\d+)/x) {
+                    $old_start_time = $1;
+                }
                  next if /^ \s* (?: \# .* )? $/x;
                  last if /^ =+ $/x;
                  my ( $file ) = split /\t/;
@@ -13950,12 +14592,12 @@ if ( $verbosity >= $VERBOSE ) {
           "Checking ".scalar( @mktables_list_output_files )." output files.\n";
  }
  
-# We set $youngest to be the most recently changed input file, including this
-# program itself (done much earlier in this file)
+# We set $most_recent to be the most recently changed input file, including
+# this program itself (done much earlier in this file)
  foreach my $in (@input_files) {
-    my $age = -M $in;
-    next unless defined $age;        # Keep going even if missing a file
-    $youngest = $age if $age < $youngest;
+    next unless -e $in;        # Keep going even if missing a file
+    my $mod_time = (stat $in)[9];
+    $most_recent = $mod_time if $mod_time > $most_recent;
  
      # See that the input files have distinct names, to warn someone if they
      # are adding a new one
@@ -13968,30 +14610,31 @@ foreach my $in (@input_files) {
      }
  }
  
-my $ok = ! $write_unchanged_files
-        && scalar @mktables_list_output_files;        # If none known, rebuild
+my $rebuild = $write_unchanged_files    # Rebuild: if unconditional rebuild
+              || ! scalar @mktables_list_output_files  # or if no outputs known
+              || $old_start_time < $most_recent;       # or out-of-date
  
  # Now we check to see if any output files are older than youngest, if
  # they are, we need to continue on, otherwise we can presumably bail.
-if ($ok) {
+if (! $rebuild) {
      foreach my $out (@mktables_list_output_files) {
          if ( ! file_exists($out)) {
              print "'$out' is missing.\n" if $verbosity >= $VERBOSE;
-            $ok = 0;
+            $rebuild = 1;
              last;
           }
          #local $to_trace = 1 if main::DEBUG;
-        trace $youngest, -M $out if main::DEBUG && $to_trace;
-        if ( -M $out > $youngest ) {
-            #trace "$out: age: ", -M $out, ", youngest: $youngest\n" if main::DEBUG && $to_trace;
+        trace $most_recent, (stat $out)[9] if main::DEBUG && $to_trace;
+        if ( (stat $out)[9] <= $most_recent ) {
+            #trace "$out:  most recent mod time: ", (stat $out)[9], ", youngest: $most_recent\n" if main::DEBUG && $to_trace;
              print "'$out' is too old.\n" if $verbosity >= $VERBOSE;
-            $ok = 0;
+            $rebuild = 1;
              last;
          }
      }
  }
-if ($ok) {
-    print "Files seem to be ok, not bothering to rebuild.\n";
+if (! $rebuild) {
+    print "Files seem to be ok, not bothering to rebuild.  Add '-w' option to force build\n";
      exit(0);
  }
  print "Must rebuild tables.\n" if $verbosity >= $VERBOSE;
@@ -14034,15 +14677,16 @@ if ( $file_list and $make_list ) {
          return
      }
      else {
+        my $localtime = localtime $start_time;
          print $ofh <<"END";
  #
  # $file_list -- File list for $0.
  #
-#   Autogenerated on @{[scalar localtime]}
+#   Autogenerated starting on $start_time ($localtime)
  #
  # - First section is input files
  #   ($0 itself is not listed but is automatically considered an input)
-# - Section seperator is /^=+\$/
+# - Section separator is /^=+\$/
  # - Second section is a list of output files.
  # - Lines matching /^\\s*#/ are treated as comments
  #   which along with blank lines are ignored.