mktables: Fix --annotate option output

[perl5.git] / lib / unicore / mktables
diff --git a/lib/unicore/mktables b/lib/unicore/mktables

index be7413a..d005c44 100644 (file)
--- a/lib/unicore/mktables
+++ b/lib/unicore/mktables
@@ -4,17 +4,9 @@
  # Any files created or read by this program should be listed in 'mktables.lst'
  # Use -makelist to regenerate it.
  
-# Needs 'no overloading' to run faster on miniperl.  Code commented out at the
-# subroutine objaddr can be used instead to work as far back (untested) as
-# 5.8: needs pack "U".  But almost all occurrences of objaddr have been
-# removed in favor of using 'no overloading'.  You also would have to go
-# through and replace occurrences like:
-#       my $addr = do { no overloading; pack 'J', $self; }
-# with
-#       my $addr = main::objaddr $self;
-# (or reverse commit 9b01bafde4b022706c3d6f947a0963f821b2e50b
-# that instituted the change to main::objaddr, and subsequent commits that
-# changed 0+$self to pack 'J', $self.)
+# There was an attempt when this was first rewritten to make it 5.8
+# compatible, but that has now been abandoned, and newer constructs are used
+# as convenient.
  
  my $start_time;
  BEGIN { # Get the time the script started running; do it at compilation to
@@ -32,6 +24,7 @@ use File::Path;
  use File::Spec;
  use Text::Tabs;
  use re "/aa";
+use feature 'state';
  
  sub DEBUG () { 0 }  # Set to 0 for production; 1 for development
  my $debugging_build = $Config{"ccflags"} =~ /-DDEBUGGING/;
@@ -292,8 +285,8 @@ my $unicode_reference_url = 'http://www.unicode.org/reports/tr44/';
  # As mentioned earlier, some properties are given in more than one file.  In
  # particular, the files in the extracted directory are supposedly just
  # reformattings of the others.  But they contain information not easily
-# derivable from the other files, including results for Unihan, which this
-# program doesn't ordinarily look at, and for unassigned code points.  They
+# derivable from the other files, including results for Unihan (which isn't
+# usually available to this program) and for unassigned code points.  They
  # also have historically had errors or been incomplete.  In an attempt to
  # create the best possible data, this program thus processes them first to
  # glean information missing from the other files; then processes those other
@@ -411,24 +404,19 @@ my $unicode_reference_url = 'http://www.unicode.org/reports/tr44/';
  #
  # A NOTE ON UNIHAN
  #
-# This program can generate tables from the Unihan database.  But it doesn't
-# by default, letting the CPAN module Unicode::Unihan handle them.  Prior to
-# version 5.2, this database was in a single file, Unihan.txt.  In 5.2 the
-# database was split into 8 different files, all beginning with the letters
-# 'Unihan'.  This program will read those file(s) if present, but it needs to
-# know which of the many properties in the file(s) should have tables created
-# for them.  It will create tables for any properties listed in
-# PropertyAliases.txt and PropValueAliases.txt, plus any listed in the
-# @cjk_properties array and the @cjk_property_values array.  Thus, if a
-# property you want is not in those files of the release you are building
-# against, you must add it to those two arrays.  Starting in 4.0, the
-# Unicode_Radical_Stroke was listed in those files, so if the Unihan database
-# is present in the directory, a table will be generated for that property.
-# In 5.2, several more properties were added.  For your convenience, the two
-# arrays are initialized with all the 6.0 listed properties that are also in
-# earlier releases.  But these are commented out.  You can just uncomment the
-# ones you want, or use them as a template for adding entries for other
-# properties.
+# This program can generate tables from the Unihan database.  But that db
+# isn't normally available, so it is marked as optional.  Prior to version
+# 5.2, this database was in a single file, Unihan.txt.  In 5.2 the database
+# was split into 8 different files, all beginning with the letters 'Unihan'.
+# If you plunk those files down into the directory mktables ($0) is in, this
+# program will read them and automatically create tables for the properties
+# from it that are listed in PropertyAliases.txt and PropValueAliases.txt,
+# plus any you add to the @cjk_properties array and the @cjk_property_values
+# array, being sure to add necessary '# @missing' lines to the latter.  For
+# Unicode versions earlier than 5.2, most of the Unihan properties are not
+# listed at all in either PropertyAliases.txt or PropValueAliases.txt.  For
+# these early releases, this program assumes that you want the properties
+# that are specified in the 5.2 release.
  #
  # You may need to adjust the entries to suit your purposes.  setup_unihan(),
  # and filter_unihan_line() are the functions where this is done.  This program
@@ -437,8 +425,8 @@ my $unicode_reference_url = 'http://www.unicode.org/reports/tr44/';
  #
  # There is a bug in the 3.2 data file in which some values for the
  # kPrimaryNumeric property have commas and an unexpected comment.  A filter
-# could be added for these; or for a particular installation, the Unihan.txt
-# file could be edited to fix them.
+# could be added to correct these; or for a particular installation, the
+# Unihan.txt file could be edited to fix them.
  #
  # HOW TO ADD A FILE TO BE PROCESSED
  #
@@ -484,13 +472,13 @@ my $unicode_reference_url = 'http://www.unicode.org/reports/tr44/';
  # handled by Unicode::Normalize, nor will it compile when presented a version
  # that has them.  However, you can trivially get it to compile by simply
  # ignoring those decompositions, by changing the croak to a carp.  At the time
-# of this writing, the line (in cpan/Unicode-Normalize/mkheader) reads
+# of this writing, the line (in cpan/Unicode-Normalize/Normalize.pm or
+# cpan/Unicode-Normalize/mkheader) reads
  #
  #   croak("Weird Canonical Decomposition of U+$h");
  #
  # Simply comment it out.  It will compile, but will not know about any three
-# character decompositions.  If using the .pm version, there is a similar
-# line.
+# character decompositions.
  
  # The number of code points in \p{alpha=True} halved in 2.1.9.  It turns out
  # that the reason is that the CJK block starting at 4E00 was removed from
@@ -513,10 +501,13 @@ my $unicode_reference_url = 'http://www.unicode.org/reports/tr44/';
  # name for the class, it would not have been affected, but if it used the
  # mnemonic, it would have been.
  #
-# \p{Script=Hrkt} (Katakana_Or_Hiragana) came in 4.0.1.  Before that code
+# \p{Script=Hrkt} (Katakana_Or_Hiragana) came in 4.0.1.  Before that, code
  # points which eventually came to have this script property value, instead
  # mapped to "Unknown".  But in the next release all these code points were
  # moved to \p{sc=common} instead.
+
+# The tests furnished by Unicode for testing WordBreak and SentenceBreak
+# generate errors in 5.0 and earlier.
  #
  # The default for missing code points for BidiClass is complicated.  Starting
  # in 3.1.1, the derived file DBidiClass.txt handles this, but this program
@@ -596,8 +587,8 @@ our $to_trace = 0;
                                      || $caller_name eq 'trace');
  
          my $output = "";
+        #print STDERR __LINE__, ": ", join ", ", @input, "\n";
          foreach my $string (@input) {
-            #print STDERR __LINE__, ": ", join ", ", @input, "\n";
              if (ref $string eq 'ARRAY' || ref $string eq 'HASH') {
                  $output .= simple_dumper($string);
              }
@@ -623,15 +614,18 @@ our $to_trace = 0;
  
  # This is for a rarely used development feature that allows you to compare two
  # versions of the Unicode standard without having to deal with changes caused
-# by the code points introduced in the later version.  Change the 0 to a
-# string containing a SINGLE dotted Unicode release number (e.g. "2.1").  Only
-# code points introduced in that release and earlier will be used; later ones
-# are thrown away.  You use the version number of the earliest one you want to
-# compare; then run this program on directory structures containing each
-# release, and compare the outputs.  These outputs will therefore include only
-# the code points common to both releases, and you can see the changes caused
-# just by the underlying release semantic changes.  For versions earlier than
-# 3.2, you must copy a version of DAge.txt into the directory.
+# by the code points introduced in the later version.  You probably also want
+# to use the -annotate option when using this.  Run this program on a unicore
+# containing the starting release you want to compare.  Save that output
+# structure.  Then, switching to a unicore with the ending release, change the
+# 0 in the $string_compare_versions definition just below to a string
+# containing a SINGLE dotted Unicode release number (e.g. "2.1") corresponding
+# to the starting release.  This program will then compile, but throw away all
+# code points introduced after the starting release.  Finally use a diff tool
+# to compare the two directory structures.  They include only the code points
+# common to both releases, and you can see the changes caused just by the
+# underlying release semantic changes.  For versions earlier than 3.2, you
+# must copy a version of DAge.txt into the directory.
  my $string_compare_versions = DEBUG && 0; #  e.g., "2.1";
  my $compare_versions = DEBUG
                         && $string_compare_versions
@@ -810,6 +804,11 @@ close $VERSION;
  chomp $string_version;
  my $v_version = pack "C*", split /\./, $string_version;        # v string
  
+my $unicode_version = ($compare_versions)
+                      ? (  "$string_compare_versions (using "
+                         . "$string_version rules)")
+                      : $string_version;
+
  # The following are the complete names of properties with property values that
  # are known to not match any code points in some versions of Unicode, but that
  # may change in the future so they should be matchable, hence an empty file is
@@ -861,33 +860,8 @@ if ($v_version gt v3.2.0) {
                                  'Canonical_Combining_Class=Attached_Below_Left'
  }
  
-# These are listed in the Property aliases file in 6.0, but Unihan is ignored
-# unless explicitly added.
-if ($v_version ge v5.2.0) {
-    my $unihan = 'Unihan; remove from list if using Unihan';
-    foreach my $table (qw (
-                           kAccountingNumeric
-                           kOtherNumeric
-                           kPrimaryNumeric
-                           kCompatibilityVariant
-                           kIICore
-                           kIRG_GSource
-                           kIRG_HSource
-                           kIRG_JSource
-                           kIRG_KPSource
-                           kIRG_MSource
-                           kIRG_KSource
-                           kIRG_TSource
-                           kIRG_USource
-                           kIRG_VSource
-                           kRSUnicode
-                        ))
-    {
-        $why_suppress_if_empty_warn_if_not{$table} = $unihan;
-    }
-}
-
-# Enum values for to_output_map() method in the Map_Table package.
+# Enum values for to_output_map() method in the Map_Table package. (0 is don't
+# output)
  my $EXTERNAL_MAP = 1;
  my $INTERNAL_MAP = 2;
  my $OUTPUT_ADJUSTED = 3;
@@ -913,13 +887,6 @@ my %global_to_output_map = (
      Decomposition_Type => 0,
  );
  
-# Properties that this program ignores.
-my @unimplemented_properties;
-
-# With this release, it is automatically handled if the Unihan db is
-# downloaded
-push @unimplemented_properties, 'Unicode_Radical_Stroke' if $v_version lt v5.2.0;
-
  # There are several types of obsolete properties defined by Unicode.  These
  # must be hand-edited for every new Unicode release.
  my %why_deprecated;  # Generates a deprecated warning message if used.
@@ -959,8 +926,6 @@ my %why_obsolete;    # Documentation only
          # existence is not noted in the comment.
          'Decomposition_Mapping' => 'Accessible via Unicode::Normalize or prop_invmap() or charprop() in Unicode::UCD::',
  
-        'Indic_Matra_Category' => "Withdrawn by Unicode while still provisional",
-
          # Don't suppress ISO_Comment, as otherwise special handling is needed
          # to differentiate between it and gc=c, which can be written as 'isc',
          # which is the same characters as ISO_Comment's short name.
@@ -1046,45 +1011,13 @@ if ($v_version ge v6.0.0) {
  my @output_mapped_properties = split "\n", <<END;
  END
  
-# If you are using the Unihan database in a Unicode version before 5.2, you
-# need to add the properties that you want to extract from it to this table.
-# For your convenience, the properties in the 6.0 PropertyAliases.txt file are
-# listed, commented out
+# If you want more Unihan properties than the default, you need to add them to
+# these arrays.  Depending on the property type, @missing lines might have to
+# be added to the second array.  A sample entry would be (including the '#'):
+# @missing: 0000..10FFFF; cjkAccountingNumeric; NaN
  my @cjk_properties = split "\n", <<'END';
-#cjkAccountingNumeric; kAccountingNumeric
-#cjkOtherNumeric; kOtherNumeric
-#cjkPrimaryNumeric; kPrimaryNumeric
-#cjkCompatibilityVariant; kCompatibilityVariant
-#cjkIICore ; kIICore
-#cjkIRG_GSource; kIRG_GSource
-#cjkIRG_HSource; kIRG_HSource
-#cjkIRG_JSource; kIRG_JSource
-#cjkIRG_KPSource; kIRG_KPSource
-#cjkIRG_KSource; kIRG_KSource
-#cjkIRG_TSource; kIRG_TSource
-#cjkIRG_USource; kIRG_USource
-#cjkIRG_VSource; kIRG_VSource
-#cjkRSUnicode; kRSUnicode                ; Unicode_Radical_Stroke; URS
  END
-
-# Similarly for the property values.  For your convenience, the lines in the
-# 6.0 PropertyAliases.txt file are listed.  Just remove the first BUT NOT both
-# '#' marks (for Unicode versions before 5.2)
  my @cjk_property_values = split "\n", <<'END';
-## @missing: 0000..10FFFF; cjkAccountingNumeric; NaN
-## @missing: 0000..10FFFF; cjkCompatibilityVariant; <code point>
-## @missing: 0000..10FFFF; cjkIICore; <none>
-## @missing: 0000..10FFFF; cjkIRG_GSource; <none>
-## @missing: 0000..10FFFF; cjkIRG_HSource; <none>
-## @missing: 0000..10FFFF; cjkIRG_JSource; <none>
-## @missing: 0000..10FFFF; cjkIRG_KPSource; <none>
-## @missing: 0000..10FFFF; cjkIRG_KSource; <none>
-## @missing: 0000..10FFFF; cjkIRG_TSource; <none>
-## @missing: 0000..10FFFF; cjkIRG_USource; <none>
-## @missing: 0000..10FFFF; cjkIRG_VSource; <none>
-## @missing: 0000..10FFFF; cjkOtherNumeric; NaN
-## @missing: 0000..10FFFF; cjkPrimaryNumeric; NaN
-## @missing: 0000..10FFFF; cjkRSUnicode; <none>
  END
  
  # The input files don't list every code point.  Those not listed are to be
@@ -1109,7 +1042,7 @@ my %default_mapping = (
      Decomposition_Type => 'None',
      East_Asian_Width => "Neutral",
      FC_NFKC_Closure => $CODE_POINT,
-    General_Category => 'Cn',
+    General_Category => ($v_version le 6.3.0) ? 'Cn' : 'Unassigned',
      Grapheme_Cluster_Break => 'Other',
      Hangul_Syllable_Type => 'NA',
      ISO_Comment => "",
@@ -1140,45 +1073,12 @@ my %default_mapping = (
      Word_Break => 'Other',
  );
  
-# Below are files that Unicode furnishes, but this program ignores, and why.
-# NormalizationCorrections.txt requires some more explanation.  It documents
-# the cumulative fixes to erroneous normalizations in earlier Unicode
-# versions.  Its main purpose is so that someone running on an earlier version
-# can use this file to override what got published in that earlier release.
-# It would be easy for mktables to read and handle this file.  But all the
-# corrections in it should already be in the other files for the release it
-# is.  To get it to actually mean something useful, someone would have to be
-# using an earlier Unicode release, and copy it to the files for that release
-# and recomplile.  So far there has been no demand to do that, so this hasn't
-# been implemented.
-my %ignored_files = (
-    'CJKRadicals.txt' => 'Maps the kRSUnicode property values to corresponding code points',
-    'Index.txt' => 'Alphabetical index of Unicode characters',
-    'NamedSqProv.txt' => 'Named sequences proposed for inclusion in a later version of the Unicode Standard; if you need them now, you can append this file to F<NamedSequences.txt> and recompile perl',
-    'NamesList.txt' => 'Annotated list of characters',
-    'NamesList.html' => 'Describes the format and contents of F<NamesList.txt>',
-    'NormalizationCorrections.txt' => 'Documentation of corrections already incorporated into the Unicode data base',
-    'Props.txt' => 'Only in very early releases; is a subset of F<PropList.txt> (which is used instead)',
-    'ReadMe.txt' => 'Documentation',
-    'StandardizedVariants.txt' => 'Certain glyph variations for character display are standardized.  This lists the non-Unihan ones; the Unihan ones are also not used by Perl, and are in a separate Unicode data base L<http://www.unicode.org/ivd>',
-    'StandardizedVariants.html' => 'Provides a visual display of the standard variant sequences derived from F<StandardizedVariants.txt>.',
-    'EmojiSources.txt' => 'Maps certain Unicode code points to their legacy Japanese cell-phone values',
-    'USourceData.txt' => 'Documentation of status and cross reference of proposals for encoding by Unicode of Unihan characters',
-    'USourceGlyphs.pdf' => 'Pictures of the characters in F<USourceData.txt>',
-    'auxiliary/WordBreakTest.html' => 'Documentation of validation tests',
-    'auxiliary/SentenceBreakTest.html' => 'Documentation of validation tests',
-    'auxiliary/GraphemeBreakTest.html' => 'Documentation of validation tests',
-    'auxiliary/LineBreakTest.html' => 'Documentation of validation tests',
-);
-
-my %skipped_files;  # List of files that we skip
-
  ### End of externally interesting definitions, except for @input_file_objects
  
  my $HEADER=<<"EOF";
  # !!!!!!!   DO NOT EDIT THIS FILE   !!!!!!!
  # This file is machine-generated by $0 from the Unicode
-# database, Version $string_version.  Any changes made here will be lost!
+# database, Version $unicode_version.  Any changes made here will be lost!
  EOF
  
  my $INTERNAL_ONLY_HEADER = <<"EOF";
@@ -1308,11 +1208,15 @@ my $OBSOLETE = 'O';
  my $a_bold_obsolete = "an 'B<$OBSOLETE>'";
  my $A_bold_obsolete = "An 'B<$OBSOLETE>'";
  
+# Aliases can also have an extra status:
+my $INTERNAL_ALIAS = 'P';
+
  my %status_past_participles = (
      $DISCOURAGED => 'discouraged',
      $STABILIZED => 'stabilized',
      $OBSOLETE => 'obsolete',
      $DEPRECATED => 'deprecated',
+    $INTERNAL_ALIAS => 'reserved for Perl core internal use only',
  );
  
  # Table fates.  These are somewhat ordered, so that fates < $MAP_PROXIED should be
@@ -1375,18 +1279,21 @@ my %loose_to_file_of;       # loosely maps table names to their respective
                              # files
  my %stricter_to_file_of;    # same; but for stricter mapping.
  my %loose_property_to_file_of; # Maps a loose property name to its map file
+my %strict_property_to_file_of; # Same, but strict
  my @inline_definitions = "V0"; # Each element gives a definition of a unique
                              # inversion list.  When a definition is inlined,
                              # its value in the hash it's in (one of the two
                              # defined just above) will include an index into
                              # this array.  The 0th element is initialized to
-                            # the definition for a zero length invwersion list
+                            # the definition for a zero length inversion list
  my %file_to_swash_name;     # Maps the file name to its corresponding key name
                              # in the hash %utf8::SwashInfo
  my %nv_floating_to_rational; # maps numeric values floating point numbers to
                               # their rational equivalent
  my %loose_property_name_of; # Loosely maps (non_string) property names to
                              # standard form
+my %strict_property_name_of; # Strictly maps (non_string) property names to
+                            # standard form
  my %string_property_loose_to_name; # Same, for string properties.
  my %loose_defaults;         # keys are of form "prop=value", where 'prop' is
                              # the property name in standard loose form, and
@@ -1396,12 +1303,16 @@ my %loose_to_standard_value; # loosely maps table names to the canonical
                              # alias for them
  my %ambiguous_names;        # keys are alias names (in standard form) that
                              # have more than one possible meaning.
+my %combination_property;   # keys are alias names (in standard form) that
+                            # have both a map table, and a binary one that
+                            # yields true for all non-null maps.
  my %prop_aliases;           # Keys are standard property name; values are each
                              # one's aliases
  my %prop_value_aliases;     # Keys of top level are standard property name;
                              # values are keys to another hash,  Each one is
                              # one of the property's values, in standard form.
                              # The values are that prop-val's aliases.
+my %skipped_files;          # List of files that we skip
  my %ucd_pod;    # Holds entries that will go into the UCD section of the pod
  
  # Most properties are immune to caseless matching, otherwise you would get
@@ -1416,6 +1327,12 @@ my %ucd_pod;    # Holds entries that will go into the UCD section of the pod
  # unlikely that they will ever change.
  my %caseless_equivalent_to;
  
+# This is the range of characters that were in Release 1 of Unicode, and
+# removed in Release 2 (replaced with the current Hangul syllables starting at
+# U+AC00).  The range was reused starting in Release 3 for other purposes.
+my $FIRST_REMOVED_HANGUL_SYLLABLE = 0x3400;
+my $FINAL_REMOVED_HANGUL_SYLLABLE = 0x4DFF;
+
  # These constants names and values were taken from the Unicode standard,
  # version 5.1, section 3.12.  They are used in conjunction with Hangul
  # syllables.  The '_string' versions are so generated tables can retain the
@@ -1478,6 +1395,8 @@ my @named_sequences;       # NamedSequences.txt contents.
  my %potential_files;       # Generated list of all .txt files in the directory
                             # structure so we can warn if something is being
                             # ignored.
+my @missing_early_files;   # Generated list of absent files that we need to
+                           # proceed in compiling this early Unicode version
  my @files_actually_output; # List of files we generated.
  my @more_Names;            # Some code point names are compound; this is used
                             # to store the extra components of them.
@@ -1487,6 +1406,7 @@ my $MIN_FRACTION_LENGTH = 3; # How many digits of a floating point number at
  my $MAX_FLOATING_SLOP = 10 ** - $MIN_FRACTION_LENGTH; # And in floating terms
  
  # These store references to certain commonly used property objects
+my $age;
  my $ccc;
  my $gc;
  my $perl;
@@ -1495,6 +1415,8 @@ my $perl_charname;
  my $print;
  my $All;
  my $Assigned;   # All assigned characters in this Unicode release
+my $DI;         # Default_Ignorable_Code_Point property
+my $NChar;      # Noncharacter_Code_Point property
  my $script;
  
  # Are there conflicting names because of beginning with 'In_', or 'Is_'
@@ -1535,6 +1457,7 @@ sub objaddr($) {
  # after all the input has been processed.  But most can be skipped, as they
  # have the same descriptive phrases, such as being unassigned
  my @viacode;            # Contains the 1 million character names
+my @age;                # And their ages ("" if none)
  my @printable;          # boolean: And are those characters printable?
  my @annotate_char_type; # Contains a type of those characters, specifically
                          # for the purposes of annotation.
@@ -1569,12 +1492,28 @@ sub populate_char_info ($) {
      Carp::carp_extra_args(\@_) if main::DEBUG && @_;
  
      $viacode[$i] = $perl_charname->value_of($i) || "";
+    $age[$i] = (defined $age)
+               ? (($age->value_of($i) =~ / ^ \d \. \d $ /x)
+                  ? $age->value_of($i)
+                  : "")
+               : "";
  
      # A character is generally printable if Unicode says it is,
      # but below we make sure that most Unicode general category 'C' types
      # aren't.
      $printable[$i] = $print->contains($i);
  
+    # But the characters in this range were removed in v2.0 and replaced by
+    # different ones later.  Modern fonts will be for the replacement
+    # characters, so suppress printing them.
+    if (($v_version lt v2.0
+         || ($compare_versions && $compare_versions lt v2.0))
+        && (   $i >= $FIRST_REMOVED_HANGUL_SYLLABLE
+            && $i <= $FINAL_REMOVED_HANGUL_SYLLABLE))
+    {
+        $printable[$i] = 0;
+    }
+
      $annotate_char_type[$i] = $perl_charname->type_of($i) || 0;
  
      # Only these two regular types are treated specially for annotations
@@ -1587,7 +1526,6 @@ sub populate_char_info ($) {
      # point of the range.
      my $end;
      if (! $viacode[$i]) {
-        my $nonchar;
          if ($i > $MAX_UNICODE_CODEPOINT) {
              $viacode[$i] = 'Above-Unicode';
              $annotate_char_type[$i] = $ABOVE_UNICODE_TYPE;
@@ -1600,30 +1538,29 @@ sub populate_char_info ($) {
              $printable[$i] = 0;
              $end = $gc->table('Private_Use')->containing_range($i)->end;
          }
-        elsif ((defined ($nonchar =
-                            Property::property_ref('Noncharacter_Code_Point'))
-               && $nonchar->table('Y')->contains($i)))
-        {
+        elsif ($NChar->contains($i)) {
              $viacode[$i] = 'Noncharacter';
              $annotate_char_type[$i] = $NONCHARACTER_TYPE;
              $printable[$i] = 0;
-            $end = property_ref('Noncharacter_Code_Point')->table('Y')->
-                                                    containing_range($i)->end;
+            $end = $NChar->containing_range($i)->end;
          }
          elsif ($gc-> table('Control')->contains($i)) {
-            $viacode[$i] = property_ref('Name_Alias')->value_of($i) || 'Control';
+            my $name_ref = property_ref('Name_Alias');
+            $name_ref = property_ref('Unicode_1_Name') if ! defined $name_ref;
+            $viacode[$i] = (defined $name_ref)
+                           ? $name_ref->value_of($i)
+                           : 'Control';
              $annotate_char_type[$i] = $CONTROL_TYPE;
              $printable[$i] = 0;
          }
          elsif ($gc-> table('Unassigned')->contains($i)) {
              $annotate_char_type[$i] = $UNASSIGNED_TYPE;
              $printable[$i] = 0;
-            if ($v_version lt v2.0.0) { # No blocks in earliest releases
-                $viacode[$i] = 'Unassigned';
+            $viacode[$i] = 'Unassigned';
+
+            if (defined $block) { # No blocks in earliest releases
+                $viacode[$i] .= ', block=' . $block-> value_of($i);
                  $end = $gc-> table('Unassigned')->containing_range($i)->end;
-            }
-            else {
-                $viacode[$i] = 'Unassigned, block=' . $block-> value_of($i);
  
                  # Because we name the unassigned by the blocks they are in, it
                  # can't go past the end of that block, and it also can't go
@@ -1634,13 +1571,15 @@ sub populate_char_info ($) {
                             $unassigned_sans_noncharacters->
                                                      containing_range($i)->end);
              }
+            else {
+                $end = $i + 1;
+                while ($unassigned_sans_noncharacters->contains($end)) {
+                    $end++;
+                }
+                $end--;
+            }
          }
-        elsif ($v_version lt v2.0.0) {  # No surrogates in earliest releases
-            $viacode[$i] = $gc->value_of($i);
-            $annotate_char_type[$i] = $UNKNOWN_TYPE;
-            $printable[$i] = 0;
-        }
-        elsif ($gc-> table('Surrogate')->contains($i)) {
+        elsif ($perl->table('_Perl_Surrogate')->contains($i)) {
              $viacode[$i] = 'Surrogate';
              $annotate_char_type[$i] = $SURROGATE_TYPE;
              $printable[$i] = 0;
@@ -1660,7 +1599,21 @@ sub populate_char_info ($) {
      # appended to the name, do that.
      elsif ($annotate_char_type[$i] == $CP_IN_NAME) {
          $viacode[$i] .= sprintf("-%04X", $i);
-        $end = $perl_charname->containing_range($i)->end;
+
+        my $limit = $perl_charname->containing_range($i)->end;
+        if (defined $age) {
+            # Do all these as groups of the same age, instead of individually,
+            # because their names are so meaningless, and there are typically
+            # large quantities of them.
+            $end = $i + 1;
+            while ($end <= $limit && $age->value_of($end) == $age[$i]) {
+                $end++;
+            }
+            $end--;
+        }
+        else {
+            $end = $limit;
+        }
      }
  
      # And here, has a name, but if it's a hangul syllable one, replace it with
@@ -2112,6 +2065,7 @@ package Input_file;
  # while(next_line()) {...} loop.
  #
  # You can also set up handlers to
+#   0) call during object construction time, after everything else is done
  #   1) call before the first line is read, for pre processing
  #   2) call to adjust each line of the input before the main handler gets
  #      them.  This can be automatically generated, if appropriately simple
@@ -2123,19 +2077,29 @@ package Input_file;
  # each_line_handler()s.  So, if the format of the line is not in the desired
  # format for the main handler, these are used to do that adjusting.  They can
  # be stacked (by enclosing them in an [ anonymous array ] in the constructor,
-# so the $_ output of one is used as the input to the next.  None of the other
-# handlers are stackable, but could easily be changed to be so.
+# so the $_ output of one is used as the input to the next.  The eof handler
+# is also stackable.  None of the others are, but they could easily be
+# changed to be so.
+#
+# Some properties are used by the Perl core but aren't defined until later
+# Unicode releases.  The perl interpreter would have problems working when
+# compiled with an earlier Unicode version that doesn't have them, so we need
+# to define them somehow for those releases.  The 'Early' constructor
+# parameter can be used to automatically handle this.  It is essentially
+# ignored if the Unicode version being compiled has a data file for this
+# property.  Either code to execute or a file to read can be specified.
+# Details are at the %early definition.
  #
  # Most of the handlers can call insert_lines() or insert_adjusted_lines()
  # which insert the parameters as lines to be processed before the next input
-# file line is read.  This allows the EOF handler to flush buffers, for
+# file line is read.  This allows the EOF handler(s) to flush buffers, for
  # example.  The difference between the two routines is that the lines inserted
  # by insert_lines() are subjected to the each_line_handler()s.  (So if you
-# called it from such a handler, you would get infinite recursion.)  Lines
-# inserted by insert_adjusted_lines() go directly to the main handler without
-# any adjustments.  If the  post-processing handler calls any of these, there
-# will be no effect.  Some error checking for these conditions could be added,
-# but it hasn't been done.
+# called it from such a handler, you would get infinite recursion without some
+# mechanism to prevent that.)  Lines inserted by insert_adjusted_lines() go
+# directly to the main handler without any adjustments.  If the
+# post-processing handler calls any of these, there will be no effect.  Some
+# error checking for these conditions could be added, but it hasn't been done.
  #
  # carp_bad_line() should be called to warn of bad input lines, which clears $_
  # to prevent further processing of the line.  This routine will output the
@@ -2171,10 +2135,16 @@ sub trace { return main::trace(@_); }
      main::set_access('property', \%property, qw{ c r });
  
      my %optional;
-    # If this is true, the file is optional.  If not present, no warning is
-    # output.  If it is present, the string given by this parameter is
-    # evaluated, and if false the file is not processed.
-    main::set_access('optional', \%optional, 'c', 'r');
+    # This is either an unsigned number, or a list of property names.  In the
+    # former case, if it is non-zero, it means the file is optional, so if the
+    # file is absent, no warning about that is output.  In the latter case, it
+    # is a list of properties that the file (exclusively) defines.  If the
+    # file is present, tables for those properties will be produced; if
+    # absent, none will, even if they are listed elsewhere (namely
+    # PropertyAliases.txt and PropValueAliases.txt) as being in this release,
+    # and no warnings will be raised about them not being available.  (And no
+    # warning about the file itself will be raised.)
+    main::set_access('optional', \%optional, qw{ c readable_array } );
  
      my %non_skip;
      # This is used for debugging, to skip processing of all but a few input
@@ -2183,16 +2153,19 @@ sub trace { return main::trace(@_); }
      main::set_access('non_skip', \%non_skip, 'c');
  
      my %skip;
-    # This is used to skip processing of this input file semi-permanently,
-    # when it evaluates to true.  The value should be the reason the file is
-    # being skipped.  It is used for files that we aren't planning to process
-    # anytime soon, but want to allow to be in the directory and not raise a
-    # message that we are not handling.  Mostly for test files.  This is in
-    # contrast to the non_skip element, which is supposed to be used very
-    # temporarily for debugging.  Sets 'optional' to 1.  Also, files that we
-    # pretty much will never look at can be placed in the global
-    # %ignored_files instead.  Ones used here will be added to %skipped files
-    main::set_access('skip', \%skip, 'c');
+    # This is used to skip processing of this input file (semi-) permanently.
+    # The value should be the reason the file is being skipped.  It is used
+    # for files that we aren't planning to process anytime soon, but want to
+    # allow to be in the directory and be checked for their names not
+    # conflicting with any other files on a DOS 8.3 name filesystem, but to
+    # not otherwise be processed, and to not raise a warning about not being
+    # handled.  In the constructor call, any value that evaluates to a numeric
+    # 0 or undef means don't skip.  Any other value is a string giving the
+    # reason it is being skipped, and this will appear in generated pod.
+    # However, an empty string reason will suppress the pod entry.
+    # Internally, calls that evaluate to numeric 0 are changed into undef to
+    # distinguish them from an empty string call.
+    main::set_access('skip', \%skip, 'c', 'r');
  
      my %each_line_handler;
      # list of subroutines to look at and filter each non-comment line in the
@@ -2223,21 +2196,28 @@ sub trace { return main::trace(@_); }
      main::set_access('has_missings_defaults',
                                          \%has_missings_defaults, qw{ c r });
  
+    my %construction_time_handler;
+    # Subroutine to call at the end of the new method.  If undef, no such
+    # handler is called.
+    main::set_access('construction_time_handler',
+                                        \%construction_time_handler, qw{ c });
+
      my %pre_handler;
      # Subroutine to call before doing anything else in the file.  If undef, no
      # such handler is called.
      main::set_access('pre_handler', \%pre_handler, qw{ c });
  
      my %eof_handler;
-    # Subroutine to call upon getting an EOF on the input file, but before
+    # Subroutines to call upon getting an EOF on the input file, but before
      # that is returned to the main handler.  This is to allow buffers to be
      # flushed.  The handler is expected to call insert_lines() or
      # insert_adjusted() with the buffered material
-    main::set_access('eof_handler', \%eof_handler, qw{ c r });
+    main::set_access('eof_handler', \%eof_handler, qw{ c });
  
      my %post_handler;
      # Subroutine to call after all the lines of the file are read in and
-    # processed.  If undef, no such handler is called.
+    # processed.  If undef, no such handler is called.  Note that this cannot
+    # add lines to be processed; instead use eof_handler
      main::set_access('post_handler', \%post_handler, qw{ c });
  
      my %progress_message;
@@ -2265,6 +2245,69 @@ sub trace { return main::trace(@_); }
      # storage of '@missing' defaults lines
      main::set_access('missings', \%missings);
  
+    my %early;
+    # Used for properties that must be defined (for Perl's purposes) on
+    # versions of Unicode earlier than Unicode itself defines them.  The
+    # parameter is an array (it would be better to be a hash, but not worth
+    # bothering about due to its rare use).
+    #
+    # The first element is either a code reference to call when in a release
+    # earlier than the Unicode file is available in, or it is an alternate
+    # file to use instead of the non-existent one.  This file must have been
+    # plunked down in the same directory as mktables.  Should you be compiling
+    # on a release that needs such a file, mktables will abort the
+    # compilation, and tell you where to get the necessary file(s), and what
+    # name(s) to use to store them as.
+    # In the case of specifying an alternate file, the array must contain two
+    # further elements:
+    #
+    # [1] is the name of the property that will be generated by this file.
+    # The class automatically takes the input file and excludes any code
+    # points in it that were not assigned in the Unicode version being
+    # compiled.  It then uses this result to define the property in the given
+    # version.  Since the property doesn't actually exist in the Unicode
+    # version being compiled, this should be a name accessible only by core
+    # perl.  If it is the same name as the regular property, the constructor
+    # will mark the output table as a $PLACEHOLDER so that it doesn't actually
+    # get output, and so will be unusable by non-core code.  Otherwise it gets
+    # marked as $INTERNAL_ONLY.
+    #
+    # [2] is a property value to assign (only when compiling Unicode 1.1.5) to
+    # the Hangul syllables in that release (which were ripped out in version
+    # 2) for the given property.  (Hence it is ignored except when compiling
+    # version 1.  You only get one value that applies to all of them, which
+    # may not be the actual reality, but probably nobody cares anyway for
+    # these obsolete characters.)
+    #
+    # Not all files can be handled in the above way, and so the code ref
+    # alternative is available.  It can do whatever it needs to.  The other
+    # array elements are optional in this case, and the code is free to use or
+    # ignore them if they are present.
+    #
+    # Internally, the constructor unshifts a 0 or 1 onto this array to
+    # indicate if an early alternative is actually being used or not.  This
+    # makes for easier testing later on.
+    main::set_access('early', \%early, 'c');
+
+    my %required_even_in_debug_skip;
+    # debug_skip is used to speed up compilation during debugging by skipping
+    # processing files that are not needed for the task at hand.  However,
+    # some files pretty much can never be skipped, and this is used to specify
+    # that this is one of them.  In order to skip this file, the call to the
+    # constructor must be edited to comment out this parameter.
+    main::set_access('required_even_in_debug_skip',
+                     \%required_even_in_debug_skip, 'c');
+
+    my %withdrawn;
+    # Some files get removed from the Unicode DB.  This is a version object
+    # giving the first release without this file.
+    main::set_access('withdrawn', \%withdrawn, 'c');
+
+    my %in_this_release;
+    # Calculated value from %first_released and %withdrawn.  Are we compiling
+    # a Unicode release which includes this file?
+    main::set_access('in_this_release', \%in_this_release);
+
      sub _next_line;
      sub _next_line_with_remapped_range;
  
@@ -2277,22 +2320,23 @@ sub trace { return main::trace(@_); }
          # Set defaults
          $handler{$addr} = \&main::process_generic_property_file;
          $non_skip{$addr} = 0;
-        $skip{$addr} = 0;
+        $skip{$addr} = undef;
          $has_missings_defaults{$addr} = $NO_DEFAULTS;
          $handle{$addr} = undef;
          $added_lines{$addr} = [ ];
          $remapped_lines{$addr} = [ ];
          $each_line_handler{$addr} = [ ];
+        $eof_handler{$addr} = [ ];
          $errors{$addr} = { };
          $missings{$addr} = [ ];
+        $early{$addr} = [ ];
+        $optional{$addr} = [ ];
  
          # Two positional parameters.
          return Carp::carp_too_few_args(\@_, 2) if main::DEBUG && @_ < 2;
          $file{$addr} = main::internal_file_to_platform(shift);
          $first_released{$addr} = shift;
  
-        undef $file{$addr} if $first_released{$addr} gt $v_version;
-
          # The rest of the arguments are key => value pairs
          # %constructor_fields has been set up earlier to list all possible
          # ones.  Either set or push, depending on how the default has been set
@@ -2324,30 +2368,206 @@ sub trace { return main::trace(@_); }
              delete $args{$key};
          };
  
-        # If the file has a property for it, it means that the property is not
-        # listed in the file's entries.  So add a handler to the list of line
-        # handlers to insert the property name into the lines, to provide a
-        # uniform interface to the final processing subroutine.
-        # the final code doesn't have to worry about that.
-        if ($property{$addr}) {
-            push @{$each_line_handler{$addr}}, \&_insert_property_into_line;
+        $non_skip{$addr} = 1 if $required_even_in_debug_skip{$addr};
+
+        # Convert 0 (meaning don't skip) to undef
+        undef $skip{$addr} unless $skip{$addr};
+
+        # Handle the case where this file is optional
+        my $pod_message_for_non_existent_optional = "";
+        if ($optional{$addr}->@*) {
+
+            # First element is the pod message
+            $pod_message_for_non_existent_optional
+                                                = shift $optional{$addr}->@*;
+            # Convert a 0 'Optional' argument to an empty list to make later
+            # code more concise.
+            if (   $optional{$addr}->@*
+                && $optional{$addr}->@* == 1
+                && $optional{$addr}[0] ne ""
+                && $optional{$addr}[0] !~ /\D/
+                && $optional{$addr}[0] == 0)
+            {
+                $optional{$addr} = [ ];
+            }
+            else {  # But if the only element doesn't evaluate to 0, make sure
+                    # that this file is indeed considered optional below.
+                unshift $optional{$addr}->@*, 1;
+            }
+        }
+
+        my $progress;
+        my $function_instead_of_file = 0;
+
+        # If we are compiling a Unicode release earlier than the file became
+        # available, the constructor may have supplied a substitute
+        if ($first_released{$addr} gt $v_version && $early{$addr}->@*) {
+
+            # Yes, we have a substitute, that we will use; mark it so
+            unshift $early{$addr}->@*, 1;
+
+            # See the definition of %early for what the array elements mean.
+            # If we have a property this defines, create a table and default
+            # map for it now (at essentially compile time), so that it will be
+            # available for the whole of run time.  (We will want to add this
+            # name as an alias when we are using the official property name;
+            # but this must be deferred until run(), because at construction
+            # time the official names have yet to be defined.)
+            if ($early{$addr}[2]) {
+                my $fate = ($property{$addr}
+                            && $property{$addr} eq $early{$addr}[2])
+                          ? $PLACEHOLDER
+                          : $INTERNAL_ONLY;
+                my $prop_object = Property->new($early{$addr}[2],
+                                                Fate => $fate,
+                                                Perl_Extension => 1,
+                                                );
+
+                # Use the default mapping for the regular property for this
+                # substitute one.
+                if (    defined $property{$addr}
+                    &&  defined $default_mapping{$property{$addr}})
+                {
+                    $prop_object
+                        ->set_default_map($default_mapping{$property{$addr}});
+                }
+            }
+
+            if (ref $early{$addr}[1] eq 'CODE') {
+                $function_instead_of_file = 1;
+
+                # If the first element of the array is a code ref, the others
+                # are optional.
+                $handler{$addr} = $early{$addr}[1];
+                $property{$addr} = $early{$addr}[2]
+                                                if defined $early{$addr}[2];
+                $progress = "substitute $file{$addr}";
+
+                undef $file{$addr};
+            }
+            else {  # Specifying a substitute file
+
+                if (! main::file_exists($early{$addr}[1])) {
+
+                    # If we don't see the substitute file, generate an error
+                    # message giving the needed things, and add it to the list
+                    # of such to output before actual processing happens
+                    # (hence the user finds out all of them in one run).
+                    # Instead of creating a general method for NameAliases,
+                    # hard-code it here, as there is unlikely to ever be a
+                    # second one which needs special handling.
+                    my $string_version = ($file{$addr} eq "NameAliases.txt")
+                                    ? 'at least 6.1 (the later, the better)'
+                                    : sprintf "%vd", $first_released{$addr};
+                    push @missing_early_files, <<END;
+'$file{$addr}' version $string_version should be copied to '$early{$addr}[1]'.
+END
+                    ;
+                    return;
+                }
+                $progress = $early{$addr}[1];
+                $progress .= ", substituting for $file{$addr}" if $file{$addr};
+                $file{$addr} = $early{$addr}[1];
+                $property{$addr} = $early{$addr}[2];
+
+                # Ignore code points not in the version being compiled
+                push $each_line_handler{$addr}->@*, \&_exclude_unassigned;
+
+                if (   $v_version lt v2.0        # Hanguls in this release ...
+                    && defined $early{$addr}[3]) # ... need special treatment
+                {
+                    push $eof_handler{$addr}->@*, \&_fixup_obsolete_hanguls;
+                }
+            }
+
+            # And this substitute is valid for all releases.
+            $first_released{$addr} = v0;
+        }
+        else {  # Normal behavior
+            $progress = $file{$addr};
+            unshift $early{$addr}->@*, 0; # No substitute
+        }
+
+        my $file = $file{$addr};
+        $progress_message{$addr} = "Processing $progress"
+                                            unless $progress_message{$addr};
+
+        # A file should be there if it is within the window of versions for
+        # which Unicode supplies it
+        if ($withdrawn{$addr} && $withdrawn{$addr} le $v_version) {
+            $in_this_release{$addr} = 0;
+            $skip{$addr} = "";
+        }
+        else {
+            $in_this_release{$addr} = $first_released{$addr} le $v_version;
+
+            # Check that the file for this object (possibly using a substitute
+            # for early releases) exists or we have a function alternative
+            if (   ! $function_instead_of_file
+                && ! main::file_exists($file))
+            {
+                # Here there is nothing available for this release.  This is
+                # fine if we aren't expecting anything in this release.
+                if (! $in_this_release{$addr}) {
+                    $skip{$addr} = "";  # Don't remark since we expected
+                                        # nothing and got nothing
+                }
+                elsif ($optional{$addr}->@*) {
+
+                    # Here the file is optional in this release; Use the
+                    # passed in text to document this case in the pod.
+                    $skip{$addr} = $pod_message_for_non_existent_optional;
+                }
+                elsif (   $in_this_release{$addr}
+                       && ! defined $skip{$addr}
+                       && defined $file)
+                { # Doesn't exist but should.
+                    $skip{$addr} = "'$file' not found.  Possibly Big problems";
+                    Carp::my_carp($skip{$addr});
+                }
+            }
+            elsif ($debug_skip && ! defined $skip{$addr} && ! $non_skip{$addr})
+            {
+
+                # The file exists; if not skipped for another reason, and we are
+                # skipping most everything during debugging builds, use that as
+                # the skip reason.
+                $skip{$addr} = '$debug_skip is on'
+            }
          }
  
-        if ($non_skip{$addr} && ! $debug_skip && $verbosity) {
-            print "Warning: " . __PACKAGE__ . " constructor for $file{$addr} has useless 'non_skip' in it\n";
+        if (   ! $debug_skip
+            && $non_skip{$addr}
+            && ! $required_even_in_debug_skip{$addr}
+            && $verbosity)
+        {
+            print "Warning: " . __PACKAGE__ . " constructor for $file has useless 'non_skip' in it\n";
          }
  
-        # If skipping, set to optional, and add to list of ignored files,
-        # including its reason
-        if ($skip{$addr}) {
-            $optional{$addr} = 1;
-            $skipped_files{$file{$addr}} = $skip{$addr} if $file{$addr};
+        # Here, we have figured out if we will be skipping this file or not.
+        # If so, we add any single property it defines to any passed in
+        # optional property list.  These will be dealt with at run time.
+        if (defined $skip{$addr}) {
+            if ($property{$addr}) {
+                push $optional{$addr}->@*, $property{$addr};
+            }
+        } # Otherwise, are going to process the file.
+        elsif ($property{$addr}) {
+
+            # If the file has a property defined in the constructor for it, it
+            # means that the property is not listed in the file's entries.  So
+            # add a handler (to the list of line handlers) to insert the
+            # property name into the lines, to provide a uniform interface to
+            # the final processing subroutine.
+            push @{$each_line_handler{$addr}}, \&_insert_property_into_line;
          }
          elsif ($properties{$addr}) {
  
-            # Add a handler for each line in the input so that it creates a
-            # separate input line for each property in those input lines, thus
-            # making them suitable for process_generic_property_file().
+            # Similarly, there may be more than one property represented on
+            # each line, with no clue but the constructor input what those
+            # might be.  Add a handler for each line in the input so that it
+            # creates a separate input line for each property in those input
+            # lines, thus making them suitable to handle generically.
  
              push @{$each_line_handler{$addr}},
                   sub {
@@ -2378,7 +2598,7 @@ sub trace { return main::trace(@_); }
                  };
          }
  
-        {   # On non-ascii platforms, we use a special handler
+        {   # On non-ascii platforms, we use a special pre-handler
              no strict;
              no warnings 'once';
              *next_line = (main::NON_ASCII_PLATFORM)
@@ -2386,6 +2606,9 @@ sub trace { return main::trace(@_); }
                           : *_next_line;
          }
  
+        &{$construction_time_handler{$addr}}($self)
+                                        if $construction_time_handler{$addr};
+
          return $self;
      }
  
@@ -2403,13 +2626,13 @@ sub trace { return main::trace(@_); }
          return __PACKAGE__ . " object for " . $self->file;
      }
  
-    # flag to make sure extracted files are processed early
-    my $seen_non_extracted_non_age = 0;
-
      sub run {
          # Process the input object $self.  This opens and closes the file and
          # calls all the handlers for it.  Currently,  this can only be called
-        # once per file, as it destroy's the EOF handler
+        # once per file, as it destroys the EOF handlers
+
+        # flag to make sure extracted files are processed early
+        state $seen_non_extracted = 0;
  
          my $self = shift;
          Carp::carp_extra_args(\@_) if main::DEBUG && @_;
@@ -2418,61 +2641,14 @@ sub trace { return main::trace(@_); }
  
          my $file = $file{$addr};
  
-        # Don't process if not expecting this file (because released later
-        # than this Unicode version), and isn't there.  This means if someone
-        # copies it into an earlier version's directory, we will go ahead and
-        # process it.
-        return if $first_released{$addr} gt $v_version
-                  && (! defined $file || ! -e $file);
-
-        # If in debugging mode and this file doesn't have the non-skip
-        # flag set, and isn't one of the critical files, skip it.
-        if ($debug_skip
-            && $first_released{$addr} ne v0
-            && ! $non_skip{$addr})
-        {
-            print "Skipping $file in debugging\n" if $verbosity;
-            return;
-        }
-
-        # File could be optional
-        if ($optional{$addr}) {
-            return unless -e $file;
-            my $result = eval $optional{$addr};
-            if (! defined $result) {
-                Carp::my_carp_bug("Got '$@' when tried to eval $optional{$addr}.  $file Skipped.");
-                return;
-            }
-            if (! $result) {
-                if ($verbosity) {
-                    print STDERR "Skipping processing input file '$file' because '$optional{$addr}' is not true\n";
-                }
-                return;
-            }
-        }
-
-        if (! defined $file || ! -e $file) {
-
-            # If the file doesn't exist, see if have internal data for it
-            # (based on first_released being 0).
-            if ($first_released{$addr} eq v0) {
-                $handle{$addr} = 'pretend_is_open';
-            }
-            else {
-                if (! $optional{$addr}  # File could be optional
-                    && $v_version ge $first_released{$addr})
-                {
-                    print STDERR "Skipping processing input file '$file' because not found\n";
-                }
-                return;
-            }
+        if (! $file) {
+            $handle{$addr} = 'pretend_is_open';
          }
          else {
-
-            # Here, the file exists.  Some platforms may change the case of
-            # its name
-            if ($seen_non_extracted_non_age) {
-                if ($file =~ /$EXTRACTED/i) {
+            if ($seen_non_extracted) {
+                if ($file =~ /$EXTRACTED/i) # Some platforms may change the
+                                            # case of the file's name
+                {
                      Carp::my_carp_bug(main::join_lines(<<END
  $file should be processed just after the 'Prop...Alias' files, and before
  anything not in the $EXTRACTED_DIR directory.  Proceeding, but the results may
@@ -2482,81 +2658,149 @@ END
                  }
              }
              elsif ($EXTRACTED_DIR
-                    && $first_released{$addr} ne v0
-                    && $file !~ /$EXTRACTED/i
-                    && lc($file) ne 'dage.txt')
+
+                    # We only do this check for generic property files
+                    && $handler{$addr} == \&main::process_generic_property_file
+
+                    && $file !~ /$EXTRACTED/i)
              {
                  # We don't set this (by the 'if' above) if we have no
                  # extracted directory, so if running on an early version,
                  # this test won't work.  Not worth worrying about.
-                $seen_non_extracted_non_age = 1;
+                $seen_non_extracted = 1;
              }
  
-            # And mark the file as having being processed, and warn if it
+            # Mark the file as having been processed, and warn if it
              # isn't a file we are expecting.  As we process the files,
              # they are deleted from the hash, so any that remain at the
              # end of the program are files that we didn't process.
              my $fkey = File::Spec->rel2abs($file);
-            my $expecting = delete $potential_files{lc($fkey)};
+            my $exists = delete $potential_files{lc($fkey)};
+
+            Carp::my_carp("Was not expecting '$file'.")
+                                    if $exists && ! $in_this_release{$addr};
+
+            # If there is special handling for compiling Unicode releases
+            # earlier than the first one in which Unicode defines this
+            # property ...
+            if ($early{$addr}->@* > 1) {
+
+                # Mark as processed any substitute file that would be used in
+                # such a release
+                $fkey = File::Spec->rel2abs($early{$addr}[1]);
+                delete $potential_files{lc($fkey)};
+
+                # As commented in the constructor code, when using the
+                # official property, we still have to allow the publicly
+                # inaccessible early name so that the core code which uses it
+                # will work regardless.
+                if (! $early{$addr}[0] && $early{$addr}->@* > 2) {
+                    my $early_property_name = $early{$addr}[2];
+                    if ($property{$addr} ne $early_property_name) {
+                        main::property_ref($property{$addr})
+                                            ->add_alias($early_property_name);
+                    }
+                }
+            }
+
+            # We may be skipping this file ...
+            if (defined $skip{$addr}) {
  
-            Carp::my_carp("Was not expecting '$file'.") if
-                    ! $expecting
-                    && ! defined $handle{$addr};
+                # If the file isn't supposed to be in this release, there is
+                # nothing to do
+                if ($in_this_release{$addr}) {
+
+                    # But otherwise, we may print a message
+                    if ($debug_skip) {
+                        print STDERR "Skipping input file '$file'",
+                                     " because '$skip{$addr}'\n";
+                    }
+
+                    # And add it to the list of skipped files, which is later
+                    # used to make the pod
+                    $skipped_files{$file} = $skip{$addr};
+
+                    # The 'optional' list contains properties that are also to
+                    # be skipped along with the file.  (There may also be
+                    # digits which are just placeholders to make sure it isn't
+                    # an empty list.)
+                    foreach my $property ($optional{$addr}->@*) {
+                        next unless $property =~ /\D/;
+                        my $prop_object = main::property_ref($property);
+                        next unless defined $prop_object;
+                        $prop_object->set_fate($SUPPRESSED, $skip{$addr});
+                    }
+                }
  
-            # Having deleted from expected files, we can quit if not to do
-            # anything.  Don't print progress unless really want verbosity
-            if ($skip{$addr}) {
-                print "Skipping $file.\n" if $verbosity >= $VERBOSE;
                  return;
              }
  
-            # Open the file, converting the slashes used in this program
-            # into the proper form for the OS
+            # Here, we are going to process the file.  Open it, converting the
+            # slashes used in this program into the proper form for the OS
              my $file_handle;
              if (not open $file_handle, "<", $file) {
                  Carp::my_carp("Can't open $file.  Skipping: $!");
-                return 0;
+                return;
              }
              $handle{$addr} = $file_handle; # Cache the open file handle
  
-            if ($v_version ge v3.2.0 && lc($file) ne 'unicodedata.txt') {
+            # If possible, make sure that the file is the correct version.
+            # (This data isn't available on early Unicode releases or in
+            # UnicodeData.txt.)  We don't do this check if we are using a
+            # substitute file instead of the official one (though the code
+            # could be extended to do so).
+            if ($in_this_release{$addr}
+                && ! $early{$addr}[0]
+                && lc($file) ne 'unicodedata.txt')
+            {
                  if ($file !~ /^Unihan/i) {
-                    $_ = <$file_handle>;
-                    if ($_ !~ / - $string_version \. /x) {
-                        chomp;
-                        $_ =~ s/^#\s*//;
-                        die Carp::my_carp("File '$file' is version '$_'.  It should be version $string_version");
+
+                    # The non-Unihan files started getting version numbers in
+                    # 3.2, but some files in 4.0 are unchanged from 3.2, and
+                    # marked as 3.2.  4.0.1 is the first version where there
+                    # are no files marked as being from less than 4.0, though
+                    # some are marked as 4.0.  In versions after that, the
+                    # numbers are correct.
+                    if ($v_version ge v4.0.1) {
+                        $_ = <$file_handle>;    # The version number is in the
+                                                # very first line
+                        if ($_ !~ / - $string_version \. /x) {
+                            chomp;
+                            $_ =~ s/^#\s*//;
+
+                            # 4.0.1 had some valid files that weren't updated.
+                            if (! ($v_version eq v4.0.1 && $_ =~ /4\.0\.0/)) {
+                                die Carp::my_carp("File '$file' is version "
+                                                . "'$_'.  It should be "
+                                                . "version $string_version");
+                            }
+                        }
                      }
                  }
-                else {
+                elsif ($v_version ge v6.0.0) { # Unihan
+
+                    # Unihan files didn't get accurate version numbers until
+                    # 6.0.  The version is somewhere in the first comment
+                    # block
                      while (<$file_handle>) {
                          if ($_ !~ /^#/) {
-                            Carp::my_carp_bug("Could not find the expected version info in file '$file'");
+                            Carp::my_carp_bug("Could not find the expected "
+                                            . "version info in file '$file'");
                              last;
                          }
                          chomp;
                          $_ =~ s/^#\s*//;
                          next if $_ !~ / version: /x;
                          last if $_ =~ /$string_version/;
-                        die Carp::my_carp("File '$file' is '$_'.  It should be version $string_version");
+                        die Carp::my_carp("File '$file' is version "
+                                        . "'$_'.  It should be "
+                                        . "version $string_version");
                      }
                  }
              }
          }
  
-        if ($verbosity >= $PROGRESS) {
-            if ($progress_message{$addr}) {
-                print "$progress_message{$addr}\n";
-            }
-            else {
-                # If using a virtual file, say so.
-                print "Processing ", (-e $file)
-                                       ? $file
-                                       : "substitute $file",
-                                     "\n";
-            }
-        }
-
+        print "$progress_message{$addr}\n" if $verbosity >= $PROGRESS;
  
          # Call any special handler for before the file.
          &{$pre_handler{$addr}}($self) if $pre_handler{$addr};
@@ -2744,11 +2988,11 @@ END
              return 1;
          } # End of looping through lines.
  
-        # If there is an EOF handler, call it (only once) and if it generates
+        # If there are EOF handlers, call each (only once) and if it generates
          # more lines to process go back in the loop to handle them.
-        if ($eof_handler{$addr}) {
-            &{$eof_handler{$addr}}($self);
-            $eof_handler{$addr} = "";   # Currently only get one shot at it.
+        while ($eof_handler{$addr}->@*) {
+            &{$eof_handler{$addr}[0]}($self);
+            shift $eof_handler{$addr}->@*;   # Currently only get one shot at it.
              goto LINE if $added_lines{$addr};
          }
  
@@ -2945,6 +3189,83 @@ END
          return @return;
      }
  
+    sub _exclude_unassigned {
+
+        # Takes the line in $_, whose first semicolon-separated field is
+        # expected to be a code point or a range of them, and excludes the
+        # code points that aren't assigned in this release:
+        #   * if none of them is assigned, $_ is set to the empty string;
+        #   * if all of them are assigned, $_ is left untouched;
+        #   * otherwise, a replacement line is queued via insert_lines() for
+        #     each assigned subrange (keeping the remaining fields), and $_
+        #     is set to the empty string.
+        # $_ is also left untouched if its first field doesn't look like a
+        # code point or range.  There are no parameters besides the invocant,
+        # and nothing is returned.
+
+        # How many of the lines queued below this routine has yet to be
+        # called on.  Being a 'state' variable, there is just one counter, no
+        # matter which object this is invoked on.
+        state $skip_inserted_count = 0;
+
+        # The queued lines contain only assigned code points, so nothing
+        # needs to be done when called on one of them.
+        if ($skip_inserted_count) {
+            $skip_inserted_count--;
+            return;
+        }
+
+        # Find what code points are assigned in this release
+        main::calculate_Assigned() if ! defined $Assigned;
+
+        my $self = shift;
+        Carp::carp_extra_args(\@_) if main::DEBUG && @_;
+
+        my ($range, @remainder)
+            = split /\s*;\s*/, $_, -1; # -1 => retain trailing null fields
+
+        # Examine the range.
+        if ($range =~ /^ ($code_point_re) (?:\.\. ($code_point_re) )? $/x)
+        {
+            my $low = hex $1;
+            my $high = (defined $2) ? hex $2 : $low;
+
+            # Split the range into subranges of just those code points in it
+            # that are assigned.
+            my @ranges = (Range_List->new(Initialize
+                              => Range->new($low, $high)) & $Assigned)->ranges;
+
+            if (! @ranges) {
+
+                # Nothing in the original range is assigned in this release.
+                $_ = "";
+            }
+            elsif (   @ranges != 1
+                   || $ranges[0]->start != $low
+                   || $ranges[0]->end != $high)
+            {
+
+                # Here, some code points in the original range aren't in this
+                # release; @ranges gives the ones that are.  (A lone subrange
+                # that is narrower than the original comes here too;
+                # otherwise the unassigned code points at its edges would be
+                # retained.)  Create fake input lines for each of the ranges,
+                # and set things up so that when this routine is called on
+                # that fake input, it will do nothing.
+                $skip_inserted_count = @ranges;
+                my $remainder = join ";", @remainder;
+                for my $subrange (@ranges) {
+                    $self->insert_lines(sprintf("%04X..%04X;%s",
+                              $subrange->start, $subrange->end, $remainder));
+                }
+                $_ = "";    # The original range is now defunct.
+            }
+
+            # Otherwise everything in the original range is assigned in this
+            # release, so the line is left to be handled normally.
+        }
+
+        return;
+    }
+
+    sub _fixup_obsolete_hanguls {
+
+        # Only Unicode version 1 had Hangul syllables at the code points
+        # $FIRST_REMOVED_HANGUL_SYLLABLE through
+        # $FINAL_REMOVED_HANGUL_SYLLABLE; the data from every later release
+        # treats those code points as being something else.  So this is
+        # called only when compiling version 1, to override whatever such
+        # later data says about them.
+
+        my $self = shift;
+        Carp::carp_extra_args(\@_) if main::DEBUG && @_;
+
+        # Key under which this object's fields are stored in the per-field
+        # hashes
+        my $addr = do { no overloading; pack 'J', $self; };
+
+        my $property_object = main::property_ref($property{$addr});
+
+        # The value to use for these code points was passed in
+        my $hangul_value = $early{$addr}[3];
+
+        return $property_object->add_map($FIRST_REMOVED_HANGUL_SYLLABLE,
+                                         $FINAL_REMOVED_HANGUL_SYLLABLE,
+                                         $hangul_value,
+                                         Replace => $UNCONDITIONALLY);
+    }
+
      sub _insert_property_into_line {
          # Add a property field to $_, if this file requires it.
  
@@ -3029,6 +3350,8 @@ package Multi_Default;
          #        .
          #        .
          #        'U'));
+        # It is best to leave the final value be the one that matches the
+        # above-Unicode code points.
  
          my $class = shift;
  
@@ -3734,7 +4057,7 @@ sub trace { return main::trace(@_); }
          #       => $MULTIPLE_BEFORE means that if this range duplicates an
          #                         existing one, but has a different value,
          #                         don't replace the existing one, but insert
-        #                         this, one so that the same range can occur
+        #                         this one so that the same range can occur
          #                         multiple times.  They are stored LIFO, so
          #                         that the final one inserted is the first one
          #                         returned in an ordered search of the table.
@@ -3749,6 +4072,7 @@ sub trace { return main::trace(@_); }
          #                         existing range, this one is discarded
          #                         (leaving the existing one in its original,
          #                         higher priority position
+        #       => $CROAK         Die with an error if it is already there
          #       => anything else  is the same as => $IF_NOT_EQUIVALENT
          #
          # "same value" means identical for non-type-0 ranges, and it means
@@ -3837,7 +4161,7 @@ sub trace { return main::trace(@_); }
  
                  # Here, the new range starts just after the current highest in
                  # the range list, and they have the same type and value.
-                # Extend the current range to incorporate the new one.
+                # Extend the existing range to incorporate the new one.
                  @{$r}[-1]->set_end($end);
              }
  
@@ -4230,7 +4554,7 @@ sub trace { return main::trace(@_); }
              # In other words,
              #   r[$i-1]->end < $start <= r[$i]->end
              # And:
-            #   r[$i-1]->end < $start <= $end <= r[$j+1]->start
+            #   r[$i-1]->end < $start <= $end < r[$j+1]->start
              #
              # Also:
              #   $clean_insert is a boolean which is set true if and only if
@@ -5095,6 +5419,7 @@ sub trace { return main::trace(@_); }
          my $note = delete $args{'Note'};
          my $make_re_pod_entry = delete $args{'Re_Pod_Entry'};
          my $perl_extension = delete $args{'Perl_Extension'};
+        my $suppression_reason = delete $args{'Suppression_Reason'};
  
          # Shouldn't have any left over
          Carp::carp_extra_args(\%args) if main::DEBUG && %args;
@@ -5136,11 +5461,12 @@ END
          {
              $fate{$addr} = $SUPPRESSED;
          }
-        elsif ($fate{$addr} == $SUPPRESSED
-               && ! exists $why_suppressed{$property{$addr}->complete_name})
-        {
-            Carp::my_carp_bug("There is no current capability to set the reason for suppressing.");
-            # perhaps Fate => [ $SUPPRESSED, "reason" ]
+        elsif ($fate{$addr} == $SUPPRESSED) {
+            Carp::my_carp_bug("Need reason for suppressing") unless $suppression_reason;
+            # Though currently unused
+        }
+        elsif ($suppression_reason) {
+            Carp::my_carp_bug("A reason was given for suppressing, but not suppressed");
          }
  
          # If hasn't set its status already, see if it is on one of the
@@ -5268,17 +5594,18 @@ END
          my %args = @_;
          my $loose_match = delete $args{'Fuzzy'};
  
-        my $make_re_pod_entry = delete $args{'Re_Pod_Entry'};
-        $make_re_pod_entry = $YES unless defined $make_re_pod_entry;
-
          my $ok_as_filename = delete $args{'OK_as_Filename'};
          $ok_as_filename = 1 unless defined $ok_as_filename;
  
-        my $status = delete $args{'Status'};
-        $status = $NORMAL unless defined $status;
-
          # An internal name does not get documented, unless overridden by the
-        # input.
+        # input; same for making tests for it.
+        my $status = delete $args{'Status'} || (($name =~ /^_/)
+                                                ? $INTERNAL_ALIAS
+                                                : $NORMAL);
+        my $make_re_pod_entry = delete $args{'Re_Pod_Entry'}
+                                            // (($status ne $INTERNAL_ALIAS)
+                                               ? (($name =~ /^_/) ? $NO : $YES)
+                                               : $NO);
          my $ucd = delete $args{'UCD'} // (($name =~ /^_/) ? 0 : 1);
  
          Carp::carp_extra_args(\%args) if main::DEBUG && %args;
@@ -5348,7 +5675,7 @@ END
                  $insert_position,
                  0,
                  Alias->new($name, $loose_match, $make_re_pod_entry,
-                                                $ok_as_filename, $status, $ucd);
+                           $ok_as_filename, $status, $ucd);
  
          # This name may be shorter than any existing ones, so clear the cache
          # of the shortest, so will have to be recalculated.
@@ -6052,7 +6379,15 @@ END
                                      $range_name = "Hangul Syllable";
                                  }
  
-                                if ($i != $start || $range_end < $end) {
+                                # If the annotation would just repeat what's
+                                # already being output as the range, skip it.
+                                # (When an inversion list is being written, it
+                                # isn't a repeat, as that always is in
+                                # decimal)
+                                if (   $write_as_invlist
+                                    || $i != $start
+                                    || $range_end < $end)
+                                {
                                      if ($range_end < $MAX_WORKING_CODEPOINT)
                                      {
                                          $annotation = sprintf "%04X..%04X",
@@ -6066,7 +6401,11 @@ END
                                  else { # Indent if not displaying code points
                                      $annotation = " " x 4;
                                  }
-                                $annotation .= " $range_name" if $range_name;
+
+                                if ($range_name) {
+                                    $annotation .= " $age[$i]" if $age[$i];
+                                    $annotation .= " $range_name";
+                                }
  
                                  # Include the number of code points in the
                                  # range
@@ -6143,7 +6482,7 @@ END
                                  }
  
                                  if ($include_cp) {
-                                    $annotation = sprintf "%04X", $i;
+                                    $annotation = sprintf "%04X %s", $i, $age[$i];
                                      if ($use_adjustments) {
                                          $annotation .= " => $output_value";
                                      }
@@ -6269,7 +6608,8 @@ END
          }
  
          # Save the reason for suppression for output
-        if ($fate == $SUPPRESSED && defined $reason) {
+        if ($fate >= $SUPPRESSED) {
+            $reason = "" unless defined $reason;
              $why_suppressed{$complete_name{$addr}} = $reason;
          }
  
@@ -6765,7 +7105,7 @@ END
          # The ranges that map to the default aren't output, so subtract that
          # to get those actually output.  A property with matching tables
          # already has the information calculated.
-        if ($property->type != $STRING) {
+        if ($property->type != $STRING && $property->type != $FORCED_BINARY) {
              $count -= $property->table($default_map)->count;
          }
          elsif (defined $default_map) {
@@ -6822,7 +7162,7 @@ END
              else {
                  $cp = "one of the $code_points";
              }
-            $cp .= " in Unicode Version $string_version for which the mapping is not to $map_to";
+            $cp .= " in Unicode Version $unicode_version for which the mapping is not to $map_to";
          }
  
          my $comment = "";
@@ -6841,9 +7181,11 @@ END
          $comment .= "This file returns the $mapping:\n";
  
          my $ucd_accessible_name = "";
+        my $has_underscore_name = 0;
          my $full_name = $self->property->full_name;
          for my $i (0 .. @property_aliases - 1) {
              my $name = $property_aliases[$i]->name;
+            $has_underscore_name = 1 if $name =~ /^_/;
              $comment .= sprintf("%-8s%s\n", " ", $name . '(cp)');
              if ($property_aliases[$i]->ucd) {
                  if ($name eq $full_name) {
@@ -6856,7 +7198,12 @@ END
          }
          $comment .= "\nwhere 'cp' is $cp.";
          if ($ucd_accessible_name) {
-            $comment .= "  Note that $these_mappings $are accessible via the functions prop_invmap('$full_name') or charprop() in Unicode::UCD";
+            $comment .= "  Note that $these_mappings";
+            if ($has_underscore_name) {
+                $comment .= " (except for the one(s) that begin with an underscore)";
+            }
+            $comment .= " $are accessible via the functions prop_invmap('$full_name') or charprop() in Unicode::UCD";
+
          }
  
          # And append any commentary already set from the actual property.
@@ -6890,7 +7237,7 @@ END
                  # There are tables which end up only having one element per
                  # range, but it is not worth keeping track of for making just
                  # this comment a little better.
-                $comment.= <<END;
+                $comment .= <<END;
  non-comment portions of the main body of lines of this file is:
  START\\tSTOP\\tMAPPING where START is the starting code point of the
  range, in hex; STOP is the ending point, or if omitted, the range has just one
@@ -7850,7 +8197,6 @@ END
                              main::uniques($leader, @{$equivalents{$addr}});
          my $has_unrelated = (@parents >= 2);  # boolean, ? are there unrelated
                                                # tables
-
          for my $parent (@parents) {
  
              my $property = $parent->property;
@@ -7908,7 +8254,7 @@ END
                  # commentary that the other combinations are possible.
                  # Because regular expressions don't recognize things like
                  # \p{jsn=}, only look at non-null right-hand-sides
-                my @property_aliases = $table_property->aliases;
+                my @property_aliases = grep { $_->status ne $INTERNAL_ALIAS } $table_property->aliases;
                  my @table_aliases = grep { $_->name ne "" } $table->aliases;
  
                  # The alias lists above are already ordered in the order we
@@ -7920,8 +8266,7 @@ END
                                      ?  main::max(scalar @table_aliases,
                                                   scalar @property_aliases)
                                      : 0;
-                trace "$listed_combos, tables=", scalar @table_aliases, "; names=", scalar @property_aliases if main::DEBUG;
-
+                trace "$listed_combos, tables=", scalar @table_aliases, "; property names=", scalar @property_aliases if main::DEBUG;
  
                  my $property_had_compound_name = 0;
  
@@ -8063,7 +8408,7 @@ resources, every table that matches the identical set of code points in this
  version of Unicode uses this file.  Each one is listed in a separate group
  below.  It could be that the tables will match the same set of code points in
  other Unicode releases, or it could be purely coincidence that they happen to
-be the same in Unicode $string_version, and hence may not in other versions.
+be the same in Unicode $unicode_version, and hence may not in other versions.
  
  END
          }
@@ -8072,8 +8417,14 @@ END
              foreach my $flag (sort keys %flags) {
                  $comment .= <<END;
  '$flag' below means that this form is $flags{$flag}.
-Consult $pod_file.pod
  END
+                if ($flag eq $INTERNAL_ALIAS) {
+                    $comment .= "DO NOT USE!!!";
+                }
+                else {
+                    $comment .= "Consult $pod_file.pod";
+                }
+                $comment .= "\n";
              }
              $comment .= "\n";
          }
@@ -8082,7 +8433,7 @@ END
              Carp::my_carp("No regular expression construct can match $leader, as all names for it are the null string.  Creating file anyway.");
              $comment .= <<END;
  This file returns the $code_points in Unicode Version
-$string_version for
+$unicode_version for
  $leader, but it is inaccessible through Perl regular expressions, as
  "\\p{prop=}" is not recognized.
  END
@@ -8090,7 +8441,7 @@ END
          } else {
              $comment .= <<END;
  This file returns the $code_points in Unicode Version
-$string_version that
+$unicode_version that
  $match$synonyms:
  
  $matches_comment
@@ -8438,18 +8789,27 @@ sub trace { return main::trace(@_) if main::DEBUG && $to_trace }
              $perl_extension
                          = $self->perl_extension if ! defined $perl_extension;
  
+            my $fate;
+            my $suppression_reason = "";
+            if ($self->name =~ /^_/) {
+                $fate = $SUPPRESSED;
+                $suppression_reason = "Parent property is internal only";
+            }
+            elsif ($self->fate >= $SUPPRESSED) {
+                $fate = $self->fate;
+                $suppression_reason = $why_suppressed{$self->complete_name};
+
+            }
+            elsif ($name =~ /^_/) {
+                $fate = $INTERNAL_ONLY;
+            }
              $table = Match_Table->new(
                                  Name => $name,
                                  Perl_Extension => $perl_extension,
                                  _Alias_Hash => $table_ref{$addr},
                                  _Property => $self,
-
-                                # gets property's fate and status by default,
-                                # except if the name begind with an
-                                # underscore, default it to internal
-                                Fate => ($name =~ /^_/)
-                                         ? $INTERNAL_ONLY
-                                         : $self->fate,
+                                Fate => $fate,
+                                Suppression_Reason => $suppression_reason,
                                  Status => $self->status,
                                  _Status_Info => $self->status_info,
                                  %args);
@@ -8564,10 +8924,13 @@ sub trace { return main::trace(@_) if main::DEBUG && $to_trace }
          # Swash names are used only on either
          # 1) legacy-only properties, because the formats for these are
          #    unchangeable, and they have had these lines in them; or
-        # 2) regular map tables; otherwise there should be no access to the
+        # 2) regular or internal-only map tables.
+        #    Otherwise there should be no access to the
          #    property map table from other parts of Perl.
          return if $map{$addr}->fate != $ORDINARY
-                  && $map{$addr}->fate != $LEGACY_ONLY;
+                  && $map{$addr}->fate != $LEGACY_ONLY
+                  && ! ($map{$addr}->name =~ /^_/
+                        && $map{$addr}->fate == $INTERNAL_ONLY);
  
          return $file{$addr} if defined $file{$addr};
          return $map{$addr}->external_name;
@@ -8591,9 +8954,6 @@ sub trace { return main::trace(@_) if main::DEBUG && $to_trace }
          # to it.
          return 0 if $type{$addr} == $STRING;
  
-        # Don't generate anything for unimplemented properties.
-        return 0 if grep { $self->complete_name eq $_ }
-                                                    @unimplemented_properties;
          # Otherwise, do.
          return 1;
      }
@@ -8810,7 +9170,7 @@ sub trace { return main::trace(@_) if main::DEBUG && $to_trace }
          Carp::carp_extra_args(\@_) if main::DEBUG && @_;
  
          my $addr = do { no overloading; pack 'J', $self; };
-        if ($fate == $SUPPRESSED) {
+        if ($fate >= $SUPPRESSED) {
              $why_suppressed{$self->complete_name} = $reason;
          }
  
@@ -8896,7 +9256,6 @@ sub trace { return main::trace(@_) if main::DEBUG && $to_trace }
  
  package main;
  
-
  sub display_chr {
      # Converts an ordinal printable character value to a displayable string,
      # using a dotted circle to hold combining characters.
@@ -9522,6 +9881,17 @@ sub _operator_not_equal {
      return ! _operator_equal($self, $other);
  }
  
+sub substitute_PropertyAliases($) {
+    # Stand-in for early releases, which lack the crucial
+    # PropertyAliases.txt file: the lines returned by
+    # get_old_property_aliases() are queued onto the passed-in file object,
+    # which is then processed just as the real file would be.
+
+    my ($file_object) = @_;
+
+    my @old_alias_lines = get_old_property_aliases();
+    $file_object->insert_lines(@old_alias_lines);
+
+    return process_PropertyAliases($file_object);
+}
+
+
  sub process_PropertyAliases($) {
      # This reads in the PropertyAliases.txt file, which contains almost all
      # the character properties in Unicode and their equivalent aliases:
@@ -9534,11 +9904,6 @@ sub process_PropertyAliases($) {
      my $file= shift;
      Carp::carp_extra_args(\@_) if main::DEBUG && @_;
  
-    # This whole file was non-existent in early releases, so use our own
-    # internal one.
-    $file->insert_lines(get_old_property_aliases())
-                                                if ! -e 'PropertyAliases.txt';
-
      # Add any cjk properties that may have been defined.
      $file->insert_lines(@cjk_properties);
  
@@ -9548,8 +9913,17 @@ sub process_PropertyAliases($) {
  
          my $full = $data[1];
  
+        # This line is defective in early Perls.  The property in Unihan.txt
+        # is kRSUnicode.
+        if ($full eq 'Unicode_Radical_Stroke' && @data < 3) {
+            push @data, qw(cjkRSUnicode kRSUnicode);
+        }
+
          my $this = Property->new($data[0], Full_Name => $full);
  
+        $this->set_fate($SUPPRESSED, $why_suppressed{$full})
+                                                    if $why_suppressed{$full};
+
          # Start looking for more aliases after these two.
          for my $i (2 .. @data - 1) {
              $this->add_alias($data[$i]);
@@ -9575,22 +9949,11 @@ sub finish_property_setup {
          Property->new('JSN', Full_Name => 'Jamo_Short_Name');
      }
  
-    # These two properties must be defined in all releases so we can generate
-    # the tables from them to make regex \X work, but suppress their output so
-    # aren't application visible prior to releases where they should be
-    if (! defined property_ref('GCB')) {
-        Property->new('GCB', Full_Name => 'Grapheme_Cluster_Break',
-                      Fate => $PLACEHOLDER);
-    }
-    if (! defined property_ref('hst')) {
-        Property->new('hst', Full_Name => 'Hangul_Syllable_Type',
-                      Fate => $PLACEHOLDER);
-    }
-
      # These are used so much, that we set globals for them.
      $gc = property_ref('General_Category');
      $block = property_ref('Block');
      $script = property_ref('Script');
+    $age = property_ref('Age');
  
      # Perl adds this alias.
      $gc->add_alias('Category');
@@ -9703,24 +10066,17 @@ sub finish_property_setup {
      # for non-assigned code points; 'AL' for assigned.
      if (file_exists("${EXTRACTED}DLineBreak.txt") || -e 'LineBreak.txt') {
          my $lb = property_ref('Line_Break');
-        if ($v_version gt 3.2.0) {
+        if (file_exists("${EXTRACTED}DLineBreak.txt")) {
              $lb->set_default_map('Unknown');
          }
          else {
-            my $default = Multi_Default->new( 'Unknown' => '$gc->table("Cn")',
-                                              'AL');
+            my $default = Multi_Default->new('AL' => '~ $gc->table("Cn")',
+                                             'Unknown',
+                                            );
              $lb->set_default_map($default);
          }
      }
  
-    # If has the URS property, make sure that the standard aliases are in
-    # it, since not in the input tables in some versions.
-    my $urs = property_ref('Unicode_Radical_Stroke');
-    if (defined $urs) {
-        $urs->add_alias('cjkRSUnicode');
-        $urs->add_alias('kRSUnicode');
-    }
-
      # For backwards compatibility with applications that may read the mapping
      # file directly (it was documented in 5.12 and 5.14 as being thusly
      # usable), keep it from being adjusted.  (range_size_1 is
@@ -9876,6 +10232,16 @@ END
      return @return;
  }
  
+sub substitute_PropValueAliases($) {
+    # Stand-in for early releases, which lack the crucial
+    # PropValueAliases.txt file: the lines returned by
+    # get_old_property_value_aliases() are queued onto the passed-in file
+    # object, which is then processed just as the real file would be.
+
+    my ($file_object) = @_;
+
+    my @old_value_alias_lines = get_old_property_value_aliases();
+    $file_object->insert_lines(@old_value_alias_lines);
+
+    return process_PropValueAliases($file_object);
+}
+
  sub process_PropValueAliases {
      # This file contains values that properties look like:
      # bc ; AL        ; Arabic_Letter
@@ -9901,35 +10267,29 @@ sub process_PropValueAliases {
      my $file= shift;
      Carp::carp_extra_args(\@_) if main::DEBUG && @_;
  
-    # This whole file was non-existent in early releases, so use our own
-    # internal one if necessary.
-    if (! -e 'PropValueAliases.txt') {
-        $file->insert_lines(get_old_property_value_aliases());
-    }
-
      if ($v_version lt 4.0.0) {
          $file->insert_lines(split /\n/, <<'END'
-hst; L                                ; Leading_Jamo
-hst; LV                               ; LV_Syllable
-hst; LVT                              ; LVT_Syllable
-hst; NA                               ; Not_Applicable
-hst; T                                ; Trailing_Jamo
-hst; V                                ; Vowel_Jamo
+Hangul_Syllable_Type; L                                ; Leading_Jamo
+Hangul_Syllable_Type; LV                               ; LV_Syllable
+Hangul_Syllable_Type; LVT                              ; LVT_Syllable
+Hangul_Syllable_Type; NA                               ; Not_Applicable
+Hangul_Syllable_Type; T                                ; Trailing_Jamo
+Hangul_Syllable_Type; V                                ; Vowel_Jamo
  END
          );
      }
      if ($v_version lt 4.1.0) {
          $file->insert_lines(split /\n/, <<'END'
-GCB; CN                               ; Control
-GCB; CR                               ; CR
-GCB; EX                               ; Extend
-GCB; L                                ; L
-GCB; LF                               ; LF
-GCB; LV                               ; LV
-GCB; LVT                              ; LVT
-GCB; T                                ; T
-GCB; V                                ; V
-GCB; XX                               ; Other
+_Perl_GCB; CN                               ; Control
+_Perl_GCB; CR                               ; CR
+_Perl_GCB; EX                               ; Extend
+_Perl_GCB; L                                ; L
+_Perl_GCB; LF                               ; LF
+_Perl_GCB; LV                               ; LV
+_Perl_GCB; LVT                              ; LVT
+_Perl_GCB; T                                ; T
+_Perl_GCB; V                                ; V
+_Perl_GCB; XX                               ; Other
  END
          );
      }
@@ -9944,7 +10304,6 @@ END
      # program generates for this block property value
      #$file->insert_lines('blk; n/a; Herited');
  
-
      # Process each line of the file ...
      while ($file->next_line) {
  
@@ -9961,6 +10320,11 @@ END
          # thus shifting the former field 0 to after them.)
          splice (@data, 0, 0, splice(@data, 1, 2)) if $property eq 'ccc';
  
+        if ($v_version le v5.0.0 && $property eq 'blk' && $data[1] =~ /-/) {
+            my $new_style = $data[1] =~ s/-/_/gr;
+            splice @data, 1, 0, $new_style;
+        }
+
          # Field 0 is a short name unless "n/a"; field 1 is the full name.  If
          # there is no short name, use the full one in element 1
          if ($data[0] eq "n/a") {
@@ -10020,7 +10384,6 @@ END
      # As noted in the comments early in the program, it generates tables for
      # the default values for all releases, even those for which the concept
      # didn't exist at the time.  Here we add those if missing.
-    my $age = property_ref('age');
      if (defined $age && ! defined $age->table('Unassigned')) {
          $age->add_match_table('Unassigned');
      }
@@ -10387,9 +10750,6 @@ sub output_perl_charnames_line ($$) {
  }
  
  { # Closure
-    # This is used to store the range list of all the code points usable when
-    # the little used $compare_versions feature is enabled.
-    my $compare_versions_range_list;
  
      # These are constants to the $property_info hash in this subroutine, to
      # avoid using a quoted-string which might have a typo.
@@ -10494,72 +10854,6 @@ sub output_perl_charnames_line ($$) {
              my $low = hex $1;
              my $high = (defined $2) ? hex $2 : $low;
  
-            # For the very specialized case of comparing two Unicode
-            # versions...
-            if (DEBUG && $compare_versions) {
-                if ($property_name eq 'Age') {
-
-                    # Only allow code points at least as old as the version
-                    # specified.
-                    my $age = pack "C*", split(/\./, $map);        # v string
-                    next LINE if $age gt $compare_versions;
-                }
-                else {
-
-                    # Again, we throw out code points younger than those of
-                    # the specified version.  By now, the Age property is
-                    # populated.  We use the intersection of each input range
-                    # with this property to find what code points in it are
-                    # valid.   To do the intersection, we have to convert the
-                    # Age property map to a Range_list.  We only have to do
-                    # this once.
-                    if (! defined $compare_versions_range_list) {
-                        my $age = property_ref('Age');
-                        if (! -e 'DAge.txt') {
-                            croak "Need to have 'DAge.txt' file to do version comparison";
-                        }
-                        elsif ($age->count == 0) {
-                            croak "The 'Age' table is empty, but its file exists";
-                        }
-                        $compare_versions_range_list
-                                        = Range_List->new(Initialize => $age);
-                    }
-
-                    # An undefined map is always 'Y'
-                    $map = 'Y' if ! defined $map;
-
-                    # Calculate the intersection of the input range with the
-                    # code points that are known in the specified version
-                    my @ranges = ($compare_versions_range_list
-                                  & Range->new($low, $high))->ranges;
-
-                    # If the intersection is empty, throw away this range
-                    next LINE unless @ranges;
-
-                    # Only examine the first range this time through the loop.
-                    my $this_range = shift @ranges;
-
-                    # Put any remaining ranges in the queue to be processed
-                    # later.  Note that there is unnecessary work here, as we
-                    # will do the intersection again for each of these ranges
-                    # during some future iteration of the LINE loop, but this
-                    # code is not used in production.  The later intersections
-                    # are guaranteed to not splinter, so this will not become
-                    # an infinite loop.
-                    my $line = join ';', $property_name, $map;
-                    foreach my $range (@ranges) {
-                        $file->insert_adjusted_lines(sprintf("%04X..%04X; %s",
-                                                            $range->start,
-                                                            $range->end,
-                                                            $line));
-                    }
-
-                    # And process the first range, like any other.
-                    $low = $this_range->start;
-                    $high = $this_range->end;
-                }
-            } # End of $compare_versions
-
              # If changing to a new property, get the things constant per
              # property
              if ($previous_property_name ne $property_name) {
@@ -10965,11 +11259,12 @@ END
          my $file = shift;
          Carp::carp_extra_args(\@_) if main::DEBUG && @_;
  
-        # Create a new property specially located that is a combination of the
+        # Create a new property specially located that is a combination of
          # various Name properties: Name, Unicode_1_Name, Named Sequences, and
-        # Name_Alias properties.  (The final duplicates elements of the
-        # first.)  A comment for it will later be constructed based on the
-        # actual properties present and used
+        # _Perl_Name_Alias properties.  (The final one duplicates elements of the
+        # first, and starting in v6.1, is the same as the 'Name_Alias'
+        # property.)  A comment for the new property will later be constructed
+        # based on the actual properties present and used
          $perl_charname = Property->new('Perl_Charnames',
                         Default_Map => "",
                         Directory => File::Spec->curdir(),
@@ -12339,6 +12634,68 @@ sub filter_numeric_value_line {
  { # Closure
      my %unihan_properties;
  
+    sub construct_unihan {
+
+        my $file_object = shift;
+
+        return unless file_exists($file_object->file);
+
+        if ($v_version lt v4.0.0) {
+            push @cjk_properties, 'URS ; Unicode_Radical_Stroke';
+            push @cjk_property_values, split "\n", <<'END';
+# @missing: 0000..10FFFF; Unicode_Radical_Stroke; <none>
+END
+        }
+
+        if ($v_version ge v3.0.0) {
+            push @cjk_properties, split "\n", <<'END';
+cjkIRG_GSource; kIRG_GSource
+cjkIRG_JSource; kIRG_JSource
+cjkIRG_KSource; kIRG_KSource
+cjkIRG_TSource; kIRG_TSource
+cjkIRG_VSource; kIRG_VSource
+END
+        push @cjk_property_values, split "\n", <<'END';
+# @missing: 0000..10FFFF; cjkIRG_GSource; <none>
+# @missing: 0000..10FFFF; cjkIRG_JSource; <none>
+# @missing: 0000..10FFFF; cjkIRG_KSource; <none>
+# @missing: 0000..10FFFF; cjkIRG_TSource; <none>
+# @missing: 0000..10FFFF; cjkIRG_VSource; <none>
+END
+        }
+        if ($v_version ge v3.1.0) {
+            push @cjk_properties, 'cjkIRG_HSource; kIRG_HSource';
+            push @cjk_property_values, '# @missing: 0000..10FFFF; cjkIRG_HSource; <none>';
+        }
+        if ($v_version ge v3.1.1) {
+            push @cjk_properties, 'cjkIRG_KPSource; kIRG_KPSource';
+            push @cjk_property_values, '# @missing: 0000..10FFFF; cjkIRG_KPSource; <none>';
+        }
+        if ($v_version ge v3.2.0) {
+            push @cjk_properties, split "\n", <<'END';
+cjkAccountingNumeric; kAccountingNumeric
+cjkCompatibilityVariant; kCompatibilityVariant
+cjkOtherNumeric; kOtherNumeric
+cjkPrimaryNumeric; kPrimaryNumeric
+END
+            push @cjk_property_values, split "\n", <<'END';
+# @missing: 0000..10FFFF; cjkAccountingNumeric; NaN
+# @missing: 0000..10FFFF; cjkCompatibilityVariant; <code point>
+# @missing: 0000..10FFFF; cjkOtherNumeric; NaN
+# @missing: 0000..10FFFF; cjkPrimaryNumeric; NaN
+END
+        }
+        if ($v_version gt v4.0.0) {
+            push @cjk_properties, 'cjkIRG_USource; kIRG_USource';
+            push @cjk_property_values, '# @missing: 0000..10FFFF; cjkIRG_USource; <none>';
+        }
+
+        if ($v_version ge v4.1.0) {
+            push @cjk_properties, 'cjkIICore ; kIICore';
+            push @cjk_property_values, '# @missing: 0000..10FFFF; cjkIICore; <none>';
+        }
+    }
+
      sub setup_unihan {
          # Do any special setup for Unihan properties.
  
@@ -12351,16 +12708,16 @@ sub filter_numeric_value_line {
          my $iicore = property_ref('kIICore');
          if (defined $iicore) {
              $iicore->set_type($FORCED_BINARY);
-            $iicore->table("Y")->add_note("Forced to a binary property as per unicode.org UAX #38.");
+            $iicore->table("Y")->add_note("Matches any code point which has a non-null value for this property; see unicode.org UAX #38.");
  
              # Unicode doesn't include the maps for this property, so don't
              # warn that they are missing.
              $iicore->set_pre_declared_maps(0);
              $iicore->add_comment(join_lines( <<END
-This property contains enum values, but Unicode UAX #38 says it should be
-interpreted as binary, so Perl creates tables for both 1) its enum values,
-plus 2) true/false tables in which it is considered true for all code points
-that have a non-null value
+This property contains string values, but any non-empty ones are considered to
+be 'core', so Perl creates tables for both: 1) its string values, plus 2)
+tables so that \\p{kIICore} matches any code point which has a non-empty
+value for this property.
  END
              ));
          }
@@ -12657,12 +13014,24 @@ sub generate_hst {
  END
  );
  
-    # The Hangul syllables in version 1 are completely different than what came
-    # after, so just ignore them there.
+    # The Hangul syllables in version 1 are at different code points than
+    # those that came along starting in version 2, and have different names;
+    # they comprise about 60% of the code points of the later version.
+    # From my (khw) research on them (see <558493EB.4000807@att.net>), the
+    # initial set is a subset of the later version, with different English
+    # transliterations.  I did not see an easy mapping between them.  The
+    # later set includes essentially all possibilities, even ones that aren't
+    # in modern use (if they ever were), and over 96% of the new ones are type
+    # LVT.  Mathematically, the early set must also contain a preponderance of
+    # LVT values.  In lieu of doing nothing, we just set them all to LVT, and
+    # expect that this will be right most of the time, which is better than
+    # not being right at all.
      if ($v_version lt v2.0.0) {
          my $property = property_ref($file->property);
+        $file->insert_lines(sprintf("%04X..%04X; LVT\n",
+                                    $FIRST_REMOVED_HANGUL_SYLLABLE,
+                                    $FINAL_REMOVED_HANGUL_SYLLABLE));
          push @tables_that_may_be_empty, $property->table('LV')->complete_name;
-        push @tables_that_may_be_empty, $property->table('LVT')->complete_name;
          return;
      }
  
@@ -12712,7 +13081,6 @@ sub generate_GCB {
  
      # Also from http://www.unicode.org/reports/tr29/tr29-3.html.
      foreach my $code_point ( qw{
-                                40000
                                  09BE 09D7 0B3E 0B57 0BBE 0BD7 0CC2 0CD5 0CD6
                                  0D3E 0D57 0DCF 0DDF FF9E FF9F 1D165 1D16E 1D16F
                                  }
@@ -12735,275 +13103,50 @@ sub generate_GCB {
          generate_hst($file);
      }
  
-    return;
+    main::process_generic_property_file($file);
  }
  
-sub setup_early_name_alias {
-    my $file= shift;
-    Carp::carp_extra_args(\@_) if main::DEBUG && @_;
  
-    # This has the effect of pretending that the Name_Alias property was
-    # available in all Unicode releases.  Strictly speaking, this property
-    # should not be availabe in early releases, but doing this allows
-    # charnames.pm to work on older releases without change.  Prior to v5.16
-    # it had these names hard-coded inside it.  Unicode 6.1 came along and
-    # created these names, and so they were removed from charnames.
+sub fixup_early_perl_name_alias {
  
-    my $aliases = property_ref('Name_Alias');
-    if (! defined $aliases) {
-        $aliases = Property->new('Name_Alias', Default_Map => "");
-    }
+    # Different versions of Unicode have varying support for the name synonyms
+    # below.  Just include everything.  As of 6.1, all these are correct in
+    # the Unicode-supplied file.
  
-    $file->insert_lines(get_old_name_aliases());
+    my $file= shift;
+    Carp::carp_extra_args(\@_) if main::DEBUG && @_;
  
-    return;
-}
  
-sub get_old_name_aliases () {
+    # ALERT did not come along until 6.0, at which point it became preferred
+    # over BELL.  By inserting it last in early releases, BELL is preferred
+    # over it; and vice-versa in 6.0
+    my $type_for_bell = ($v_version lt v6.0.0)
+               ? 'correction'
+               : 'alternate';
+    $file->insert_lines(split /\n/, <<END
+0007;BELL; $type_for_bell
+000A;LINE FEED (LF);alternate
+000C;FORM FEED (FF);alternate
+000D;CARRIAGE RETURN (CR);alternate
+0085;NEXT LINE (NEL);alternate
+END
+
+    );
  
-    # The Unicode_1_Name field, contains most of these names.  One would
-    # expect, given the field's name, that its values would be fixed across
-    # versions, giving the true Unicode version 1 name for the character.
-    # Sadly, this is not the case.  Actually Version 1.1.5 had no names for
-    # any of the controls; Version 2.0 introduced names for the C0 controls,
-    # and 3.0 introduced C1 names.  3.0.1 removed the name INDEX; and 3.2
-    # changed some names: it
+    # One might think that the 'Unicode_1_Name' field could work for most
+    # of the above names, but sadly that field varies depending on the
+    # release.  Version 1.1.5 had no names for any of the controls; Version
+    # 2.0 introduced names for the C0 controls, and 3.0 introduced C1 names.
+    # 3.0.1 removed the name INDEX; and 3.2 changed some names:
      #   changed to parenthesized versions like "NEXT LINE" to
      #       "NEXT LINE (NEL)";
      #   changed PARTIAL LINE DOWN to PARTIAL LINE FORWARD
      #   changed PARTIAL LINE UP to PARTIAL LINE BACKWARD;;
      #   changed e.g. FILE SEPARATOR to INFORMATION SEPARATOR FOUR
-    # This list contains all the names that were defined so that
-    # charnames::vianame(), etc. understand them all EVEN if this version of
-    # Unicode didn't specify them (this could be construed as a bug).
-    # mktables elsewhere gives preference to the Unicode_1_Name field over
-    # these names, so that viacode() will return the correct value for that
-    # version of Unicode, except when that version doesn't define a name,
-    # viacode() will return one anyway (this also could be construed as a
-    # bug).  But these potential "bugs" allow for the smooth working of code
-    # on earlier Unicode releases.
-
-    my @return = split /\n/, <<'END';
-0000;NULL;control
-0000;NUL;abbreviation
-0001;START OF HEADING;control
-0001;SOH;abbreviation
-0002;START OF TEXT;control
-0002;STX;abbreviation
-0003;END OF TEXT;control
-0003;ETX;abbreviation
-0004;END OF TRANSMISSION;control
-0004;EOT;abbreviation
-0005;ENQUIRY;control
-0005;ENQ;abbreviation
-0006;ACKNOWLEDGE;control
-0006;ACK;abbreviation
-0007;BELL;control
-0007;BEL;abbreviation
-0008;BACKSPACE;control
-0008;BS;abbreviation
-0009;CHARACTER TABULATION;control
-0009;HORIZONTAL TABULATION;control
-0009;HT;abbreviation
-0009;TAB;abbreviation
-000A;LINE FEED;control
-000A;LINE FEED (LF);control
-000A;NEW LINE;control
-000A;END OF LINE;control
-000A;LF;abbreviation
-000A;NL;abbreviation
-000A;EOL;abbreviation
-000B;LINE TABULATION;control
-000B;VERTICAL TABULATION;control
-000B;VT;abbreviation
-000C;FORM FEED;control
-000C;FORM FEED (FF);control
-000C;FF;abbreviation
-000D;CARRIAGE RETURN;control
-000D;CARRIAGE RETURN (CR);control
-000D;CR;abbreviation
-000E;SHIFT OUT;control
-000E;LOCKING-SHIFT ONE;control
-000E;SO;abbreviation
-000F;SHIFT IN;control
-000F;LOCKING-SHIFT ZERO;control
-000F;SI;abbreviation
-0010;DATA LINK ESCAPE;control
-0010;DLE;abbreviation
-0011;DEVICE CONTROL ONE;control
-0011;DC1;abbreviation
-0012;DEVICE CONTROL TWO;control
-0012;DC2;abbreviation
-0013;DEVICE CONTROL THREE;control
-0013;DC3;abbreviation
-0014;DEVICE CONTROL FOUR;control
-0014;DC4;abbreviation
-0015;NEGATIVE ACKNOWLEDGE;control
-0015;NAK;abbreviation
-0016;SYNCHRONOUS IDLE;control
-0016;SYN;abbreviation
-0017;END OF TRANSMISSION BLOCK;control
-0017;ETB;abbreviation
-0018;CANCEL;control
-0018;CAN;abbreviation
-0019;END OF MEDIUM;control
-0019;EOM;abbreviation
-001A;SUBSTITUTE;control
-001A;SUB;abbreviation
-001B;ESCAPE;control
-001B;ESC;abbreviation
-001C;INFORMATION SEPARATOR FOUR;control
-001C;FILE SEPARATOR;control
-001C;FS;abbreviation
-001D;INFORMATION SEPARATOR THREE;control
-001D;GROUP SEPARATOR;control
-001D;GS;abbreviation
-001E;INFORMATION SEPARATOR TWO;control
-001E;RECORD SEPARATOR;control
-001E;RS;abbreviation
-001F;INFORMATION SEPARATOR ONE;control
-001F;UNIT SEPARATOR;control
-001F;US;abbreviation
-0020;SP;abbreviation
-007F;DELETE;control
-007F;DEL;abbreviation
-0080;PADDING CHARACTER;figment
-0080;PAD;abbreviation
-0081;HIGH OCTET PRESET;figment
-0081;HOP;abbreviation
-0082;BREAK PERMITTED HERE;control
-0082;BPH;abbreviation
-0083;NO BREAK HERE;control
-0083;NBH;abbreviation
-0084;INDEX;control
-0084;IND;abbreviation
-0085;NEXT LINE;control
-0085;NEXT LINE (NEL);control
-0085;NEL;abbreviation
-0086;START OF SELECTED AREA;control
-0086;SSA;abbreviation
-0087;END OF SELECTED AREA;control
-0087;ESA;abbreviation
-0088;CHARACTER TABULATION SET;control
-0088;HORIZONTAL TABULATION SET;control
-0088;HTS;abbreviation
-0089;CHARACTER TABULATION WITH JUSTIFICATION;control
-0089;HORIZONTAL TABULATION WITH JUSTIFICATION;control
-0089;HTJ;abbreviation
-008A;LINE TABULATION SET;control
-008A;VERTICAL TABULATION SET;control
-008A;VTS;abbreviation
-008B;PARTIAL LINE FORWARD;control
-008B;PARTIAL LINE DOWN;control
-008B;PLD;abbreviation
-008C;PARTIAL LINE BACKWARD;control
-008C;PARTIAL LINE UP;control
-008C;PLU;abbreviation
-008D;REVERSE LINE FEED;control
-008D;REVERSE INDEX;control
-008D;RI;abbreviation
-008E;SINGLE SHIFT TWO;control
-008E;SINGLE-SHIFT-2;control
-008E;SS2;abbreviation
-008F;SINGLE SHIFT THREE;control
-008F;SINGLE-SHIFT-3;control
-008F;SS3;abbreviation
-0090;DEVICE CONTROL STRING;control
-0090;DCS;abbreviation
-0091;PRIVATE USE ONE;control
-0091;PRIVATE USE-1;control
-0091;PU1;abbreviation
-0092;PRIVATE USE TWO;control
-0092;PRIVATE USE-2;control
-0092;PU2;abbreviation
-0093;SET TRANSMIT STATE;control
-0093;STS;abbreviation
-0094;CANCEL CHARACTER;control
-0094;CCH;abbreviation
-0095;MESSAGE WAITING;control
-0095;MW;abbreviation
-0096;START OF GUARDED AREA;control
-0096;START OF PROTECTED AREA;control
-0096;SPA;abbreviation
-0097;END OF GUARDED AREA;control
-0097;END OF PROTECTED AREA;control
-0097;EPA;abbreviation
-0098;START OF STRING;control
-0098;SOS;abbreviation
-0099;SINGLE GRAPHIC CHARACTER INTRODUCER;figment
-0099;SGC;abbreviation
-009A;SINGLE CHARACTER INTRODUCER;control
-009A;SCI;abbreviation
-009B;CONTROL SEQUENCE INTRODUCER;control
-009B;CSI;abbreviation
-009C;STRING TERMINATOR;control
-009C;ST;abbreviation
-009D;OPERATING SYSTEM COMMAND;control
-009D;OSC;abbreviation
-009E;PRIVACY MESSAGE;control
-009E;PM;abbreviation
-009F;APPLICATION PROGRAM COMMAND;control
-009F;APC;abbreviation
-00A0;NBSP;abbreviation
-00AD;SHY;abbreviation
-200B;ZWSP;abbreviation
-200C;ZWNJ;abbreviation
-200D;ZWJ;abbreviation
-200E;LRM;abbreviation
-200F;RLM;abbreviation
-202A;LRE;abbreviation
-202B;RLE;abbreviation
-202C;PDF;abbreviation
-202D;LRO;abbreviation
-202E;RLO;abbreviation
-FEFF;BYTE ORDER MARK;alternate
-FEFF;BOM;abbreviation
-FEFF;ZWNBSP;abbreviation
-END
-
-    if ($v_version ge v3.0.0) {
-        push @return, split /\n/, <<'END';
-180B; FVS1; abbreviation
-180C; FVS2; abbreviation
-180D; FVS3; abbreviation
-180E; MVS; abbreviation
-202F; NNBSP; abbreviation
-END
-    }
-
-    if ($v_version ge v3.2.0) {
-        push @return, split /\n/, <<'END';
-034F; CGJ; abbreviation
-205F; MMSP; abbreviation
-2060; WJ; abbreviation
-END
-        # Add in VS1..VS16
-        my $cp = 0xFE00 - 1;
-        for my $i (1..16) {
-            push @return, sprintf("%04X; VS%d; abbreviation", $cp + $i, $i);
-        }
-    }
-    if ($v_version ge v4.0.0) { # Add in VS17..VS256
-        my $cp = 0xE0100 - 17;
-        for my $i (17..256) {
-            push @return, sprintf("%04X; VS%d; abbreviation", $cp + $i, $i);
-        }
-    }
-
-    # ALERT did not come along until 6.0, at which point it became preferred
-    # over BELL, and was never in the Unicode_1_Name field.  For the same
-    # reasons, that the other names are made known to all releases by this
-    # function, we make ALERT known too.  By inserting it
-    # last in early releases, BELL is preferred over it; and vice-vers in 6.0
-    my $alert = '0007; ALERT; control';
-    if ($v_version lt v6.0.0) {
-        push @return, $alert;
-    }
-    else {
-        unshift @return, $alert;
-    }
+    #
+    # All these are present in the 6.1 NameAliases.txt
  
-    return @return;
+    return;
  }
  
  sub filter_later_version_name_alias_line {
@@ -13030,9 +13173,8 @@ sub filter_later_version_name_alias_line {
  sub filter_early_version_name_alias_line {
  
      # Early versions did not have the trailing alias type field; implicitly it
-    # was 'correction'.   But our synthetic lines we add in this program do
-    # have it, so test for the type field.
-    $_ .= "; correction" if $_ !~ /;.*;/;
+    # was 'correction'.
+    $_ .= "; correction";
  
      filter_later_version_name_alias_line;
      return;
@@ -13119,9 +13261,9 @@ END
  
      # For each property, fill in any missing mappings, and calculate the re
      # match tables.  If a property has more than one missing mapping, the
-    # default is a reference to a data structure, and requires data from other
-    # properties to resolve.  The sort is used to cause these to be processed
-    # last, after all the other properties have been calculated.
+    # default is a reference to a data structure, and may require data from
+    # other properties to resolve.  The sort is used to cause these to be
+    # processed last, after all the other properties have been calculated.
      # (Fortunately, the missing properties so far don't depend on each other.)
      foreach my $property
          (sort { (defined $a->default_map && ref $a->default_map) ? 1 : -1 }
@@ -13204,72 +13346,81 @@ END
          # Add any remaining code points to the mapping, using the default for
          # missing code points.
          my $default_table;
-        if (defined (my $default_map = $property->default_map)) {
+        my $default_map = $property->default_map;
+        if ($property_type == $FORCED_BINARY) {
  
-            # Make sure there is a match table for the default
-            if (! defined ($default_table = $property->table($default_map))) {
-                $default_table = $property->add_match_table($default_map);
+            # A forced binary property creates a 'Y' table that matches all
+            # non-default values.  The actual string values are also written out
+            # as a map table.  (The default value will almost certainly be the
+            # empty string, so the pod glosses over the distinction, and just
+            # talks about empty vs non-empty.)
+            my $yes = $property->table("Y");
+            foreach my $range ($property->ranges) {
+                next if $range->value eq $default_map;
+                $yes->add_range($range->start, $range->end);
              }
+            $property->table("N")->set_complement($yes);
+        }
+        else {
+            if (defined $default_map) {
  
-            # And, if the property is binary, the default table will just
-            # be the complement of the other table.
-            if ($property_type == $BINARY) {
-                my $non_default_table;
-
-                # Find the non-default table.
-                for my $table ($property->tables) {
-                    next if $table == $default_table;
-                    $non_default_table = $table;
+                # Make sure there is a match table for the default
+                if (! defined ($default_table = $property->table($default_map)))
+                {
+                    $default_table = $property->add_match_table($default_map);
                  }
-                $default_table->set_complement($non_default_table);
-            }
-            else {
  
-                # This fills in any missing values with the default.  It's not
-                # necessary to do this with binary properties, as the default
-                # is defined completely in terms of the Y table.
-                $property->add_map(0, $MAX_WORKING_CODEPOINT,
-                                   $default_map, Replace => $NO);
-            }
-        }
+                # And, if the property is binary, the default table will just
+                # be the complement of the other table.
+                if ($property_type == $BINARY) {
+                    my $non_default_table;
  
-        # Have all we need to populate the match tables.
-        my $maps_should_be_defined = $property->pre_declared_maps;
-        foreach my $range ($property->ranges) {
-            my $map = $range->value;
-            my $table = $property->table($map);
-            if (! defined $table) {
+                    # Find the non-default table.
+                    for my $table ($property->tables) {
+                        if ($table == $default_table) {
+                            if ($v_version le v5.0.0) {
+                                $table->add_alias($_) for qw(N No F False);
+                            }
+                            next;
+                        } elsif ($v_version le v5.0.0) {
+                            $table->add_alias($_) for qw(Y Yes T True);
+                        }
+                        $non_default_table = $table;
+                    }
+                    $default_table->set_complement($non_default_table);
+                }
+                else {
  
-                # Integral and rational property values are not necessarily
-                # defined in PropValueAliases, but whether all the other ones
-                # should be depends on the property.
-                if ($maps_should_be_defined
-                    && $map !~ /^ -? \d+ ( \/ \d+ )? $/x)
-                {
-                    Carp::my_carp("Table '$property_name=$map' should have been defined.  Defining it now.")
+                    # This fills in any missing values with the default.  It's
+                    # not necessary to do this with binary properties, as the
+                    # default is defined completely in terms of the Y table.
+                    $property->add_map(0, $MAX_WORKING_CODEPOINT,
+                                    $default_map, Replace => $NO);
                  }
-                $table = $property->add_match_table($map);
              }
  
-            next if $table->complement != 0;    # Don't need to populate these
-            $table->add_range($range->start, $range->end);
-        }
+            # Have all we need to populate the match tables.
+            my $maps_should_be_defined = $property->pre_declared_maps;
+            foreach my $range ($property->ranges) {
+                my $map = $range->value;
+                my $table = $property->table($map);
+                if (! defined $table) {
  
-        # A forced binary property has additional true/false tables which
-        # should have been set up when it was forced into binary.  The false
-        # table matches exactly the same set as the property's default table.
-        # The true table matches the complement of that.  The false table is
-        # not the same as an additional set of aliases on top of the default
-        # table, so use 'set_equivalent_to'.  If it were implemented as
-        # additional aliases, various things would have to be adjusted, but
-        # especially, if the user wants to get a list of names for the table
-        # using Unicode::UCD::prop_value_aliases(), s/he should get a
-        # different set depending on whether they want the default table or
-        # the false table.
-        if ($property_type == $FORCED_BINARY) {
-            $property->table('N')->set_equivalent_to($default_table,
-                                                     Related => 1);
-            $property->table('Y')->set_complement($default_table);
+                    # Integral and rational property values are not
+                    # necessarily defined in PropValueAliases, but whether all
+                    # the other ones should be depends on the property.
+                    if ($maps_should_be_defined
+                        && $map !~ /^ -? \d+ ( \/ \d+ )? $/x)
+                    {
+                        Carp::my_carp("Table '$property_name=$map' should "
+                                    . "have been defined.  Defining it now.")
+                    }
+                    $table = $property->add_match_table($map);
+                }
+
+                next if $table->complement != 0; # Don't need to populate these
+                $table->add_range($range->start, $range->end);
+            }
          }
  
          # For Perl 5.6 compatibility, all properties matchable in regexes can
@@ -13422,7 +13573,7 @@ sub pre_3_dot_1_Nl () {
      return $Nl;
  }
  
-sub calculate_Assigned() {  # Calculate the gc != Cn code points; may be
+sub calculate_Assigned() {  # Set $Assigned to the gc != Cn code points; may be
                              # called before the Cn's are completely filled.
                              # Works on Unicodes earlier than ones that
                              # explicitly specify Cn.
@@ -13442,6 +13593,339 @@ sub calculate_Assigned() {  # Calculate the gc != Cn code points; may be
      }
  }
  
+sub calculate_DI() {    # Set $DI to a Range_List equivalent to the
+                        # Default_Ignorable_Code_Point property.  Works on
+                        # Unicodes earlier than ones that explicitly specify
+                        # DI.
+    return if defined $DI;
+
+    if (defined (my $di = property_ref('Default_Ignorable_Code_Point'))) {
+        $DI = $di->table('Y');
+    }
+    else {
+        $DI = Range_List->new(Initialize => [ 0x180B .. 0x180D,
+                                              0x2060 .. 0x206F,
+                                              0xFE00 .. 0xFE0F,
+                                              0xFFF0 .. 0xFFFB,
+                                            ]);
+        if ($v_version ge v2.0) {
+            $DI += $gc->table('Cf')
+                +  $gc->table('Cs');
+
+            # These are above the Unicode version 1 max
+            $DI->add_range(0xE0000, 0xE0FFF);
+        }
+        $DI += $gc->table('Cc')
+             - ord("\t")
+             - utf8::unicode_to_native(0x0A)  # LINE FEED
+             - utf8::unicode_to_native(0x0B)  # VERTICAL TAB
+             - ord("\f")
+             - utf8::unicode_to_native(0x0D)  # CARRIAGE RETURN
+             - utf8::unicode_to_native(0x85); # NEL
+    }
+}
+
+sub calculate_NChar() {  # Create a Perl extension match table which is the
+                         # same as the Noncharacter_Code_Point property, and
+                         # set $NChar to point to it.  Works on Unicodes
+                         # earlier than ones that explicitly specify NChar
+    return if defined $NChar;
+
+    $NChar = $perl->add_match_table('_Perl_Nchar',
+                                    Perl_Extension => 1,
+                                    Fate => $INTERNAL_ONLY);
+    if (defined (my $off_nchar = property_ref('NChar'))) {
+        $NChar->initialize($off_nchar->table('Y'));
+    }
+    else {
+        $NChar->initialize([ 0xFFFE .. 0xFFFF ]);
+        if ($v_version ge v2.0) {   # First release with these nchars
+            for (my $i = 0x1FFFE; $i <= 0x10FFFE; $i += 0x10000) {
+                $NChar += [ $i .. $i+1 ];
+            }
+        }
+    }
+}
+
+sub handle_compare_versions () {
+    # This fixes things up for the $compare_versions capability, where we
+    # compare Unicode version X with version Y (with Y > X), and we are
+    # running it on the Unicode Data for version Y.
+    #
+    # It works by calculating the code points whose meaning has been specified
+    # after release X, by using the Age property.  The complement of this set
+    # is the set of code points whose meaning is unchanged between the
+    # releases.  This is the set the program restricts itself to.  It includes
+    # everything whose meaning has been specified by the time version X came
+    # along, plus those still unassigned by the time of version Y.  (We will
+    # continue to use the word 'assigned' to mean 'meaning has been
+    # specified', as it's shorter and is accurate in all cases except the
+    # Noncharacter code points.)
+    #
+    # This function is run after all the properties specified by Unicode have
+    # been calculated for release Y.  This makes sure we get all the nuances
+    # of Y's rules.  (It is done before the Perl extensions are calculated, as
+    # those are based entirely on the Unicode ones.)  But doing it after the
+    # Unicode table calculations means we have to fix up the Unicode tables.
+    # We do this by subtracting the code points that have been assigned since
+    # X (which is actually done by ANDing each table of assigned code points
+    # with the set of unchanged code points).  Most Unicode properties are of
+    # the form such that all unassigned code points have a default, grab-bag,
+    # property value which is changed when the code point gets assigned.  For
+    # these, we just remove the changed code points from the table for the
+    # latter property value, and add them back in to the grab-bag one.  A few
+    # other properties are not entirely of this form and have values for some
+    # or all unassigned code points that are not the grab-bag one.  These have
+    # to be handled specially, and are hard-coded into this routine based on
+    # manual inspection of the Unicode character database.  A list of the
+    # outlier code points is made for each of these properties, and those
+    # outliers are excluded from adding and removing from tables.
+    #
+    # Note that there are glitches when comparing against Unicode 1.1, as some
+    # Hangul syllables in it were later ripped out and eventually replaced
+    # with other things.
+
+    print "Fixing up for version comparison\n" if $verbosity >= $PROGRESS;
+
+    my $after_first_version = "All matching code points were added after "
+                            . "Unicode $string_compare_versions";
+
+    # Calculate the delta as those code points that have been newly assigned
+    # since the first compare version.
+    my $delta = Range_List->new();
+    foreach my $table ($age->tables) {
+        next if $table == $age->table('Unassigned');
+        next if $table->name le $string_compare_versions;
+        $delta += $table;
+    }
+    if ($delta->is_empty) {
+        die ("No changes; perhaps you need a 'DAge.txt' file?");
+    }
+
+    my $unchanged = ~ $delta;
+
+    calculate_Assigned() if ! defined $Assigned;
+    $Assigned &= $unchanged;
+
+    # $Assigned now contains the code points that were assigned as of Unicode
+    # version X.
+
+    # A block is all or nothing.  If nothing is assigned in it, it all goes
+    # back to the No_Block pool; but if even one code point is assigned, the
+    # block is retained.
+    my $no_block = $block->table('No_Block');
+    foreach my $this_block ($block->tables) {
+        next if     $this_block == $no_block
+                ||  ! ($this_block & $Assigned)->is_empty;
+        $this_block->set_fate($SUPPRESSED, $after_first_version);
+        $no_block += $this_block;
+    }
+
+    my @special_delta_properties;   # List of properties that have to be
+                                    # handled specially.
+    my %restricted_delta;           # Keys are the entries in
+                                    # @special_delta_properties;  values
+                                    # are the range list of the code points
+                                    # that behave normally when they get
+                                    # assigned.
+
+    # In the next three properties, the Default Ignorable code points are
+    # outliers.
+    calculate_DI();
+    $DI &= $unchanged;
+
+    push @special_delta_properties, property_ref('_Perl_GCB');
+    $restricted_delta{$special_delta_properties[-1]} = ~ $DI;
+
+    if (defined (my $cwnfkcc = property_ref('Changes_When_NFKC_Casefolded')))
+    {
+        push @special_delta_properties, $cwnfkcc;
+        $restricted_delta{$special_delta_properties[-1]} = ~ $DI;
+    }
+
+    calculate_NChar();      # Non-character code points
+    $NChar &= $unchanged;
+
+    # This may have to be updated from time-to-time to get the most accurate
+    # results.
+    my $default_BC_non_LtoR = Range_List->new(Initialize =>
+                        # These came from the comments in v8.0 DBidiClass.txt
+                                                        [ # AL
+                                                            0x0600 .. 0x07BF,
+                                                            0x08A0 .. 0x08FF,
+                                                            0xFB50 .. 0xFDCF,
+                                                            0xFDF0 .. 0xFDFF,
+                                                            0xFE70 .. 0xFEFF,
+                                                            0x1EE00 .. 0x1EEFF,
+                                                           # R
+                                                            0x0590 .. 0x05FF,
+                                                            0x07C0 .. 0x089F,
+                                                            0xFB1D .. 0xFB4F,
+                                                            0x10800 .. 0x10FFF,
+                                                            0x1E800 .. 0x1EDFF,
+                                                            0x1EF00 .. 0x1EFFF,
+                                                           # ET
+                                                            0x20A0 .. 0x20CF,
+                                                         ]
+                                          );
+    $default_BC_non_LtoR += $DI + $NChar;
+    push @special_delta_properties, property_ref('BidiClass');
+    $restricted_delta{$special_delta_properties[-1]} = ~ $default_BC_non_LtoR;
+
+    if (defined (my $eaw = property_ref('East_Asian_Width'))) {
+
+        my $default_EA_width_W = Range_List->new(Initialize =>
+                                    # From comments in v8.0 EastAsianWidth.txt
+                                                [
+                                                    0x3400 .. 0x4DBF,
+                                                    0x4E00 .. 0x9FFF,
+                                                    0xF900 .. 0xFAFF,
+                                                    0x20000 .. 0x2A6DF,
+                                                    0x2A700 .. 0x2B73F,
+                                                    0x2B740 .. 0x2B81F,
+                                                    0x2B820 .. 0x2CEAF,
+                                                    0x2F800 .. 0x2FA1F,
+                                                    0x20000 .. 0x2FFFD,
+                                                    0x30000 .. 0x3FFFD,
+                                                ]
+                                             );
+        push @special_delta_properties, $eaw;
+        $restricted_delta{$special_delta_properties[-1]}
+                                                       = ~ $default_EA_width_W;
+
+        # Line break came along in the same release as East_Asian_Width, and
+        # the non-grab-bag default set is a superset of the EAW one.
+        if (defined (my $lb = property_ref('Line_Break'))) {
+            my $default_LB_non_XX = Range_List->new(Initialize =>
+                                        # From comments in v8.0 LineBreak.txt
+                                                        [ 0x20A0 .. 0x20CF ]);
+            $default_LB_non_XX += $default_EA_width_W;
+            push @special_delta_properties, $lb;
+            $restricted_delta{$special_delta_properties[-1]}
+                                                        = ~ $default_LB_non_XX;
+        }
+    }
+
+    # Go through every property, skipping those we've already worked on, those
+    # that are immutable, and the perl ones that will be calculated after this
+    # routine has done its fixup.
+    foreach my $property (property_ref('*')) {
+        next if    $property == $perl     # Done later in the program
+                || $property == $block    # Done just above
+                || $property == $DI       # Done just above
+                || $property == $NChar    # Done just above
+
+                   # The next two are invariant across Unicode versions
+                || $property == property_ref('Pattern_Syntax')
+                || $property == property_ref('Pattern_White_Space');
+
+        #  Find the grab-bag value.
+        my $default_map = $property->default_map;
+
+        if (! $property->to_create_match_tables) {
+
+            # Here there aren't any match tables.  So far, all such properties
+            # have a default map, and don't require special handling.  Just
+            # change each newly assigned code point back to the default map,
+            # as if they were unassigned.
+            foreach my $range ($delta->ranges) {
+                $property->add_map($range->start,
+                                $range->end,
+                                $default_map,
+                                Replace => $UNCONDITIONALLY);
+            }
+        }
+        else {  # Here there are match tables.  Find the one (if any) for the
+                # grab-bag value that unassigned code points go to.
+            my $default_table;
+            if (defined $default_map) {
+                $default_table = $property->table($default_map);
+            }
+
+            # If some code points don't go back to the grab-bag when they
+            # are considered unassigned, exclude them from the list that does
+            # that.
+            my $this_delta = $delta;
+            my $this_unchanged = $unchanged;
+            if (grep { $_ == $property } @special_delta_properties) {
+                $this_delta = $delta & $restricted_delta{$property};
+                $this_unchanged = ~ $this_delta;
+            }
+
+            # Fix up each match table for this property.
+            foreach my $table ($property->tables) {
+                if (defined $default_table && $table == $default_table) {
+
+                    # The code points assigned after release X (the ones we
+                    # are excluding in this routine) go back on to the default
+                    # (grab-bag) table.  However, some of these tables don't
+                    # actually exist, but are specified solely by the other
+                    # tables.  (In a binary property, we don't need to
+                    # actually have an 'N' table, as it's just the complement
+                    # of the 'Y' table.)  Such tables will be locked, so just
+                    # skip those.
+                    $table += $this_delta unless $table->locked;
+                }
+                else {
+
+                    # Here the table is not for the default value.  We need to
+                    # subtract the code points we are ignoring for this
+                    # comparison (the deltas) from it.  But if the table
+                    # started out with nothing, no need to exclude anything,
+                    # and want to skip it here anyway, so it gets listed
+                    # properly in the pod.
+                    next if $table->is_empty;
+
+                    # Save the deltas for later, before we do the subtraction
+                    my $deltas = $table & $this_delta;
+
+                    $table &= $this_unchanged;
+
+                    # Suppress the table if the subtraction left it with
+                    # nothing in it
+                    if ($table->is_empty) {
+                        if ($property->type == $BINARY) {
+                            push @tables_that_may_be_empty, $table->complete_name;
+                        }
+                        else {
+                            $table->set_fate($SUPPRESSED, $after_first_version);
+                        }
+                    }
+
+                    # Now we add the removed code points to the property's
+                    # map, as they should now map to the grab-bag default
+                    # property (which they did in the first comparison
+                    # version).  But we don't have to do this if the map is
+                    # only for internal use.
+                    if (defined $default_map && $property->to_output_map) {
+
+                        # The gc property has pseudo property values whose names
+                        # have length 1.  These are the union of all the
+                        # property values whose name is longer than 1 and
+                        # whose first letter is all the same.  The replacement
+                        # is done once for the longer-named tables.
+                        next if $property == $gc && length $table->name == 1;
+
+                        foreach my $range ($deltas->ranges) {
+                            $property->add_map($range->start,
+                                            $range->end,
+                                            $default_map,
+                                            Replace => $UNCONDITIONALLY);
+                        }
+                    }
+                }
+            }
+        }
+    }
+
+    # The above code doesn't work on 'gc=C', as it is a superset of the default
+    # ('Cn') table.  It's easiest to just special case it here.
+    my $C = $gc->table('C');
+    $C += $gc->table('Cn');
+
+    return;
+}
+
  sub compile_perl() {
      # Create perl-defined tables.  Almost all are part of the pseudo-property
      # named 'perl' internally to this program.  Many of these are recommended
@@ -13857,6 +14341,7 @@ sub compile_perl() {
      );
      $Space->add_alias('XPerlSpace');    # Pre-existing synonyms
      $Space->add_alias('SpacePerl');
+    $Space->add_alias('Space') if $v_version lt v4.1.0;
  
      my $Posix_space = $perl->add_match_table("PosixSpace",
                              Description => "\\t, \\n, \\cK, \\f, \\r, and ' '.  (\\cK is vertical tab)",
@@ -13872,11 +14357,19 @@ sub compile_perl() {
                              Initialize => $Cntrl & $ASCII,
                              );
  
+    my $perl_surrogate = $perl->add_match_table('_Perl_Surrogate');
+    my $Cs = $gc->table('Cs');
+    if (defined $Cs && ! $Cs->is_empty) {
+        $perl_surrogate += $Cs;
+    }
+    else {
+        push @tables_that_may_be_empty, '_Perl_Surrogate';
+    }
+
      # $controls is a temporary used to construct Graph.
      my $controls = Range_List->new(Initialize => $gc->table('Unassigned')
-                                                + $gc->table('Control'));
-    # Cs not in release 1
-    $controls += $gc->table('Surrogate') if defined $gc->table('Surrogate');
+                                                + $gc->table('Control')
+                                                + $perl_surrogate);
  
      # Graph is  ~space &  ~(Cc|Cs|Cn) = ~(space + $controls)
      my $Graph = $perl->add_match_table('Graph', Full_Name => 'XPosixGraph',
@@ -13982,14 +14475,21 @@ sub compile_perl() {
      # 31f05a37c4e9c37a7263491f2fc0237d836e1a80 for a more complete description
      # of the MU issue.
      foreach my $range ($loc_problem_folds->ranges) {
-        foreach my $code_point($range->start .. $range->end) {
+        foreach my $code_point ($range->start .. $range->end) {
              my $fold_range = $cf->containing_range($code_point);
              next unless defined $fold_range;
  
+            # Skip if folds to itself
+            next if $fold_range->value eq $CODE_POINT;
+
              my @hex_folds = split " ", $fold_range->value;
-            my $start_cp = hex $hex_folds[0];
+            my $start_cp = $hex_folds[0];
+            next if $start_cp eq $CODE_POINT;
+            $start_cp = hex $start_cp;
              foreach my $i (0 .. @hex_folds - 1) {
-                my $cp = hex $hex_folds[$i];
+                my $cp = $hex_folds[$i];
+                next if $cp eq $CODE_POINT;
+                $cp = hex $cp;
                  next unless $cp > 255;    # Already have the < 256 ones
  
                  $loc_problem_folds->add_range($cp, $cp);
@@ -14009,6 +14509,7 @@ sub compile_perl() {
  
      # Look through all the known folds to populate these tables.
      foreach my $range ($cf->ranges) {
+        next if $range->value eq $CODE_POINT;
          my $start = $range->start;
          my $end = $range->end;
          $any_folds->add_range($start, $end);
@@ -14218,7 +14719,7 @@ sub compile_perl() {
                                      + utf8::unicode_to_native(0xA0) # NBSP
                          );
  
-    my @composition = ('Name', 'Unicode_1_Name', 'Name_Alias');
+    my @composition = ('Name', 'Unicode_1_Name', '_Perl_Name_Alias');
  
      if (@named_sequences) {
          push @composition, 'Named_Sequence';
@@ -14229,15 +14730,15 @@ sub compile_perl() {
  
      my $alias_sentence = "";
      my %abbreviations;
-    my $alias = property_ref('Name_Alias');
-    $perl_charname->set_proxy_for('Name_Alias');
-
-    # Add each entry in Name_Alias to Perl_Charnames.  Where these go with
-    # respect to any existing entry depends on the entry type.  Corrections go
-    # before said entry, as they should be returned in preference over the
-    # existing entry.  (A correction to a correction should be later in the
-    # Name_Alias table, so it will correctly precede the erroneous correction
-    # in Perl_Charnames.)
+    my $alias = property_ref('_Perl_Name_Alias');
+    $perl_charname->set_proxy_for('_Perl_Name_Alias');
+
+    # Add each entry in _Perl_Name_Alias to Perl_Charnames.  Where these go
+    # with respect to any existing entry depends on the entry type.
+    # Corrections go before said entry, as they should be returned in
+    # preference over the existing entry.  (A correction to a correction
+    # should be later in the _Perl_Name_Alias table, so it will correctly
+    # precede the erroneous correction in Perl_Charnames.)
      #
      # Abbreviations go after everything else, so they are saved temporarily in
      # a hash for later.
@@ -14272,7 +14773,7 @@ sub compile_perl() {
          $perl_charname->add_duplicate($code_point, $value, Replace => $replace_type);
      }
      $alias_sentence = <<END;
-The Name_Alias property adds duplicate code point entries that are
+The _Perl_Name_Alias property adds duplicate code point entries that are
  alternatives to the original name.  If an addition is a corrected
  name, it will be physically first in the table.  The original (less correct,
  but still valid) name will be next; then any alternatives, in no particular
@@ -14280,8 +14781,9 @@ order; and finally any abbreviations, again in no particular order.
  END
  
      # Now add the Unicode_1 names for the controls.  The Unicode_1 names had
-    # precedence before 6.1, so should be first in the file; the other names
-    # have precedence starting in 6.1,
+    # precedence before 6.1, including the awful ones like "LINE FEED (LF)",
+    # so should be first in the file; the other names have precedence starting
+    # in 6.1.
      my $before_or_after = ($v_version lt v6.1.0)
                            ? $MULTIPLE_BEFORE
                            : $MULTIPLE_AFTER;
@@ -14311,12 +14813,6 @@ END
                                          Replace => $before_or_after);
      }
  
-    # But in this version only, the ALERT has precedence over BELL, the
-    # Unicode_1_Name that would otherwise have precedence.
-    if ($v_version eq v6.0.0) {
-        $perl_charname->add_duplicate(7, 'ALERT', Replace => $MULTIPLE_BEFORE);
-    }
-
      # Now that everything has been added, add in abbreviations after
      # everything else.  Sort so results don't change between runs of this
      # program
@@ -14350,7 +14846,7 @@ END
      ));
  
      # Construct the Present_In property from the Age property.
-    if (-e 'DAge.txt' && defined (my $age = property_ref('Age'))) {
+    if (-e 'DAge.txt' && defined $age) {
          my $default_map = $age->default_map;
          my $in = Property->new('In',
                                  Default_Map => $default_map,
@@ -14453,23 +14949,71 @@ END
                                             # Initialize to what's common in
                                             # all Unicode releases.
                                             Initialize =>
-                                                $Space
-                                                + $gc->table('Control')
+                                                  $gc->table('Control')
+                                                + $Space
+                                                + $patws
+                                                + ((~ $Word) & $ASCII)
                             );
  
-    # In early releases without the proper Unicode properties, just set to \W.
-    if (! defined (my $patsyn = property_ref('Pattern_Syntax'))
-        || ! defined (my $patws = property_ref('Pattern_White_Space'))
-        || ! defined (my $di = property_ref('Default_Ignorable_Code_Point')))
-    {
-        $quotemeta += ~ $Word;
+    if (defined (my $patsyn = property_ref('Pattern_Syntax'))) {
+        $quotemeta += $patsyn->table('Y');
+    }
+    else {
+        $quotemeta += ((~ $Word) & Range->new(0, 255))
+                    - utf8::unicode_to_native(0xA8)
+                    - utf8::unicode_to_native(0xAF)
+                    - utf8::unicode_to_native(0xB2)
+                    - utf8::unicode_to_native(0xB3)
+                    - utf8::unicode_to_native(0xB4)
+                    - utf8::unicode_to_native(0xB7)
+                    - utf8::unicode_to_native(0xB8)
+                    - utf8::unicode_to_native(0xB9)
+                    - utf8::unicode_to_native(0xBC)
+                    - utf8::unicode_to_native(0xBD)
+                    - utf8::unicode_to_native(0xBE);
+        $quotemeta += [ # These are above-Latin1 patsyn; hence should be the
+                        # same in all releases
+                        0x2010 .. 0x2027,
+                        0x2030 .. 0x203E,
+                        0x2041 .. 0x2053,
+                        0x2055 .. 0x205E,
+                        0x2190 .. 0x245F,
+                        0x2500 .. 0x2775,
+                        0x2794 .. 0x2BFF,
+                        0x2E00 .. 0x2E7F,
+                        0x3001 .. 0x3003,
+                        0x3008 .. 0x3020,
+                        0x3030 .. 0x3030,
+                        0xFD3E .. 0xFD3F,
+                        0xFE45 .. 0xFE46
+                      ];
+    }
+
+    if (defined (my $di = property_ref('Default_Ignorable_Code_Point'))) {
+        $quotemeta += $di->table('Y')
      }
      else {
-        $quotemeta += $patsyn->table('Y')
-                   + $patws->table('Y')
-                   + $di->table('Y')
-                   + ((~ $Word) & $ASCII);
+        if ($v_version ge v2.0) {
+            $quotemeta += $gc->table('Cf')
+                       +  $gc->table('Cs');
+
+            # These are above the Unicode version 1 max
+            $quotemeta->add_range(0xE0000, 0xE0FFF);
+        }
+        $quotemeta += $gc->table('Cc')
+                    - $Space;
+        my $temp = Range_List->new(Initialize => [ 0x180B .. 0x180D,
+                                                   0x2060 .. 0x206F,
+                                                   0xFE00 .. 0xFE0F,
+                                                   0xFFF0 .. 0xFFFB,
+                                                   0xE0000 .. 0xE0FFF,
+                                                  ]);
+        $quotemeta += $temp;
      }
+    calculate_DI();
+    $quotemeta += $DI;
+
+    calculate_NChar();
  
      # Finished creating all the perl properties.  All non-internal non-string
      # ones have a synonym of 'Is_' prefixed.  (Internal properties begin with
@@ -14496,9 +15040,7 @@ END
          # can give different annotations for each.
          $unassigned_sans_noncharacters = Range_List->new(
                                      Initialize => $gc->table('Unassigned'));
-        if (defined (my $nonchars = property_ref('Noncharacter_Code_Point'))) {
-            $unassigned_sans_noncharacters &= $nonchars->table('N');
-        }
+        $unassigned_sans_noncharacters &= (~ $NChar);
  
          for (my $i = 0; $i <= $MAX_UNICODE_CODEPOINT + 1; $i++ ) {
              $i = populate_char_info($i);    # Note sets $i so may cause skips
@@ -14790,14 +15332,15 @@ sub register_file_for_name($$$) {
      my $file = shift;            # The file name in the final directory.
      Carp::carp_extra_args(\@_) if main::DEBUG && @_;
  
-    trace "table=$table, file=$file, directory=@$directory_ref" if main::DEBUG && $to_trace;
+    trace "table=$table, file=$file, directory=@$directory_ref, fate=", $table->fate if main::DEBUG && $to_trace;
  
      if ($table->isa('Property')) {
          $table->set_file_path(@$directory_ref, $file);
          push @map_properties, $table;
  
          # No swash means don't do the rest of this.
-        return if $table->fate != $ORDINARY;
+        return if $table->fate != $ORDINARY
+                  && ! ($table->name =~ /^_/ && $table->fate == $INTERNAL_ONLY);
  
          # Get the path to the file
          my @path = $table->file_path;
@@ -14811,7 +15354,12 @@ sub register_file_for_name($$$) {
          # property's map table
          foreach my $alias ($table->aliases) {
              my $name = $alias->name;
-            $loose_property_to_file_of{standardize($name)} = $file;
+            if ($name =~ /^_/) {
+                $strict_property_to_file_of{lc $name} = $file;
+            }
+            else {
+                $loose_property_to_file_of{standardize($name)} = $file;
+            }
          }
  
          # And a way for utf8_heavy to find the proper key in the SwashInfo
@@ -15019,7 +15567,22 @@ sub register_file_for_name($$$) {
          # Remove interior underscores.
          (my $filename = $name) =~ s/ (?<=.) _ (?=.) //xg;
  
-        # Change any non-word character into an underscore, and truncate to 8.
+        # Convert the dot in floating point numbers to an underscore
+        $filename =~ s/\./_/ if $filename =~ / ^ \d+ \. \d+ $ /x;
+
+        my $suffix = "";
+
+        # Extract any suffix, delete any non-word character, and truncate to 3
+        # after the dot
+        if ($filename =~ m/ ( .*? ) ( \. .* ) /x) {
+            $filename = $1;
+            $suffix = $2;
+            $suffix =~ s/\W+//g;
+            substr($suffix, 4) = "" if length($suffix) > 4;
+        }
+
+        # Change any non-word character outside the suffix into an underscore,
+        # and truncate to 8.
          $filename =~ s/\W+/_/g;   # eg., "L&" -> "L_"
          substr($filename, 8) = "" if length($filename) > 8;
  
@@ -15031,7 +15594,7 @@ sub register_file_for_name($$$) {
          #     InGreekE
          #     InGreek2
          my $warned = 0;
-        while (my $num = $base_names{$path}{lc $filename}++) {
+        while (my $num = $base_names{$path}{lc "$filename$suffix"}++) {
              $num++; # so basenames with numbers start with '2', which
                      # just looks more natural.
  
@@ -15538,9 +16101,9 @@ sub make_ucd_table_pod_entries {
              $$info_ref .= $full_name;
          }
  
-        # And the full-name entry includes the short name, if different
+        # And the full-name entry includes the short name, if shorter
          if ($info_ref == \$full_info
-            && $standard_short_name ne $standard_full_name)
+            && length $standard_short_name < length $standard_full_name)
          {
              $full_info =~ s/\.\Z//;
              $full_info .= "  " if $full_info;
@@ -15564,6 +16127,17 @@ sub make_ucd_table_pod_entries {
          $full_info .= ".  " if $full_info;
          $full_info .= $more_info;
      }
+    if ($table->property->type == $FORCED_BINARY) {
+        if ($full_info) {
+            $full_info =~ s/\.\Z//;
+            $full_info .= ".  ";
+        }
+        $full_info .= "This is a combination property which has both:"
+                    . " 1) a map to various string values; and"
+                    . " 2) a map to boolean Y/N, where 'Y' means the"
+                    . " string value is non-empty.  Add the prefix 'is'"
+                    . " to the prop_invmap() call to get the latter";
+    }
  
      # These keep track if have created full and short name pod entries for the
      # property
@@ -15595,6 +16169,9 @@ sub make_ucd_table_pod_entries {
              $info = $other_info;
          }
  
+        $combination_property{$standard} = 1
+                                  if $table->property->type == $FORCED_BINARY;
+
          # Here, we have set up the two columns for this entry.  But if an
          # entry already exists for this name, we have to decide which one
          # we're going to later output.
@@ -15808,6 +16385,7 @@ END
      # The sort will cause the alphabetically first properties to be added to
      # each list first, so each list will be sorted.
      foreach my $property (sort keys %why_suppressed) {
+        next unless $why_suppressed{$property};
          push @{$why_list{$why_suppressed{$property}}}, $property;
      }
  
@@ -15870,13 +16448,12 @@ END
      }
  
      # Similarly, generate a list of files that we don't use, grouped by the
-    # reasons why.  First, create a hash whose keys are the reasons, and whose
-    # values are anonymous arrays of all the files that share that reason.
+    # reasons why (Don't output if the reason is empty).  First, create a hash
+    # whose keys are the reasons, and whose values are anonymous arrays of all
+    # the files that share that reason.
      my %grouped_by_reason;
-    foreach my $file (keys %ignored_files) {
-        push @{$grouped_by_reason{$ignored_files{$file}}}, $file;
-    }
      foreach my $file (keys %skipped_files) {
+        next unless $skipped_files{$file};
          push @{$grouped_by_reason{$skipped_files{$file}}}, $file;
      }
  
@@ -15937,7 +16514,7 @@ To change this file, edit $0 instead.
  
  =head1 NAME
  
-$pod_file - Index of Unicode Version $string_version character properties in Perl
+$pod_file - Index of Unicode Version $unicode_version character properties in Perl
  
  =head1 DESCRIPTION
  
@@ -16331,6 +16908,10 @@ sub make_Heavy () {
                             = simple_dumper(\%loose_property_name_of, ' ' x 4);
      chomp $loose_property_name_of;
  
+    my $strict_property_name_of
+                           = simple_dumper(\%strict_property_name_of, ' ' x 4);
+    chomp $strict_property_name_of;
+
      my $stricter_to_file_of = simple_dumper(\%stricter_to_file_of, ' ' x 4);
      chomp $stricter_to_file_of;
  
@@ -16369,6 +16950,10 @@ sub make_Heavy () {
                          = simple_dumper(\%loose_property_to_file_of, ' ' x 4);
      chomp $loose_property_to_file_of;
  
+    my $strict_property_to_file_of
+                        = simple_dumper(\%strict_property_to_file_of, ' ' x 4);
+    chomp $strict_property_to_file_of;
+
      my $file_to_swash_name = simple_dumper(\%file_to_swash_name, ' ' x 4);
      chomp $file_to_swash_name;
  
@@ -16384,6 +16969,11 @@ $INTERNAL_ONLY_HEADER
  $loose_property_name_of
  );
  
+# Same, but strict names
+\%utf8::strict_property_name_of = (
+$strict_property_name_of
+);
+
  # Gives the definitions (in the form of inversion lists) for those properties
  # whose definitions aren't kept in files
  \@utf8::inline_definitions = (
@@ -16432,6 +17022,11 @@ $caseless_equivalent_to
  $loose_property_to_file_of
  );
  
+# Property names to mapping files
+\%utf8::strict_property_to_file_of = (
+$strict_property_to_file_of
+);
+
  # Files to the swash names within them.
  \%utf8::file_to_swash_name = (
  $file_to_swash_name
@@ -16803,8 +17398,8 @@ sub make_UCD () {
      # an element for the Hangul syllables in the appropriate place, and
      # otherwise changes the name to include the "-<code point>" suffix.
      my @algorithm_names;
-    my $done_hangul = 0;
-
+    my $done_hangul = $v_version lt v2.0.0;  # Hanguls as we know them came
+                                             # along in this version
      # Copy it linearly.
      for my $i (0 .. @code_points_ending_in_code_point - 1) {
  
@@ -16855,6 +17450,9 @@ sub make_UCD () {
      my $ambiguous_names = simple_dumper(\%ambiguous_names, ' ' x 4);
      chomp $ambiguous_names;
  
+    my $combination_property = simple_dumper(\%combination_property, ' ' x 4);
+    chomp $combination_property;
+
      my $loose_defaults = simple_dumper(\%loose_defaults, ' ' x 4);
      chomp $loose_defaults;
  
@@ -16919,6 +17517,13 @@ $ambiguous_names
  $loose_defaults
  );
  
+# The properties that are combinations, in that they have both a map table and
+# a match table.  This is actually for UCD.t, so it knows how to test for
+# these.
+\%Unicode::UCD::combination_property = (
+$combination_property
+);
+
  # All combinations of names that are suppressed.
  # This is actually for UCD.t, so it knows which properties shouldn't have
  # entries.  If it got any bigger, would probably want to put it in its own
@@ -17019,14 +17624,8 @@ sub write_all_tables() {
              # with it or not.
              my $expected_empty =
  
-                # $perl should be empty, as well as properties that we just
-                # don't do anything with
-                ($is_property
-                    && ($table == $perl
-                        || grep { $complete_name eq $_ }
-                                                    @unimplemented_properties
-                    )
-                )
+                # $perl should be empty
+                ($is_property && ($table == $perl))
  
                  # Match tables in properties we skipped populating should be
                  # empty
@@ -17092,7 +17691,7 @@ sub write_all_tables() {
                  : ($is_property)
                    ? # All these types of map tables will be full because
                      # they will have been populated with defaults
-                    ($type == $ENUM || $type == $FORCED_BINARY)
+                    ($type == $ENUM)
  
                    : # A match table should match everything if its method
                      # shows it should
@@ -17253,12 +17852,14 @@ sub write_all_tables() {
                          }
                      }
                      else {
-                        if (exists ($loose_property_name_of{$alias_standard}))
-                        {
-                            Carp::my_carp("There already is a property with the same standard name as $alias_name: $loose_property_name_of{$alias_standard}.  Old name is retained");
+                        my $hash_ref = ($alias_standard =~ /^_/)
+                                       ? \%strict_property_name_of
+                                       : \%loose_property_name_of;
+                        if (exists $hash_ref->{$alias_standard}) {
+                            Carp::my_carp("There already is a property with the same standard name as $alias_name: $hash_ref->{$alias_standard}.  Old name is retained");
                          }
                          else {
-                            $loose_property_name_of{$alias_standard}
+                            $hash_ref->{$alias_standard}
                                                  = $standard_property_name;
                          }
  
@@ -17509,7 +18110,7 @@ sub generate_tests($$$$$) {
      my @output;
      # Create a complete set of tests, with complements.
      if (defined $valid_code) {
-       push @output, <<"EOC"
+        push @output, <<"EOC"
  Expect(1, $valid_code, '\\p{$name}', $warning);
  Expect(0, $valid_code, '\\p{^$name}', $warning);
  Expect(0, $valid_code, '\\P{$name}', $warning);
@@ -17517,7 +18118,7 @@ Expect(1, $valid_code, '\\P{^$name}', $warning);
  EOC
      }
      if (defined $invalid_code) {
-       push @output, <<"EOC"
+        push @output, <<"EOC"
  Expect(0, $invalid_code, '\\p{$name}', $warning);
  Expect(1, $invalid_code, '\\p{^$name}', $warning);
  Expect(1, $invalid_code, '\\P{$name}', $warning);
@@ -17755,8 +18356,10 @@ sub make_property_test_script() {
              # Test each possible combination of the property's aliases with
              # the table's.  If this gets to be too many, could do what is done
              # in the set_final_comment() for Tables
-            my @table_aliases = $table->aliases;
-            my @property_aliases = $table->property->aliases;
+            my @table_aliases = grep { $_->status ne $INTERNAL_ALIAS } $table->aliases;
+            next unless @table_aliases;
+            my @property_aliases = grep { $_->status ne $INTERNAL_ALIAS } $table->property->aliases;
+            next unless @property_aliases;
  
              # Every property can be optionally be prefixed by 'Is_', so test
              # that those work, by creating such a new alias for each
@@ -17988,84 +18591,89 @@ END
      return;
  }
  
+# Skip reasons, kept in variables so that each use has exactly the same text,
+# and hence the files with each reason get grouped together in perluniprops.
+my $Documentation = "Documentation";
+my $Indic_Skip
+            = "Provisional; for the analysis and processing of Indic scripts";
+my $Validation = "Validation Tests";
+my $Validation_Documentation = "Documentation of validation Tests";
+
  # This is a list of the input files and how to handle them.  The files are
  # processed in their order in this list.  Some reordering is possible if
-# desired, but the v0 files should be first, and the extracted before the
-# others except DAge.txt (as data in an extracted file can be over-ridden by
-# the non-extracted.  Some other files depend on data derived from an earlier
-# file, like UnicodeData requires data from Jamo, and the case changing and
-# folding requires data from Unicode.  Mostly, it is safest to order by first
-# version releases in (except the Jamo).  DAge.txt is read before the
-# extracted ones because of the rarely used feature $compare_versions.  In the
-# unlikely event that there were ever an extracted file that contained the Age
-# property information, it would have to go in front of DAge.
+# desired, but the PropertyAliases and PropValueAliases files should be first,
+# and the extracted before the others (as data in an extracted file can be
+# over-ridden by the non-extracted).  Some other files depend on data derived
+# from an earlier file, like UnicodeData requires data from Jamo, and the case
+# changing and folding requires data from Unicode.  Mostly, it is safest to
+# order the files by the version each was first released in (except Jamo).
  #
  # The version strings allow the program to know whether to expect a file or
  # not, but if a file exists in the directory, it will be processed, even if it
  # is in a version earlier than expected, so you can copy files from a later
  # release into an earlier release's directory.
  my @input_file_objects = (
-    Input_file->new('PropertyAliases.txt', v0,
+    Input_file->new('PropertyAliases.txt', v3.2,
                      Handler => \&process_PropertyAliases,
-                    ),
+                    Early => [ \&substitute_PropertyAliases ],
+                    Required_Even_in_Debug_Skip => 1,
+                   ),
      Input_file->new(undef, v0,  # No file associated with this
                      Progress_Message => 'Finishing property setup',
                      Handler => \&finish_property_setup,
-                    ),
-    Input_file->new('PropValueAliases.txt', v0,
+                   ),
+    Input_file->new('PropValueAliases.txt', v3.2,
                       Handler => \&process_PropValueAliases,
+                     Early => [ \&substitute_PropValueAliases ],
                       Has_Missings_Defaults => $NOT_IGNORED,
-                     ),
-    Input_file->new('DAge.txt', v3.2.0,
-                    Has_Missings_Defaults => $NOT_IGNORED,
-                    Property => 'Age'
+                     Required_Even_in_Debug_Skip => 1,
                      ),
      Input_file->new("${EXTRACTED}DGeneralCategory.txt", v3.1.0,
                      Property => 'General_Category',
-                    ),
+                   ),
      Input_file->new("${EXTRACTED}DCombiningClass.txt", v3.1.0,
                      Property => 'Canonical_Combining_Class',
                      Has_Missings_Defaults => $NOT_IGNORED,
-                    ),
+                   ),
      Input_file->new("${EXTRACTED}DNumType.txt", v3.1.0,
                      Property => 'Numeric_Type',
                      Has_Missings_Defaults => $NOT_IGNORED,
-                    ),
+                   ),
      Input_file->new("${EXTRACTED}DEastAsianWidth.txt", v3.1.0,
                      Property => 'East_Asian_Width',
                      Has_Missings_Defaults => $NOT_IGNORED,
-                    ),
+                   ),
      Input_file->new("${EXTRACTED}DLineBreak.txt", v3.1.0,
                      Property => 'Line_Break',
                      Has_Missings_Defaults => $NOT_IGNORED,
-                    ),
+                   ),
      Input_file->new("${EXTRACTED}DBidiClass.txt", v3.1.1,
                      Property => 'Bidi_Class',
                      Has_Missings_Defaults => $NOT_IGNORED,
-                    ),
+                   ),
      Input_file->new("${EXTRACTED}DDecompositionType.txt", v3.1.0,
                      Property => 'Decomposition_Type',
                      Has_Missings_Defaults => $NOT_IGNORED,
-                    ),
+                   ),
      Input_file->new("${EXTRACTED}DBinaryProperties.txt", v3.1.0),
      Input_file->new("${EXTRACTED}DNumValues.txt", v3.1.0,
                      Property => 'Numeric_Value',
                      Each_Line_Handler => \&filter_numeric_value_line,
                      Has_Missings_Defaults => $NOT_IGNORED,
-                    ),
+                   ),
      Input_file->new("${EXTRACTED}DJoinGroup.txt", v3.1.0,
                      Property => 'Joining_Group',
                      Has_Missings_Defaults => $NOT_IGNORED,
-                    ),
+                   ),
  
      Input_file->new("${EXTRACTED}DJoinType.txt", v3.1.0,
                      Property => 'Joining_Type',
                      Has_Missings_Defaults => $NOT_IGNORED,
-                    ),
+                   ),
      Input_file->new('Jamo.txt', v2.0.0,
                      Property => 'Jamo_Short_Name',
                      Each_Line_Handler => \&filter_jamo_line,
-                    ),
+                   ),
      Input_file->new('UnicodeData.txt', v1.1.5,
                      Pre_Handler => \&setup_UnicodeData,
  
@@ -18100,7 +18708,12 @@ my @input_file_objects = (
                                              \&filter_UnicodeData_line,
                                           ],
                      EOF_Handler => \&EOF_UnicodeData,
-                    ),
+                   ),
+    Input_file->new('CJKXREF.TXT', v1.1.5,
+                    Withdrawn => v2.0.0,
+                    Skip => 'Gives the mapping of CJK code points '
+                          . 'between Unicode and various other standards',
+                   ),
      Input_file->new('ArabicShaping.txt', v2.0.0,
                      Each_Line_Handler =>
                          ($v_version lt 4.1.0)
@@ -18110,29 +18723,46 @@ my @input_file_objects = (
                      # not used by Perl
                      Properties => [ '<ignored>', 'Joining_Type', 'Joining_Group' ],
                      Has_Missings_Defaults => $NOT_IGNORED,
-                    ),
+                   ),
      Input_file->new('Blocks.txt', v2.0.0,
                      Property => 'Block',
                      Has_Missings_Defaults => $NOT_IGNORED,
                      Each_Line_Handler => \&filter_blocks_lines
-                    ),
+                   ),
+    Input_file->new('Index.txt', v2.0.0,
+                    Skip => 'Alphabetical index of Unicode characters',
+                   ),
+    Input_file->new('NamesList.txt', v2.0.0,
+                    Skip => 'Annotated list of characters',
+                   ),
      Input_file->new('PropList.txt', v2.0.0,
                      Each_Line_Handler => (($v_version lt v3.1.0)
                                              ? \&filter_old_style_proplist
                                              : undef),
-                    ),
+                   ),
+    Input_file->new('Props.txt', v2.0.0,
+                    Withdrawn => v3.0.0,
+                    Skip => 'A subset of F<PropList.txt> (which is used instead)',
+                   ),
+    Input_file->new('ReadMe.txt', v2.0.0,
+                    Skip => $Documentation,
+                   ),
      Input_file->new('Unihan.txt', v2.0.0,
+                    Withdrawn => v5.2.0,
+                    Construction_Time_Handler => \&construct_unihan,
                      Pre_Handler => \&setup_unihan,
-                    Optional => 1,
+                    Optional => [ "",
+                                  'Unicode_Radical_Stroke'
+                                ],
                      Each_Line_Handler => \&filter_unihan_line,
-                        ),
+                   ),
      Input_file->new('SpecialCasing.txt', v2.1.8,
                      Each_Line_Handler => ($v_version eq 2.1.8)
                                           ? \&filter_2_1_8_special_casing_line
                                           : \&filter_special_casing_line,
                      Pre_Handler => \&setup_special_casing,
                      Has_Missings_Defaults => $IGNORED,
-                    ),
+                   ),
      Input_file->new(
                      'LineBreak.txt', v3.0.0,
                      Has_Missings_Defaults => $NOT_IGNORED,
@@ -18141,7 +18771,7 @@ my @input_file_objects = (
                      Each_Line_Handler => (($v_version lt v3.1.0)
                                          ? \&filter_early_ea_lb
                                          : undef),
-                    ),
+                   ),
      Input_file->new('EastAsianWidth.txt', v3.0.0,
                      Property => 'East_Asian_Width',
                      Has_Missings_Defaults => $NOT_IGNORED,
@@ -18149,10 +18779,14 @@ my @input_file_objects = (
                      Each_Line_Handler => (($v_version lt v3.1.0)
                                          ? \&filter_early_ea_lb
                                          : undef),
-                    ),
+                   ),
      Input_file->new('CompositionExclusions.txt', v3.0.0,
                      Property => 'Composition_Exclusion',
-                    ),
+                   ),
+    Input_file->new('UnicodeData.html', v3.0.0,
+                    Withdrawn => v4.0.1,
+                    Skip => $Documentation,
+                   ),
      Input_file->new('BidiMirroring.txt', v3.0.1,
                      Property => 'Bidi_Mirroring_Glyph',
                      Has_Missings_Defaults => ($v_version lt v6.2.0)
@@ -18161,12 +18795,15 @@ my @input_file_objects = (
                                                # anything to us, we will use the
                                                # null string
                                                : $IGNORED,
-
-                    ),
-    Input_file->new("NormTest.txt", v3.0.0,
-                     Handler => \&process_NormalizationsTest,
-                     Skip => ($make_norm_test_script) ? 0 : 'Validation Tests',
-                    ),
+                   ),
+    Input_file->new('NamesList.html', v3.0.0,
+                    Skip => 'Describes the format and contents of '
+                          . 'F<NamesList.txt>',
+                   ),
+    Input_file->new('UnicodeCharacterDatabase.html', v3.0.0,
+                    Withdrawn => v5.1,
+                    Skip => $Documentation,
+                   ),
      Input_file->new('CaseFolding.txt', v3.0.1,
                      Pre_Handler => \&setup_case_folding,
                      Each_Line_Handler =>
@@ -18176,108 +18813,226 @@ my @input_file_objects = (
                             \&filter_case_folding_line
                          ],
                      Has_Missings_Defaults => $IGNORED,
-                    ),
+                   ),
+    Input_file->new("NormTest.txt", v3.0.1,
+                     Handler => \&process_NormalizationsTest,
+                     Skip => ($make_norm_test_script) ? 0 : $Validation,
+                   ),
      Input_file->new('DCoreProperties.txt', v3.1.0,
                      # 5.2 changed this file
                      Has_Missings_Defaults => (($v_version ge v5.2.0)
                                              ? $NOT_IGNORED
                                              : $NO_DEFAULTS),
-                    ),
+                   ),
+    Input_file->new('DProperties.html', v3.1.0,
+                    Withdrawn => v3.2.0,
+                    Skip => $Documentation,
+                   ),
+    Input_file->new('PropList.html', v3.1.0,
+                    Withdrawn => v5.1,
+                    Skip => $Documentation,
+                   ),
      Input_file->new('Scripts.txt', v3.1.0,
                      Property => 'Script',
                      Each_Line_Handler => (($v_version le v4.0.0)
                                            ? \&filter_all_caps_script_names
                                            : undef),
                      Has_Missings_Defaults => $NOT_IGNORED,
-                    ),
+                   ),
      Input_file->new('DNormalizationProps.txt', v3.1.0,
                      Has_Missings_Defaults => $NOT_IGNORED,
                      Each_Line_Handler => (($v_version lt v4.0.1)
                                        ? \&filter_old_style_normalization_lines
                                        : undef),
-                    ),
-    Input_file->new('HangulSyllableType.txt', v0,
+                   ),
+    Input_file->new('DerivedProperties.html', v3.1.1,
+                    Withdrawn => v5.1,
+                    Skip => $Documentation,
+                   ),
+    Input_file->new('DAge.txt', v3.2.0,
                      Has_Missings_Defaults => $NOT_IGNORED,
-                    Property => 'Hangul_Syllable_Type',
-                    Pre_Handler => ($v_version lt v4.0.0)
-                                   ? \&generate_hst
-                                   : undef,
-                    ),
+                    Property => 'Age'
+                   ),
+    Input_file->new('HangulSyllableType.txt', v4.0,
+                    Has_Missings_Defaults => $NOT_IGNORED,
+                    Early => [ \&generate_hst, 'Hangul_Syllable_Type' ],
+                    Property => 'Hangul_Syllable_Type'
+                   ),
+    Input_file->new('NormalizationCorrections.txt', v3.2.0,
+                     # This documents the cumulative fixes to erroneous
+                     # normalizations in earlier Unicode versions.  Its main
+                     # purpose is so that someone running on an earlier
+                     # version can use this file to override what got
+                     # published in that earlier release.  It would be easy
+                     # for mktables to handle this file.  But all the
+                     # corrections in it should already be in the other files
+                     # for the release it is in.  To get it to actually mean
+                     # something useful, someone would have to be using an
+                     # earlier Unicode release, and copy it into the directory
+                     # for that release and recompile.  So far there has been
+                     # no demand to do that, so this hasn't been implemented.
+                    Skip => 'Documentation of corrections already '
+                          . 'incorporated into the Unicode data base',
+                   ),
+    Input_file->new('StandardizedVariants.html', v3.2.0,
+                    Skip => 'Provides a visual display of the standard '
+                          . 'variant sequences derived from '
+                          . 'F<StandardizedVariants.txt>.',
+                        # I don't know why the html came earlier than the
+                        # .txt, but both are skipped anyway, so it doesn't
+                        # matter.
+                   ),
+    Input_file->new('StandardizedVariants.txt', v4.0.0,
+                    Skip => 'Certain glyph variations for character display '
+                          . 'are standardized.  This lists the non-Unihan '
+                          . 'ones; the Unihan ones are also not used by '
+                          . 'Perl, and are in a separate Unicode data base '
+                          . 'L<http://www.unicode.org/ivd>',
+                   ),
+    Input_file->new('UCD.html', v4.0.0,
+                    Withdrawn => v5.2,
+                    Skip => $Documentation,
+                   ),
      Input_file->new("$AUXILIARY/WordBreakProperty.txt", v4.1.0,
+                    Early => [ "WBsubst.txt", '_Perl_WB', 'ALetter' ],
                      Property => 'Word_Break',
                      Has_Missings_Defaults => $NOT_IGNORED,
-                    ),
-    Input_file->new("$AUXILIARY/GraphemeBreakProperty.txt", v0,
+                   ),
+    Input_file->new("$AUXILIARY/GraphemeBreakProperty.txt", v4.1,
+                    Early => [ \&generate_GCB, '_Perl_GCB' ],
                      Property => 'Grapheme_Cluster_Break',
                      Has_Missings_Defaults => $NOT_IGNORED,
-                    Pre_Handler => ($v_version lt v4.1.0)
-                                   ? \&generate_GCB
-                                   : undef,
-                    ),
+                   ),
      Input_file->new("$AUXILIARY/GCBTest.txt", v4.1.0,
                      Handler => \&process_GCB_test,
-                    ),
-    Input_file->new("$AUXILIARY/LBTest.txt", v4.1.0,
-                    Skip => 'Validation Tests',
-                    ),
+                   ),
+    Input_file->new("$AUXILIARY/GraphemeBreakTest.html", v4.1.0,
+                    Skip => $Validation_Documentation,
+                   ),
      Input_file->new("$AUXILIARY/SBTest.txt", v4.1.0,
                      Handler => \&process_SB_test,
-                    ),
+                   ),
+    Input_file->new("$AUXILIARY/SentenceBreakTest.html", v4.1.0,
+                    Skip => $Validation_Documentation,
+                   ),
      Input_file->new("$AUXILIARY/WBTest.txt", v4.1.0,
                      Handler => \&process_WB_test,
-                    ),
+                   ),
+    Input_file->new("$AUXILIARY/WordBreakTest.html", v4.1.0,
+                    Skip => $Validation_Documentation,
+                   ),
      Input_file->new("$AUXILIARY/SentenceBreakProperty.txt", v4.1.0,
                      Property => 'Sentence_Break',
+                    Early => [ "SBsubst.txt", '_Perl_SB', 'OLetter' ],
                      Has_Missings_Defaults => $NOT_IGNORED,
-                    ),
+                   ),
      Input_file->new('NamedSequences.txt', v4.1.0,
                      Handler => \&process_NamedSequences
-                    ),
-    Input_file->new('NameAliases.txt', v0,
+                   ),
+    Input_file->new('Unihan.html', v4.1.0,
+                    Withdrawn => v5.2,
+                    Skip => $Documentation,
+                   ),
+    Input_file->new('NameAliases.txt', v5.0,
                      Property => 'Name_Alias',
-                    Pre_Handler => ($v_version le v6.0.0)
-                                   ? \&setup_early_name_alias
-                                   : undef,
                      Each_Line_Handler => ($v_version le v6.0.0)
                                     ? \&filter_early_version_name_alias_line
                                     : \&filter_later_version_name_alias_line,
-                    ),
+                   ),
+        # NameAliases.txt came along in v5.0.  The above constructor handles
+        # this.  But until 6.1, it was lacking some information needed by core
+        # perl.  The constructor below handles that.  It is either a kludge or
+        # clever, depending on your point of view.  The 'Withdrawn' parameter
+        # indicates not to use it at all starting in 6.1 (so the above
+        # constructor applies), and the 'v6.1' parameter indicates to use the
+        # Early parameter before 6.1.  Therefore 'Early' is always used,
+        # yielding the internal-only property '_Perl_Name_Alias', which it
+        # gets from a NameAliases.txt from 6.1 or later stored in
+        # N_Asubst.txt.  In combination with the above constructor,
+        # 'Name_Alias' is publicly accessible starting with v5.0, and the
+        # better 6.1 version is accessible to perl core in all releases.
+    Input_file->new("NameAliases.txt", v6.1,
+                    Withdrawn => v6.1,
+                    Early => [ "N_Asubst.txt", '_Perl_Name_Alias', "" ],
+                    Property => 'Name_Alias',
+                    EOF_Handler => \&fixup_early_perl_name_alias,
+                    Each_Line_Handler =>
+                                       \&filter_later_version_name_alias_line,
+                   ),
+    Input_file->new('NamedSqProv.txt', v5.0.0,
+                    Skip => 'Named sequences proposed for inclusion in a '
+                          . 'later version of the Unicode Standard; if you '
+                          . 'need them now, you can append this file to '
+                          . 'F<NamedSequences.txt> and recompile perl',
+                   ),
+    Input_file->new("$AUXILIARY/LBTest.txt", v5.1.0,
+                    Skip => $Validation,
+                   ),
+    Input_file->new("$AUXILIARY/LineBreakTest.html", v5.1.0,
+                    Skip => $Validation_Documentation,
+                   ),
      Input_file->new("BidiTest.txt", v5.2.0,
-                    Skip => 'Validation Tests',
-                    ),
+                    Skip => $Validation,
+                   ),
      Input_file->new('UnihanIndicesDictionary.txt', v5.2.0,
-                    Optional => 1,
+                    Optional => "",
                      Each_Line_Handler => \&filter_unihan_line,
-                    ),
+                   ),
      Input_file->new('UnihanDataDictionaryLike.txt', v5.2.0,
-                    Optional => 1,
+                    Optional => "",
                      Each_Line_Handler => \&filter_unihan_line,
-                    ),
+                   ),
      Input_file->new('UnihanIRGSources.txt', v5.2.0,
-                    Optional => 1,
+                    Optional => [ "",
+                                  'kCompatibilityVariant',
+                                  'kIICore',
+                                  'kIRG_GSource',
+                                  'kIRG_HSource',
+                                  'kIRG_JSource',
+                                  'kIRG_KPSource',
+                                  'kIRG_MSource',
+                                  'kIRG_KSource',
+                                  'kIRG_TSource',
+                                  'kIRG_USource',
+                                  'kIRG_VSource',
+                               ],
                      Pre_Handler => \&setup_unihan,
                      Each_Line_Handler => \&filter_unihan_line,
-                    ),
+                   ),
      Input_file->new('UnihanNumericValues.txt', v5.2.0,
-                    Optional => 1,
+                    Optional => [ "",
+                                  'kAccountingNumeric',
+                                  'kOtherNumeric',
+                                  'kPrimaryNumeric',
+                                ],
                      Each_Line_Handler => \&filter_unihan_line,
-                    ),
+                   ),
      Input_file->new('UnihanOtherMappings.txt', v5.2.0,
-                    Optional => 1,
+                    Optional => "",
                      Each_Line_Handler => \&filter_unihan_line,
-                    ),
+                   ),
      Input_file->new('UnihanRadicalStrokeCounts.txt', v5.2.0,
-                    Optional => 1,
+                    Optional => [ "",
+                                  'Unicode_Radical_Stroke'
+                                ],
                      Each_Line_Handler => \&filter_unihan_line,
-                    ),
+                   ),
      Input_file->new('UnihanReadings.txt', v5.2.0,
-                    Optional => 1,
+                    Optional => "",
                      Each_Line_Handler => \&filter_unihan_line,
-                    ),
+                   ),
      Input_file->new('UnihanVariants.txt', v5.2.0,
-                    Optional => 1,
+                    Optional => "",
                      Each_Line_Handler => \&filter_unihan_line,
-                    ),
+                   ),
+    Input_file->new('CJKRadicals.txt', v5.2.0,
+                    Skip => 'Maps the kRSUnicode property values to '
+                          . 'corresponding code points',
+                   ),
+    Input_file->new('EmojiSources.txt', v6.0.0,
+                    Skip => 'Maps certain Unicode code points to their '
+                          . 'legacy Japanese cell-phone values',
+                   ),
      Input_file->new('ScriptExtensions.txt', v6.0.0,
                      Property => 'Script_Extensions',
                      Pre_Handler => \&setup_script_extensions,
@@ -18285,39 +19040,74 @@ my @input_file_objects = (
                      Has_Missings_Defaults => (($v_version le v6.0.0)
                                              ? $NO_DEFAULTS
                                              : $IGNORED),
-                    ),
-    # The two Indic files are actually available starting in v6.0.0, but their
-    # property values are missing from PropValueAliases.txt in that release,
-    # so that further work would have to be done to get them to work properly
-    # for that release.
-    Input_file->new('IndicMatraCategory.txt', v6.1.0,
+                   ),
+    # These two Indic files are not usable as-is until 6.1.0, because their
+    # property values are missing from PropValueAliases.txt before that
+    # release.  Further work would be needed to get them to work properly in
+    # earlier releases, which isn't worth doing because the files are
+    # provisional.
+    Input_file->new('IndicMatraCategory.txt', v6.0.0,
+                    Withdrawn => v8.0.0,
                      Property => 'Indic_Matra_Category',
                      Has_Missings_Defaults => $NOT_IGNORED,
-                    Skip => "Withdrawn by Unicode while still provisional",
-                    ),
-    Input_file->new('IndicSyllabicCategory.txt', v6.1.0,
+                    Skip => $Indic_Skip,
+                   ),
+    Input_file->new('IndicSyllabicCategory.txt', v6.0.0,
                      Property => 'Indic_Syllabic_Category',
                      Has_Missings_Defaults => $NOT_IGNORED,
                      Skip => (($v_version lt v8.0.0)
-                              ? "Provisional; for the analysis and processing of Indic scripts"
+                              ? $Indic_Skip
                                : 0),
-                    ),
+                   ),
+    Input_file->new('USourceData.txt', v6.2.0,
+                    Skip => 'Documentation of status and cross reference of '
+                          . 'proposals for encoding by Unicode of Unihan '
+                          . 'characters',
+                   ),
+    Input_file->new('USourceGlyphs.pdf', v6.2.0,
+                    Skip => 'Pictures of the characters in F<USourceData.txt>',
+                   ),
      Input_file->new('BidiBrackets.txt', v6.3.0,
-                    Properties => [ 'Bidi_Paired_Bracket', 'Bidi_Paired_Bracket_Type' ],
+                    Properties => [ 'Bidi_Paired_Bracket',
+                                    'Bidi_Paired_Bracket_Type'
+                                  ],
                      Has_Missings_Defaults => $NO_DEFAULTS,
-                    ),
+                   ),
      Input_file->new("BidiCharacterTest.txt", v6.3.0,
-                    Skip => 'Validation Tests',
-                    ),
+                    Skip => $Validation,
+                   ),
      Input_file->new('IndicPositionalCategory.txt', v8.0.0,
                      Property => 'Indic_Positional_Category',
                      Has_Missings_Defaults => $NOT_IGNORED,
-                    ),
+                   ),
  );
  
  # End of all the preliminaries.
  # Do it...
  
+if (@missing_early_files) {
+    print simple_fold(join_lines(<<END
+
+The compilation cannot be completed because one or more required input files,
+listed below, are missing.  This is because you are compiling Unicode version
+$unicode_version, which predates the existence of these file(s).  To fully
+function, perl needs the data that these files would have contained if they
+had been in this release.  To work around this, create copies of later
+versions of the missing files in the directory containing '$0'.  (Perl will
+make the necessary adjustments to the data to compensate for it not being the
+same version as is being compiled.)  The files are available from unicode.org,
+via either ftp or http.  If using http, they will be under
+www.unicode.org/versions/.  Below are listed the source file name of each
+missing file, the Unicode version to copy it from, and the name to store it
+as.  (Note that the listed source file name may not be exactly the one that
+Unicode calls it.  If you don't find it, you can look it up in 'README.perl'
+to get the correct name.)
+END
+    ));
+    print simple_fold(join_lines("\n$_")) for @missing_early_files;
+    exit 2;
+}
+
  if ($compare_versions) {
      Carp::my_carp(<<END
  Warning.  \$compare_versions is set.  Output is not suitable for production
@@ -18326,17 +19116,13 @@ END
  }
  
  # Put into %potential_files a list of all the files in the directory structure
-# that could be inputs to this program, excluding those that we should ignore.
-# Use absolute file names because it makes it easier across machine types.
-my @ignored_files_full_names = map { File::Spec->rel2abs(
-                                     internal_file_to_platform($_))
-                                } keys %ignored_files;
+# that could be inputs to this program.
  File::Find::find({
      wanted=>sub {
-        return unless /\.txt$/i;  # Some platforms change the name's case
+        return unless / \. ( txt | htm l? ) $ /xi;  # Some platforms change the
+                                                    # name's case
          my $full = lc(File::Spec->rel2abs($_));
-        $potential_files{$full} = 1
-                    if ! grep { $full eq lc($_) } @ignored_files_full_names;
+        $potential_files{$full} = 1;
          return;
      }
  }, File::Spec->curdir());
@@ -18394,8 +19180,7 @@ else {
              # The paths are stored with relative names, and with '/' as the
              # delimiter; convert to absolute on this machine
              my $full = lc(File::Spec->rel2abs(internal_file_to_platform($input)));
-            $potential_files{lc $full} = 1
-                if ! grep { lc($full) eq lc($_) } @ignored_files_full_names;
+            $potential_files{lc $full} = 1;
          }
      }
  
@@ -18472,7 +19257,7 @@ my @input_files = qw(version Makefile);
  foreach my $object (@input_file_objects) {
      my $file = $object->file;
      next if ! defined $file;    # Not all objects have files
-    next if $object->optional && ! -e $file;
+    next if defined $object->skip;;
      push @input_files,  $file;
  }
  
@@ -18494,7 +19279,6 @@ foreach my $in (@input_files) {
          my ($volume, $directories, $file ) = File::Spec->splitpath($in);
          $directories =~ s;/$;;;     # Can have extraneous trailing '/'
          my @directories = File::Spec->splitdir($directories);
-        my $base = $file =~ s/\.txt$//;
          construct_filename($file, 'mutable', \@directories);
      }
  }
@@ -18547,6 +19331,11 @@ foreach my $file (@input_file_objects) {
  print "Finishing processing Unicode properties\n" if $verbosity >= $PROGRESS;
  finish_Unicode();
  
+# For the very specialized case of comparing two Unicode versions...
+if (DEBUG && $compare_versions) {
+    handle_compare_versions();
+}
+
  print "Compiling Perl properties\n" if $verbosity >= $PROGRESS;
  compile_perl();