perluniprops: Add info about unused Unicode files

[perl5.git] / lib / unicore / mktables
diff --git a/lib/unicore/mktables b/lib/unicore/mktables

index bef295d..7ddc258 100644 (file)
--- a/lib/unicore/mktables
+++ b/lib/unicore/mktables
@@ -844,6 +844,10 @@ my %global_to_output_map = (
  
      Present_In => 0,                # Suppress, as easily computed from Age
      Block => 0,                     # Suppress, as Blocks.txt is retained.
+
+    # Suppress, as mapping can be found instead from the
+    # Perl_Decomposition_Mapping file
+    Decomposition_Type => 0,
  );
  
  # Properties that this program ignores.
@@ -877,6 +881,10 @@ my %why_obsolete;    # Documentation only
          'Other_Lowercase' => $contributory,
          'Other_Math' => $contributory,
          'Other_Uppercase' => $contributory,
+        'Expands_On_NFC' => $why_no_expand,
+        'Expands_On_NFD' => $why_no_expand,
+        'Expands_On_NFKC' => $why_no_expand,
+        'Expands_On_NFKD' => $why_no_expand,
      );
  
      %why_suppressed = (
@@ -897,15 +905,17 @@ my %why_obsolete;    # Documentation only
          'Name_Alias' => "Accessible via 'use charnames;'",
  
          FC_NFKC_Closure => 'Supplanted in usage by NFKC_Casefold; otherwise not useful',
-        Expands_On_NFC => $why_no_expand,
-        Expands_On_NFD => $why_no_expand,
-        Expands_On_NFKC => $why_no_expand,
-        Expands_On_NFKD => $why_no_expand,
      );
  
      # The following are suppressed because they were made contributory or
      # deprecated by Unicode before Perl ever thought about supporting them.
-    foreach my $property ('Jamo_Short_Name', 'Grapheme_Link') {
+    foreach my $property ('Jamo_Short_Name',
+                          'Grapheme_Link',
+                          'Expands_On_NFC',
+                          'Expands_On_NFD',
+                          'Expands_On_NFKC',
+                          'Expands_On_NFKD'
+    ) {
          $why_suppressed{$property} = $why_deprecated{$property};
      }
  
@@ -925,7 +935,7 @@ if ($v_version ge 4.0.0) {
  if ($v_version ge 5.2.0 && $v_version lt 6.0.0) {
      $why_obsolete{'ISO_Comment'} = 'Code points for it have been removed';
      if ($v_version ge 6.0.0) {
-        $why_deprecated{'ISO_Comment'} = 'No longer needed for chart generation; otherwise not useful, and code points for it have been removed';
+        $why_deprecated{'ISO_Comment'} = 'No longer needed for Unicode\'s internal chart generation; otherwise not useful, and code points for it have been removed';
      }
  }
  
@@ -1043,18 +1053,21 @@ my %default_mapping = (
  
  # Below are files that Unicode furnishes, but this program ignores, and why
  my %ignored_files = (
-    'CJKRadicals.txt' => 'Unihan data',
-    'Index.txt' => 'An index, not actual data',
-    'NamedSqProv.txt' => 'Not officially part of the Unicode standard; Append it to NamedSequences.txt if you want to process the contents.',
-    'NamesList.txt' => 'Just adds commentary',
-    'NormalizationCorrections.txt' => 'Data is already in other files.',
-    'Props.txt' => 'Adds nothing to PropList.txt; only in very early releases',
-    'ReadMe.txt' => 'Just comments',
-    'README.TXT' => 'Just comments',
-    'StandardizedVariants.txt' => 'Only for glyph changes, not a Unicode character property.  Does not fit into current scheme where one code point is mapped',
-    'EmojiSources.txt' => 'Not of general utility: for Japanese legacy cell-phone applications',
-    'IndicMatraCategory.txt' => 'Provisional',
-    'IndicSyllabicCategory.txt' => 'Provisional',
+    'CJKRadicals.txt' => 'Maps the kRSUnicode property values to corresponding code points',
+    'Index.txt' => 'Alphabetical index of Unicode characters',
+    'NamedSqProv.txt' => 'Named sequences proposed for inclusion in a later version of the Unicode Standard; if you need them now, you can append this file to F<NamedSequences.txt> and recompile perl',
+    'NamesList.txt' => 'Annotated list of characters',
+    'NormalizationCorrections.txt' => 'Documentation of corrections already incorporated into the Unicode data base',
+    'Props.txt' => 'Only in very early releases; is a subset of F<PropList.txt> (which is used instead)',
+    'ReadMe.txt' => 'Documentation',
+    'StandardizedVariants.txt' => 'Certain glyph variations for character display are standardized.  This lists the non-Unihan ones; the Unihan ones are also not used by Perl, and are in a separate Unicode data base L<http://www.unicode.org/ivd>',
+    'EmojiSources.txt' => 'Maps certain Unicode code points to their legacy Japanese cell-phone values',
+    'IndicMatraCategory.txt' => 'Provisional; for the analysis and processing of Indic scripts',
+    'IndicSyllabicCategory.txt' => 'Provisional; for the analysis and processing of Indic scripts',
+    'auxiliary/WordBreakTest.html' => 'Documentation of validation tests',
+    'auxiliary/SentenceBreakTest.html' => 'Documentation of validation tests',
+    'auxiliary/GraphemeBreakTest.html' => 'Documentation of validation tests',
+    'auxiliary/LineBreakTest.html' => 'Documentation of validation tests',
  );
  
  ### End of externally interesting definitions, except for @input_file_objects
@@ -1082,16 +1095,16 @@ my $DEVELOPMENT_ONLY=<<"EOF";
  
  EOF
  
-my $LAST_UNICODE_CODEPOINT_STRING = "10FFFF";
-my $LAST_UNICODE_CODEPOINT = hex $LAST_UNICODE_CODEPOINT_STRING;
-my $MAX_UNICODE_CODEPOINTS = $LAST_UNICODE_CODEPOINT + 1;
+my $MAX_UNICODE_CODEPOINT_STRING = "10FFFF";
+my $MAX_UNICODE_CODEPOINT = hex $MAX_UNICODE_CODEPOINT_STRING;
+my $MAX_UNICODE_CODEPOINTS = $MAX_UNICODE_CODEPOINT + 1;
  
  # Matches legal code point.  4-6 hex numbers, If there are 6, the first
  # two must be 10; if there are 5, the first must not be a 0.  Written this way
-# to decrease backtracking.  The first one allows the code point to be at the
-# end of a word, but to work properly, the word shouldn't end with a valid hex
-# character.  The second one won't match a code point at the end of a word,
-# and doesn't have the run-on issue
+# to decrease backtracking.  The first regex allows the code point to be at
+# the end of a word, but to work properly, the word shouldn't end with a valid
+# hex character.  The second one won't match a code point at the end of a
+# word, and doesn't have the run-on issue
  my $run_on_code_point_re =
              qr/ (?: 10[0-9A-F]{4} | [1-9A-F][0-9A-F]{4} | [0-9A-F]{4} ) \b/x;
  my $code_point_re = qr/\b$run_on_code_point_re/;
@@ -1101,7 +1114,7 @@ my $code_point_re = qr/\b$run_on_code_point_re/;
  # depends on this ending with a semi-colon, so it can assume it is a valid
  # field when the line is split() by semi-colons
  my $missing_defaults_prefix =
-            qr/^#\s+\@missing:\s+0000\.\.$LAST_UNICODE_CODEPOINT_STRING\s*;/;
+            qr/^#\s+\@missing:\s+0000\.\.$MAX_UNICODE_CODEPOINT_STRING\s*;/;
  
  # Property types.  Unicode has more types, but these are sufficient for our
  # purposes.
@@ -1265,6 +1278,28 @@ my %Jamo_L;     # Leading consonants
  my %Jamo_V;     # Vowels
  my %Jamo_T;     # Trailing consonants
  
+# For code points whose name contains its ordinal as a '-ABCD' suffix.
+# The key is the base name of the code point, and the value is a hash
+# whose 'low' and 'high' elements are parallel arrays giving the bounds
+# of each range that uses this base name.
+my %names_ending_in_code_point;
+my %loose_names_ending_in_code_point;   # Same as above, but has blanks, dashes
+                                        # removed from the names
+# Inverse mapping.  The list of ranges that have these kinds of
+# names.  Each element contains the low, high, and base names in an
+# anonymous hash.
+my @code_points_ending_in_code_point;
+
+# Boolean: does this Unicode version have the hangul syllables, and are we
+# writing out a table for them?
+my $has_hangul_syllables = 0;
+
+# Does this Unicode version have code points whose names end in their
+# respective code points, and are we writing out a table for them?  0 for no;
+# otherwise points to the first property that needs such a table, so that if
+# multiple tables are needed, we don't create duplicates.
+my $needing_code_points_ending_in_code_point = 0;
+
  my @backslash_X_tests;     # List of tests read in for testing \X
  my @unhandled_properties;  # Will contain a list of properties found in
                             # the input that we didn't process.
@@ -1938,12 +1973,15 @@ sub trace { return main::trace(@_); }
      main::set_access('non_skip', \%non_skip, 'c');
  
      my %skip;
-    # This is used to skip processing of this input file semi-permanently.
-    # It is used for files that we aren't planning to process anytime soon,
-    # but want to allow to be in the directory and not raise a message that we
-    # are not handling.  Mostly for test files.  This is in contrast to the
-    # non_skip element, which is supposed to be used very temporarily for
-    # debugging.  Sets 'optional' to 1
+    # This is used to skip processing of this input file semi-permanently,
+    # when it evaluates to true.  The value should be the reason the file is
+    # being skipped.  It is used for files that we aren't planning to process
+    # anytime soon, but want to allow to be in the directory and not raise a
+    # message that we are not handling.  Mostly for test files.  This is in
+    # contrast to the non_skip element, which is supposed to be used very
+    # temporarily for debugging.  Sets 'optional' to 1.  Also, files that we
+    # pretty much will never look at can be placed in the global
+    # %ignored_files instead.  Ones used here will be added to that list.
      main::set_access('skip', \%skip, 'c');
  
      my %each_line_handler;
@@ -2066,7 +2104,12 @@ sub trace { return main::trace(@_); }
              print "Warning: " . __PACKAGE__ . " constructor for $file{$addr} has useless 'non_skip' in it\n";
          }
  
-        $optional{$addr} = 1 if $skip{$addr};
+        # If skipping, set to optional, and add to list of ignored files,
+        # including its reason
+        if ($skip{$addr}) {
+            $optional{$addr} = 1;
+            $ignored_files{$file{$addr}} = $skip{$addr}
+        }
  
          return $self;
      }
@@ -2629,10 +2672,11 @@ package Alias;
      # Should this name match loosely or not.
      main::set_access('loose_match', \%loose_match, 'r');
  
-    my %make_pod_entry;
-    # Some aliases should not get their own entries because they are covered
-    # by a wild-card, and some we want to discourage use of.  Binary
-    main::set_access('make_pod_entry', \%make_pod_entry, 'r');
+    my %make_re_pod_entry;
+    # Some aliases should not get their own entries in the re section of the
+    # pod, because they are covered by a wild-card, and some we want to
+    # discourage use of.  Binary
+    main::set_access('make_re_pod_entry', \%make_re_pod_entry, 'r');
  
      my %status;
      # Aliases have a status, like deprecated, or even suppressed (which means
@@ -2653,7 +2697,7 @@ package Alias;
  
          $name{$addr} = shift;
          $loose_match{$addr} = shift;
-        $make_pod_entry{$addr} = shift;
+        $make_re_pod_entry{$addr} = shift;
          $externally_ok{$addr} = shift;
          $status{$addr} = shift;
  
@@ -3030,7 +3074,7 @@ sub trace { return main::trace(@_); }
  
          # If the range list is empty, return a large value that isn't adjacent
          # to any that could be in the range list, for simpler tests
-        return $LAST_UNICODE_CODEPOINT + 2 unless scalar @{$ranges{$addr}};
+        return $MAX_UNICODE_CODEPOINT + 2 unless scalar @{$ranges{$addr}};
          return $ranges{$addr}->[0]->start;
      }
  
@@ -4019,8 +4063,8 @@ sub trace { return main::trace(@_); }
  
          # And finally, add the gap from the end of the table to the max
          # possible code point
-        if ($max < $LAST_UNICODE_CODEPOINT) {
-            $new->add_range($max + 1, $LAST_UNICODE_CODEPOINT);
+        if ($max < $MAX_UNICODE_CODEPOINT) {
+            $new->add_range($max + 1, $MAX_UNICODE_CODEPOINT);
          }
          return $new;
      }
@@ -4280,7 +4324,7 @@ sub trace { return main::trace(@_); }
          return $try_hard if $code >= 0xFDD0 && $code <= 0xFDEF;
          return $try_hard if ($code & 0xFFFE) == 0xFFFE; # includes FFFF
  
-        return $try_hard if $code > $LAST_UNICODE_CODEPOINT;   # keep in range
+        return $try_hard if $code > $MAX_UNICODE_CODEPOINT;   # keep in range
          return $try_hard if $code >= 0xD800 && $code <= 0xDFFF; # no surrogate
  
          return 1;
@@ -4431,7 +4475,7 @@ sub trace { return main::trace(@_); }
  
      my %internal_only;
      # Boolean; if set this table is for internal core Perl only use.
-    main::set_access('internal_only', \%internal_only);
+    main::set_access('internal_only', \%internal_only, 'r');
  
      my %find_table_from_alias;
      # The parent property passes this pointer to a hash which this class adds
@@ -4486,7 +4530,7 @@ sub trace { return main::trace(@_); }
  
      sub new {
          # All arguments are key => value pairs, which you can see below, most
-        # of which match fields documented above.  Otherwise: Pod_Entry,
+        # of which match fields documented above.  Otherwise: Re_Pod_Entry,
          # Externally_Ok, and Fuzzy apply to the names of the table, and are
          # documented in the Alias package
  
@@ -4505,7 +4549,7 @@ sub trace { return main::trace(@_); }
          my $complete_name = $complete_name{$addr}
                            = delete $args{'Complete_Name'};
          $format{$addr} = delete $args{'Format'};
-        $internal_only{$addr} = delete $args{'Internal_Only_Warning'} || 0;
+        $internal_only{$addr} = delete $args{'Internal_Only'} || 0;
          $output_range_counts{$addr} = delete $args{'Output_Range_Counts'};
          $property{$addr} = delete $args{'_Property'};
          $range_list{$addr} = delete $args{'_Range_List'};
@@ -4518,7 +4562,7 @@ sub trace { return main::trace(@_); }
          my $externally_ok = delete $args{'Externally_Ok'};
          my $loose_match = delete $args{'Fuzzy'};
          my $note = delete $args{'Note'};
-        my $make_pod_entry = delete $args{'Pod_Entry'};
+        my $make_re_pod_entry = delete $args{'Re_Pod_Entry'};
          my $perl_extension = delete $args{'Perl_Extension'};
  
          # Shouldn't have any left over
@@ -4544,7 +4588,7 @@ sub trace { return main::trace(@_); }
  
              # A placeholder table doesn't get documented, is a perl extension,
              # and quite likely will be empty
-            $make_pod_entry = 0 if ! defined $make_pod_entry;
+            $make_re_pod_entry = 0 if ! defined $make_re_pod_entry;
              $perl_extension = 1 if ! defined $perl_extension;
              push @tables_that_may_be_empty, $complete_name{$addr};
          }
@@ -4595,6 +4639,10 @@ sub trace { return main::trace(@_); }
  
          $perl_extension{$addr} = $perl_extension || 0;
  
+        # Don't list a property by default that is internal only
+        $make_re_pod_entry = 0 if ! defined $make_re_pod_entry
+                                  && $internal_only{$addr};
+
          # By convention what typically gets printed only or first is what's
          # first in the list, so put the full name there for good output
          # clarity.  Other routines rely on the full name being first on the
@@ -4602,7 +4650,7 @@ sub trace { return main::trace(@_); }
          $self->add_alias($full_name{$addr},
                              Externally_Ok => $externally_ok,
                              Fuzzy => $loose_match,
-                            Pod_Entry => $make_pod_entry,
+                            Re_Pod_Entry => $make_re_pod_entry,
                              Status => $status{$addr},
                              );
  
@@ -4611,7 +4659,7 @@ sub trace { return main::trace(@_); }
              $self->add_alias($name{$addr},
                              Externally_Ok => $externally_ok,
                              Fuzzy => $loose_match,
-                            Pod_Entry => $make_pod_entry,
+                            Re_Pod_Entry => $make_re_pod_entry,
                              Status => $status{$addr},
                              );
          }
@@ -4673,8 +4721,8 @@ sub trace { return main::trace(@_); }
          my %args = @_;
          my $loose_match = delete $args{'Fuzzy'};
  
-        my $make_pod_entry = delete $args{'Pod_Entry'};
-        $make_pod_entry = $YES unless defined $make_pod_entry;
+        my $make_re_pod_entry = delete $args{'Re_Pod_Entry'};
+        $make_re_pod_entry = $YES unless defined $make_re_pod_entry;
  
          my $externally_ok = delete $args{'Externally_Ok'};
          $externally_ok = 1 unless defined $externally_ok;
@@ -4748,7 +4796,7 @@ sub trace { return main::trace(@_); }
          splice @$list,
                  $insert_position,
                  0,
-                Alias->new($name, $loose_match, $make_pod_entry,
+                Alias->new($name, $loose_match, $make_re_pod_entry,
                                                      $externally_ok, $status);
  
          # This name may be shorter than any existing ones, so clear the cache
@@ -4965,8 +5013,6 @@ sub trace { return main::trace(@_); }
          my $return = "";
          $return .= $DEVELOPMENT_ONLY if $compare_versions;
          $return .= $HEADER;
-        no overloading;
-        $return .= $INTERNAL_ONLY if $internal_only{pack 'J', $self};
          return $return;
      }
  
@@ -5669,6 +5715,7 @@ sub trace { return main::trace(@_); }
                                  if defined $global_to_output_map{$full_name};
  
          # If table says to output, do so; if says to suppress it, do so.
+        return $INTERNAL_MAP if $self->internal_only;
          return $EXTERNAL_MAP if grep { $_ eq $full_name } @output_mapped_properties;
          return 0 if $self->status eq $SUPPRESSED;
  
@@ -5882,20 +5929,8 @@ END
  
      # The remaining variables are temporaries used while writing each table,
      # to output special ranges.
-    my $has_hangul_syllables;
      my @multi_code_point_maps;  # Map is to more than one code point.
  
-    # The key is the base name of the code point, and the value is an
-    # array giving all the ranges that use this base name.  Each range
-    # is actually a hash giving the 'low' and 'high' values of it.
-    my %names_ending_in_code_point;
-    my %loose_names_ending_in_code_point;
-
-    # Inverse mapping.  The list of ranges that have these kinds of
-    # names.  Each element contains the low, high, and base names in an
-    # anonymous hash.
-    my @code_points_ending_in_code_point;
-
      sub handle_special_range {
          # Called in the middle of write when it finds a range it doesn't know
          # how to handle.
@@ -5915,32 +5950,47 @@ END
          # No need to output the range if it maps to the default.
          return if $map eq $default_map{$addr};
  
+        my $property = $self->property;
+
          # Switch based on the map type...
          if ($type == $HANGUL_SYLLABLE) {
  
              # These are entirely algorithmically determinable based on
              # some constants furnished by Unicode; for now, just set a
              # flag to indicate that have them.  After everything is figured
-            # out, we will output the code that does the algorithm.
-            $has_hangul_syllables = 1;
+            # out, we will output the code that does the algorithm.  (Don't
+            # output them if not needed because we are suppressing this
+            # property.)
+            $has_hangul_syllables = 1 if $property->to_output_map;
          }
          elsif ($type == $CP_IN_NAME) {
  
-            # Code points whose the name ends in their code point are also
+            # Code points whose name ends in their code point are also
              # algorithmically determinable, but need information about the map
              # to do so.  Both the map and its inverse are stored in data
-            # structures output in the file.
-            push @{$names_ending_in_code_point{$map}->{'low'}}, $low;
-            push @{$names_ending_in_code_point{$map}->{'high'}}, $high;
-
-            my $squeezed = $map =~ s/[-\s]+//gr;
-            push @{$loose_names_ending_in_code_point{$squeezed}->{'low'}}, $low;
-            push @{$loose_names_ending_in_code_point{$squeezed}->{'high'}}, $high;
-
-            push @code_points_ending_in_code_point, { low => $low,
-                                                      high => $high,
-                                                      name => $map
-                                                    };
+            # structures output in the file.  They are stored in the meantime
+            # in global lists.  These will be written out later into Name.pm,
+            # which is created only if needed.  In order to prevent duplicates
+            # in the list, only add to them for one property, should multiple
+            # ones need them.
+            if ($needing_code_points_ending_in_code_point == 0) {
+                $needing_code_points_ending_in_code_point = $property;
+            }
+            if ($property == $needing_code_points_ending_in_code_point) {
+                push @{$names_ending_in_code_point{$map}->{'low'}}, $low;
+                push @{$names_ending_in_code_point{$map}->{'high'}}, $high;
+
+                my $squeezed = $map =~ s/[-\s]+//gr;
+                push @{$loose_names_ending_in_code_point{$squeezed}->{'low'}},
+                                                                          $low;
+                push @{$loose_names_ending_in_code_point{$squeezed}->{'high'}},
+                                                                         $high;
+
+                push @code_points_ending_in_code_point, { low => $low,
+                                                        high => $high,
+                                                        name => $map
+                                                        };
+            }
          }
          elsif ($range->type == $MULTI_CP || $range->type == $NULL) {
  
@@ -6053,239 +6103,6 @@ END
              $pre_body .= join("\n", @multi_code_point_maps) . "\n);\n";
          }
  
-        if ($has_hangul_syllables || @code_points_ending_in_code_point) {
-
-            # Convert these structures to output format.
-            my $code_points_ending_in_code_point =
-                main::simple_dumper(\@code_points_ending_in_code_point,
-                                    ' ' x 8);
-            my $names = main::simple_dumper(\%names_ending_in_code_point,
-                                            ' ' x 8);
-            my $loose_names = main::simple_dumper(\%loose_names_ending_in_code_point,
-                                            ' ' x 8);
-
-            # Do the same with the Hangul names,
-            my $jamo;
-            my $jamo_l;
-            my $jamo_v;
-            my $jamo_t;
-            my $jamo_re;
-            if ($has_hangul_syllables) {
-
-                # Construct a regular expression of all the possible
-                # combinations of the Hangul syllables.
-                my @L_re;   # Leading consonants
-                for my $i ($LBase .. $LBase + $LCount - 1) {
-                    push @L_re, $Jamo{$i}
-                }
-                my @V_re;   # Middle vowels
-                for my $i ($VBase .. $VBase + $VCount - 1) {
-                    push @V_re, $Jamo{$i}
-                }
-                my @T_re;   # Trailing consonants
-                for my $i ($TBase + 1 .. $TBase + $TCount - 1) {
-                    push @T_re, $Jamo{$i}
-                }
-
-                # The whole re is made up of the L V T combination.
-                $jamo_re = '('
-                            . join ('|', sort @L_re)
-                            . ')('
-                            . join ('|', sort @V_re)
-                            . ')('
-                            . join ('|', sort @T_re)
-                            . ')?';
-
-                # These hashes needed by the algorithm were generated
-                # during reading of the Jamo.txt file
-                $jamo = main::simple_dumper(\%Jamo, ' ' x 8);
-                $jamo_l = main::simple_dumper(\%Jamo_L, ' ' x 8);
-                $jamo_v = main::simple_dumper(\%Jamo_V, ' ' x 8);
-                $jamo_t = main::simple_dumper(\%Jamo_T, ' ' x 8);
-            }
-
-            $pre_body .= <<END;
-
-# To achieve significant memory savings when this file is read in,
-# algorithmically derivable code points are omitted from the main body below.
-# Instead, the following routines can be used to translate between name and
-# code point and vice versa
-
-{ # Closure
-
-    # Matches legal code point.  4-6 hex numbers, If there are 6, the
-    # first two must be '10'; if there are 5, the first must not be a '0'.
-    # First can match at the end of a word provided that the end of the
-    # word doesn't look like a hex number.
-    my \$run_on_code_point_re = qr/$run_on_code_point_re/;
-    my \$code_point_re = qr/$code_point_re/;
-
-    # In the following hash, the keys are the bases of names which includes
-    # the code point in the name, like CJK UNIFIED IDEOGRAPH-4E01.  The values
-    # of each key is another hash which is used to get the low and high ends
-    # for each range of code points that apply to the name.
-    my %names_ending_in_code_point = (
-$names
-    );
-
-    # The following hash is a copy of the previous one, except is for loose
-    # matching, so each name has blanks and dashes squeezed out
-    my %loose_names_ending_in_code_point = (
-$loose_names
-    );
-
-    # And the following array gives the inverse mapping from code points to
-    # names.  Lowest code points are first
-    my \@code_points_ending_in_code_point = (
-$code_points_ending_in_code_point
-    );
-END
-            # Earlier releases didn't have Jamos.  No sense outputting
-            # them unless will be used.
-            if ($has_hangul_syllables) {
-                $pre_body .= <<END;
-
-    # Convert from code point to Jamo short name for use in composing Hangul
-    # syllable names
-    my %Jamo = (
-$jamo
-    );
-
-    # Leading consonant (can be null)
-    my %Jamo_L = (
-$jamo_l
-    );
-
-    # Vowel
-    my %Jamo_V = (
-$jamo_v
-    );
-
-    # Optional trailing consonant
-    my %Jamo_T = (
-$jamo_t
-    );
-
-    # Computed re that splits up a Hangul name into LVT or LV syllables
-    my \$syllable_re = qr/$jamo_re/;
-
-    my \$HANGUL_SYLLABLE = "HANGUL SYLLABLE ";
-    my \$loose_HANGUL_SYLLABLE = "HANGULSYLLABLE";
-
-    # These constants names and values were taken from the Unicode standard,
-    # version 5.1, section 3.12.  They are used in conjunction with Hangul
-    # syllables
-    my \$SBase = $SBase_string;
-    my \$LBase = $LBase_string;
-    my \$VBase = $VBase_string;
-    my \$TBase = $TBase_string;
-    my \$SCount = $SCount;
-    my \$LCount = $LCount;
-    my \$VCount = $VCount;
-    my \$TCount = $TCount;
-    my \$NCount = \$VCount * \$TCount;
-END
-            } # End of has Jamos
-
-            $pre_body .= << 'END';
-
-    sub name_to_code_point_special {
-        my ($name, $loose) = @_;
-
-        # Returns undef if not one of the specially handled names; otherwise
-        # returns the code point equivalent to the input name
-        # $loose is non-zero if to use loose matching, 'name' in that case
-        # must be input as upper case with all blanks and dashes squeezed out.
-END
-            if ($has_hangul_syllables) {
-                $pre_body .= << 'END';
-
-        if ((! $loose && $name =~ s/$HANGUL_SYLLABLE//)
-            || ($loose && $name =~ s/$loose_HANGUL_SYLLABLE//))
-        {
-            return if $name !~ qr/^$syllable_re$/;
-            my $L = $Jamo_L{$1};
-            my $V = $Jamo_V{$2};
-            my $T = (defined $3) ? $Jamo_T{$3} : 0;
-            return ($L * $VCount + $V) * $TCount + $T + $SBase;
-        }
-END
-            }
-            $pre_body .= << 'END';
-
-        # Name must end in 'code_point' for this to handle.
-        return if (($loose && $name !~ /^ (.*?) ($run_on_code_point_re) $/x)
-                   || (! $loose && $name !~ /^ (.*) ($code_point_re) $/x));
-
-        my $base = $1;
-        my $code_point = CORE::hex $2;
-        my $names_ref;
-
-        if ($loose) {
-            $names_ref = \%loose_names_ending_in_code_point;
-        }
-        else {
-            return if $base !~ s/-$//;
-            $names_ref = \%names_ending_in_code_point;
-        }
-
-        # Name must be one of the ones which has the code point in it.
-        return if ! $names_ref->{$base};
-
-        # Look through the list of ranges that apply to this name to see if
-        # the code point is in one of them.
-        for (my $i = 0; $i < scalar @{$names_ref->{$base}{'low'}}; $i++) {
-            return if $names_ref->{$base}{'low'}->[$i] > $code_point;
-            next if $names_ref->{$base}{'high'}->[$i] < $code_point;
-
-            # Here, the code point is in the range.
-            return $code_point;
-        }
-
-        # Here, looked like the name had a code point number in it, but
-        # did not match one of the valid ones.
-        return;
-    }
-
-    sub code_point_to_name_special {
-        my $code_point = shift;
-
-        # Returns the name of a code point if algorithmically determinable;
-        # undef if not
-END
-            if ($has_hangul_syllables) {
-                $pre_body .= << 'END';
-
-        # If in the Hangul range, calculate the name based on Unicode's
-        # algorithm
-        if ($code_point >= $SBase && $code_point <= $SBase + $SCount -1) {
-            use integer;
-            my $SIndex = $code_point - $SBase;
-            my $L = $LBase + $SIndex / $NCount;
-            my $V = $VBase + ($SIndex % $NCount) / $TCount;
-            my $T = $TBase + $SIndex % $TCount;
-            $name = "$HANGUL_SYLLABLE$Jamo{$L}$Jamo{$V}";
-            $name .= $Jamo{$T} if $T != $TBase;
-            return $name;
-        }
-END
-            }
-            $pre_body .= << 'END';
-
-        # Look through list of these code points for one in range.
-        foreach my $hash (@code_points_ending_in_code_point) {
-            return if $code_point < $hash->{'low'};
-            if ($code_point <= $hash->{'high'}) {
-                return sprintf("%s-%04X", $hash->{'name'}, $code_point);
-            }
-        }
-        return;            # None found
-    }
-} # End closure
-
-END
-        } # End of has hangul or code point in name maps.
-
          my $format = $self->format;
  
          my $return = <<END;
@@ -6324,11 +6141,7 @@ END
          my $addr = do { no overloading; pack 'J', $self; };
  
          # Clear the temporaries
-        $has_hangul_syllables = 0;
          undef @multi_code_point_maps;
-        undef %names_ending_in_code_point;
-        undef %loose_names_ending_in_code_point;
-        undef @code_points_ending_in_code_point;
  
          # Calculate the format of the table if not already done.
          my $format = $self->format;
@@ -6872,6 +6685,14 @@ sub trace { return main::trace(@_); }
          return $self->_range_list->add_range(@_);
      }
  
+    sub header {
+        my $self = shift;
+        Carp::carp_extra_args(\@_) if main::DEBUG && @_;
+
+        # All match tables are to be used only by the Perl core.
+        return $self->SUPER::header() . $INTERNAL_ONLY;
+    }
+
      sub pre_body {  # Does nothing for match tables.
          return
      }
@@ -7522,9 +7343,7 @@ sub trace { return main::trace(@_) if main::DEBUG && $to_trace }
                                  # gets property's status by default
                                  Status => $self->status,
                                  _Status_Info => $self->status_info,
-                                %args,
-                                Internal_Only_Warning => 1); # Override any
-                                                             # input param
+                                %args);
              return unless defined $table;
          }
  
@@ -8326,9 +8145,8 @@ sub utf8_heavy_name ($$) {
              my $copy = $item;
              $copy = $UNDEF unless defined $copy;
  
-            # Quote non-numbers (numbers also have optional leading '-' and
-            # fractions)
-            if ($copy eq "" || $copy !~ /^ -? \d+ ( \. \d+ )? $/x) {
+            # Quote non-integers (integers also have optional leading '-')
+            if ($copy eq "" || $copy !~ /^ -? \d+ $/x) {
  
                  # Escape apostrophe and backslash
                  $copy =~ s/ ( ['\\] ) /\\$1/xg;
@@ -9916,7 +9734,7 @@ END
                         Default_Map => "",
                         Directory => File::Spec->curdir(),
                         File => 'Name',
-                       Internal_Only_Warning => 1,
+                       Internal_Only => 1,
                         Perl_Extension => 1,
                         Range_Size_1 => \&output_perl_charnames_line,
                         Type => $STRING,
@@ -9926,7 +9744,7 @@ END
                                          Directory => File::Spec->curdir(),
                                          File => 'Decomposition',
                                          Format => $DECOMP_STRING_FORMAT,
-                                        Internal_Only_Warning => 1,
+                                        Internal_Only => 1,
                                          Perl_Extension => 1,
                                          Default_Map => $CODE_POINT,
  
@@ -10517,7 +10335,7 @@ END
  
          my ($code_point, @fields) = split /\s*;\s*/, $_, -1;
          if ($code_point eq '0007') {
-            $fields[$CHARNAME] = "ALERT";
+            $fields[$CHARNAME] = "";
          }
          elsif ($code_point eq '070F') { # Unicode Corrigendum #8; see
                              # http://www.unicode.org/versions/corrigendum8.html
@@ -10697,6 +10515,7 @@ sub filter_arabic_shaping_line {
                      Type => $STRING,
                      Default_Map => $CODE_POINT,
                      Perl_Extension => 1,
+                    Internal_Only => 1,
                      Description => "The simple mappings for $case for code points that have full mappings as well");
              $simple_only->set_to_output_map($INTERNAL_MAP);
              $simple_only->add_comment(join_lines( <<END
@@ -11360,6 +11179,10 @@ sub  filter_script_extensions_line {
      return;
  }
  
+sub setup_v6_name_alias {   # Maps code point 7 to "ALERT" in Name_Alias
+        property_ref('Name_Alias')->add_map(7, 7, "ALERT");
+}
+
  sub finish_Unicode() {
      # This routine should be called after all the Unicode files have been read
      # in.  It:
@@ -11472,7 +11295,7 @@ sub finish_Unicode() {
                  # This fills in any missing values with the default.  It's not
                  # necessary to do this with binary properties, as the default
                  # is defined completely in terms of the Y table.
-                $property->add_map(0, $LAST_UNICODE_CODEPOINT,
+                $property->add_map(0, $MAX_UNICODE_CODEPOINT,
                                     $default_map, Replace => $NO);
              }
          }
@@ -11667,7 +11490,7 @@ sub compile_perl() {
      # 'Any' is all code points.  As an error check, instead of just setting it
      # to be that, construct it to be the union of all the major categories
      $Any = $perl->add_match_table('Any',
-            Description  => "[\\x{0000}-\\x{$LAST_UNICODE_CODEPOINT_STRING}]",
+            Description  => "[\\x{0000}-\\x{$MAX_UNICODE_CODEPOINT_STRING}]",
              Matches_All => 1);
  
      foreach my $major_table ($gc->tables) {
@@ -11678,10 +11501,10 @@ sub compile_perl() {
          $Any += $major_table;
      }
  
-    if ($Any->max != $LAST_UNICODE_CODEPOINT) {
+    if ($Any->max != $MAX_UNICODE_CODEPOINT) {
          Carp::my_carp_bug("Generated highest code point ("
             . sprintf("%X", $Any->max)
-           . ") doesn't match expected value $LAST_UNICODE_CODEPOINT_STRING.")
+           . ") doesn't match expected value $MAX_UNICODE_CODEPOINT_STRING.")
      }
      if ($Any->range_count != 1 || $Any->min != 0) {
       Carp::my_carp_bug("Generated table 'Any' doesn't match all code points.")
@@ -11696,8 +11519,8 @@ sub compile_perl() {
                                  );
  
      # Our internal-only property should be treated as more than just a
-    # synonym.
-    $perl->add_match_table('_CombAbove')
+    # synonym; grandfather it in to the pod.
+    $perl->add_match_table('_CombAbove', Re_Pod_Entry => 1)
              ->set_equivalent_to(property_ref('ccc')->table('Above'),
                                                                  Related => 1);
  
@@ -11788,8 +11611,10 @@ sub compile_perl() {
      # one of Nonspacing_Mark (Mn), Enclosing_Mark (Me), Format (Cf),
      # Modifier_Letter (Lm), or Modifier_Symbol (Sk).
  
-    # Perl has long had an internal-only alias for this property.
-    my $perl_case_ignorable = $perl->add_match_table('_Case_Ignorable');
+    # Perl has long had an internal-only alias for this property; grandfather
+    # it in to the pod, but discourage its use.
+    my $perl_case_ignorable = $perl->add_match_table('_Case_Ignorable',
+                                                    Re_Pod_Entry => 1);
      my $case_ignorable = property_ref('Case_Ignorable');
      if (defined $case_ignorable && ! $case_ignorable->is_empty) {
          $perl_case_ignorable->set_equivalent_to($case_ignorable->table('Y'),
@@ -11982,8 +11807,9 @@ sub compile_perl() {
                      Description => '\p{Punct} + ASCII-range \p{Symbol}',
                      Initialize => $gc->table('Punctuation')
                                  + ($ASCII & $gc->table('Symbol')),
+                                Perl_Extension => 1
          );
-    $perl->add_match_table('PosixPunct',
+    $perl->add_match_table('PosixPunct', Perl_Extension => 1,
          Description => '[-!"#$%&\'()*+,./:;<>?@[\\\]^_`{|}~]',
          Initialize => $ASCII & $XPosixPunct,
          );
@@ -12032,8 +11858,9 @@ sub compile_perl() {
  
      # _CanonDCIJ is equivalent to Soft_Dotted, but if on a release earlier
      # than SD appeared, construct it ourselves, based on the first release SD
-    # was in.
-    my $CanonDCIJ = $perl->add_match_table('_CanonDCIJ');
+    # was in.  A pod entry is grandfathered in for it.
+    my $CanonDCIJ = $perl->add_match_table('_CanonDCIJ', Re_Pod_Entry => 1,
+                                      Perl_Extension => 1, Internal_Only => 1);
      my $soft_dotted = property_ref('Soft_Dotted');
      if (defined $soft_dotted && ! $soft_dotted->is_empty) {
          $CanonDCIJ->set_equivalent_to($soft_dotted->table('Y'), Related => 1);
@@ -12054,8 +11881,10 @@ sub compile_perl() {
      }
  
      # These are used in Unicode's definition of \X
-    my $begin = $perl->add_match_table('_X_Begin', Perl_Extension => 1);
-    my $extend = $perl->add_match_table('_X_Extend', Perl_Extension => 1);
+    my $begin = $perl->add_match_table('_X_Begin', Perl_Extension => 1,
+                                       Internal_Only => 1);
+    my $extend = $perl->add_match_table('_X_Extend', Perl_Extension => 1,
+                                        Internal_Only => 1);
  
      # For backward compatibility, Perl has its own definition for IDStart
      # First, we include the underscore, and then the regular XID_Start also
@@ -12130,7 +11959,7 @@ sub compile_perl() {
  
      # More GCB.  If we found some hangul syllables, populate a combined
      # table.
-    my $lv_lvt_v = $perl->add_match_table('_X_LV_LVT_V');
+    my $lv_lvt_v = $perl->add_match_table('_X_LV_LVT_V', Perl_Extension => 1, Internal_Only => 1);
      my $LV = $gcb->table('LV');
      if ($LV->is_empty) {
          push @tables_that_may_be_empty, $lv_lvt_v->complete_name;
@@ -12181,6 +12010,13 @@ This file is for charnames.pm.  It is the union of the $comment properties.
  Unicode_1_Name entries are used only for otherwise nameless code
  points.
  $alias_sentence
+This file doesn't include the algorithmically determinable names.  For those,
+use 'unicore/Name.pm'
+END
+    ));
+    property_ref('Name')->add_comment(join_lines( <<END
+This file doesn't include the algorithmically determinable names.  For those,
+use 'unicore/Name.pm'
  END
      ));
  
@@ -12190,7 +12026,6 @@ END
          my $in = Property->new('In',
                                  Default_Map => $default_map,
                                  Full_Name => "Present_In",
-                                Internal_Only_Warning => 1,
                                  Perl_Extension => 1,
                                  Type => $ENUM,
                                  Initialize => $age,
@@ -12270,7 +12105,7 @@ END
          foreach my $alias ($table->aliases) {
              next if $alias->name =~ /^_/;
              $table->add_alias('Is_' . $alias->name,
-                               Pod_Entry => 0,
+                               Re_Pod_Entry => 0,
                                 Status => $alias->status,
                                 Externally_Ok => 0);
          }
@@ -12289,7 +12124,7 @@ END
           Initialize => $gc->table('Unassigned')
                         & property_ref('Noncharacter_Code_Point')->table('N'));
  
-        for (my $i = 0; $i <= $LAST_UNICODE_CODEPOINT; $i++ ) {
+        for (my $i = 0; $i <= $MAX_UNICODE_CODEPOINT; $i++ ) {
              $i = populate_char_info($i);    # Note sets $i so may cause skips
          }
      }
@@ -12382,7 +12217,7 @@ sub add_perl_synonyms() {
  
                      # No name collision, so ok to add the perl synonym.
  
-                    my $make_pod_entry;
+                    my $make_re_pod_entry;
                      my $externally_ok;
                      my $status = $alias->status;
                      if ($nominal_property == $block) {
@@ -12393,17 +12228,17 @@ sub add_perl_synonyms() {
                          # we don't want people using the name without the
                          # 'In', so discourage that.
                          if ($prefix eq "") {
-                            $make_pod_entry = 1;
+                            $make_re_pod_entry = 1;
                              $status = $status || $DISCOURAGED;
                              $externally_ok = 0;
                          }
                          elsif ($prefix eq 'In_') {
-                            $make_pod_entry = 0;
+                            $make_re_pod_entry = 0;
                              $status = $status || $NORMAL;
                              $externally_ok = 1;
                          }
                          else {
-                            $make_pod_entry = 0;
+                            $make_re_pod_entry = 0;
                              $status = $status || $DISCOURAGED;
                              $externally_ok = 0;
                          }
@@ -12412,7 +12247,7 @@ sub add_perl_synonyms() {
  
                          # The 'Is' prefix is handled in the pod by a wild
                          # card, and we won't use it for an external name
-                        $make_pod_entry = 0;
+                        $make_re_pod_entry = 0;
                          $status = $status || $NORMAL;
                          $externally_ok = 0;
                      }
@@ -12420,7 +12255,7 @@ sub add_perl_synonyms() {
  
                          # Here, is an empty prefix, non block.  This gets its
                          # own pod entry and can be used for an external name.
-                        $make_pod_entry = 1;
+                        $make_re_pod_entry = 1;
                          $status = $status || $NORMAL;
                          $externally_ok = 1;
                      }
@@ -12434,7 +12269,7 @@ sub add_perl_synonyms() {
                          # Here, have found a table for $perl.  Add this alias
                          # to it, and are done with this prefix.
                          $equivalent->add_alias($proposed_name,
-                                        Pod_Entry => $make_pod_entry,
+                                        Re_Pod_Entry => $make_re_pod_entry,
                                          Status => $status,
                                          Externally_Ok => $externally_ok);
                          trace "adding alias perl=$proposed_name to $equivalent" if main::DEBUG && $to_trace;
@@ -12444,7 +12279,7 @@ sub add_perl_synonyms() {
                      # Here, $perl doesn't already have a table that is a
                      # synonym for this property, add one.
                      my $added_table = $perl->add_match_table($proposed_name,
-                                            Pod_Entry => $make_pod_entry,
+                                            Re_Pod_Entry => $make_re_pod_entry,
                                              Status => $status,
                                              Externally_Ok => $externally_ok);
                      # And it will be related to the actual table, since it is
@@ -12911,7 +12746,7 @@ sub make_table_pod_entries($) {
          foreach my $alias ($table->aliases) {
  
              # Skip if not to go in pod.
-            next unless $alias->make_pod_entry;
+            next unless $alias->make_re_pod_entry;
  
              # Start gathering all the components for the entry
              my $name = $alias->name;
@@ -13375,6 +13210,37 @@ END
  
      } # End of looping through each reason.
  
+    # Similarly, generate a list of files that we don't use, grouped by the
+    # reasons why.  First, create a hash whose keys are the reasons, and whose
+    # values are anonymous arrays of all the files that share that reason.
+    my %grouped_by_reason;
+    foreach my $file (keys %ignored_files) {
+        push @{$grouped_by_reason{$ignored_files{$file}}}, $file;
+    }
+
+    # Then, sort each group.
+    foreach my $group (keys %grouped_by_reason) {
+        @{$grouped_by_reason{$group}} = sort { lc $a cmp lc $b }
+                                        @{$grouped_by_reason{$group}} ;
+    }
+
+    # Finally, create the output text.  For each reason (sorted by the
+    # alphabetically first file that has that reason)...
+    my @unused_files;
+    foreach my $reason (sort { lc $grouped_by_reason{$a}->[0]
+                               cmp lc $grouped_by_reason{$b}->[0]
+                              }
+                         keys %grouped_by_reason)
+    {
+        # Add all the files that have that reason to the output.  Start
+        # with an empty line.
+        push @unused_files, "\n\n";
+        push @unused_files, map { "\n=item F<$_> \n" }
+                            @{$grouped_by_reason{$reason}};
+        # And add the reason under the list of files
+        push @unused_files, "\n$reason\n";
+    }
+
      # Generate a list of the properties whose map table we output, from the
      # global @map_properties.
      my @map_tables_actually_output;
@@ -13444,17 +13310,24 @@ To change this file, edit $0 instead.
  
  =head1 NAME
  
-$pod_file - Index of Unicode Version $string_version properties in Perl
+$pod_file - Index of Unicode Version $string_version character properties in Perl
  
  =head1 DESCRIPTION
  
-There are many properties in Unicode, and Perl provides access to almost all of
-them, as well as some additional extensions and short-cut synonyms.
+This document provides information about the portion of the Unicode database
+that deals with character properties, that is, the portion that is defined on
+single code points.  (L</Other information in the Unicode data base>
+below briefly mentions other data that Unicode provides.)
  
-And just about all of the few that aren't accessible through the Perl
-core are accessible through the modules: L<Unicode::Normalize> and
-L<Unicode::UCD>, and for Unihan properties, via the CPAN module
-L<Unicode::Unihan>.
+Perl can provide access to all non-provisional Unicode character properties,
+though not all are enabled by default.  The omitted ones are the Unihan
+properties (accessible via the CPAN module L<Unicode::Unihan>) and certain
+deprecated or Unicode-internal properties.  (An installation may choose to
+recompile Perl's tables to change this.  See L</Unicode regular expression
+properties that are NOT accepted by Perl>.)
+
+Perl also provides some additional extensions and short-cut synonyms
+for Unicode properties.
  
  This document merely lists all available properties and does not attempt to
  explain what each property really means.  There is a brief description of each
@@ -13534,7 +13407,7 @@ There are several varieties of obsolescence:
  
  =item Stabilized
  
-Obsolete properties may be stabilized.  Such a determination does not indicate
+A property may be stabilized.  Such a determination does not indicate
  that the property should or should not be used; instead it is a declaration
  that the property will not be maintained nor extended for newly encoded
  characters.  Such properties are marked with $a_bold_stabilized in the
@@ -13542,7 +13415,7 @@ table.
  
  =item Deprecated
  
-An obsolete property may be deprecated, perhaps because its original intent
+A property may be deprecated, perhaps because its original intent
  has been replaced by another property, or because its specification was
  somehow defective.  This means that its use is strongly
  discouraged, so much so that a warning will be issued if used, unless the
@@ -13750,6 +13623,33 @@ specified by an entry that looks like this:
  
      \$utf8::SwashInfo{'ToNAME'}{'specials_name'} = 'utf8::ToSpecNAME';
  
+
+=head1 Other information in the Unicode data base
+
+The Unicode data base is delivered in two different formats.  The XML version
+is valid for more modern Unicode releases.  The other version is a collection
+of files.  The two are intended to give equivalent information.  Perl uses the
+older form; this allows you to recompile Perl to use early Unicode releases.
+
+The only non-character property that Perl currently supports is Named
+Sequences, in which a sequence of code points
+is given a name and generally treated as a single entity.  (Perl supports
+these via the C<\\N{...}> double-quotish construct,
+L<charnames/charnames::string_vianame(name)>, and L<Unicode::UCD/namedseq()>.)
+
+Below is a list of the files in the Unicode data base that Perl doesn't
+currently use, along with very brief descriptions of their purposes.
+Some of the names of the files have been shortened from those that Unicode
+uses, in order to allow them to be distinguishable from similarly named files
+on file systems for which only the first 8 characters of a name are
+significant.
+
+=over 4
+
+@unused_files
+
+=back
+
  =head1 SEE ALSO
  
  L<$unicode_reference_url>
@@ -13844,6 +13744,255 @@ END
      return;
  }
  
+sub make_Name_pm () {
+    # Create and write Name.pm, which contains subroutines and data to use in
+    # conjunction with Name.pl
+
+    # Maybe there's nothing to do.
+    return unless $has_hangul_syllables || @code_points_ending_in_code_point;
+
+    my @name = <<END;
+$HEADER
+$INTERNAL_ONLY
+END
+
+    # Convert these structures to output format.
+    my $code_points_ending_in_code_point =
+        main::simple_dumper(\@code_points_ending_in_code_point,
+                            ' ' x 8);
+    my $names = main::simple_dumper(\%names_ending_in_code_point,
+                                    ' ' x 8);
+    my $loose_names = main::simple_dumper(\%loose_names_ending_in_code_point,
+                                    ' ' x 8);
+
+    # Do the same with the Hangul names.
+    my $jamo;
+    my $jamo_l;
+    my $jamo_v;
+    my $jamo_t;
+    my $jamo_re;
+    if ($has_hangul_syllables) {
+
+        # Construct a regular expression of all the possible
+        # combinations of the Hangul syllables.
+        my @L_re;   # Leading consonants
+        for my $i ($LBase .. $LBase + $LCount - 1) {
+            push @L_re, $Jamo{$i}
+        }
+        my @V_re;   # Middle vowels
+        for my $i ($VBase .. $VBase + $VCount - 1) {
+            push @V_re, $Jamo{$i}
+        }
+        my @T_re;   # Trailing consonants
+        for my $i ($TBase + 1 .. $TBase + $TCount - 1) {
+            push @T_re, $Jamo{$i}
+        }
+
+        # The whole re is made up of the L V T combination.
+        $jamo_re = '('
+                    . join ('|', sort @L_re)
+                    . ')('
+                    . join ('|', sort @V_re)
+                    . ')('
+                    . join ('|', sort @T_re)
+                    . ')?';
+
+        # These hashes needed by the algorithm were generated
+        # during reading of the Jamo.txt file
+        $jamo = main::simple_dumper(\%Jamo, ' ' x 8);
+        $jamo_l = main::simple_dumper(\%Jamo_L, ' ' x 8);
+        $jamo_v = main::simple_dumper(\%Jamo_V, ' ' x 8);
+        $jamo_t = main::simple_dumper(\%Jamo_T, ' ' x 8);
+    }
+
+    push @name, <<END;
+
+# This module contains machine-generated tables and code for the
+# algorithmically-determinable Unicode character names.  The following
+# routines can be used to translate between name and code point and vice versa
+
+{ # Closure
+
+    # Matches legal code point.  4-6 hex numbers, If there are 6, the first
+    # two must be 10; if there are 5, the first must not be a 0.  Written this
+    # way to decrease backtracking.  The first regex allows the code point to
+    # be at the end of a word, but to work properly, the word shouldn't end
+    # with a valid hex character.  The second one won't match a code point at
+    # the end of a word, and doesn't have the run-on issue
+    my \$run_on_code_point_re = qr/$run_on_code_point_re/;
+    my \$code_point_re = qr/$code_point_re/;
+
+    # In the following hash, the keys are the bases of names which includes
+    # the code point in the name, like CJK UNIFIED IDEOGRAPH-4E01.  The values
+    # of each key is another hash which is used to get the low and high ends
+    # for each range of code points that apply to the name.
+    my %names_ending_in_code_point = (
+$names
+    );
+
+    # The following hash is a copy of the previous one, except is for loose
+    # matching, so each name has blanks and dashes squeezed out
+    my %loose_names_ending_in_code_point = (
+$loose_names
+    );
+
+    # And the following array gives the inverse mapping from code points to
+    # names.  Lowest code points are first
+    my \@code_points_ending_in_code_point = (
+$code_points_ending_in_code_point
+    );
+END
+    # Earlier releases didn't have Jamos.  No sense outputting
+    # them unless will be used.
+    if ($has_hangul_syllables) {
+        push @name, <<END;
+
+    # Convert from code point to Jamo short name for use in composing Hangul
+    # syllable names
+    my %Jamo = (
+$jamo
+    );
+
+    # Leading consonant (can be null)
+    my %Jamo_L = (
+$jamo_l
+    );
+
+    # Vowel
+    my %Jamo_V = (
+$jamo_v
+    );
+
+    # Optional trailing consonant
+    my %Jamo_T = (
+$jamo_t
+    );
+
+    # Computed re that splits up a Hangul name into LVT or LV syllables
+    my \$syllable_re = qr/$jamo_re/;
+
+    my \$HANGUL_SYLLABLE = "HANGUL SYLLABLE ";
+    my \$loose_HANGUL_SYLLABLE = "HANGULSYLLABLE";
+
+    # These constants names and values were taken from the Unicode standard,
+    # version 5.1, section 3.12.  They are used in conjunction with Hangul
+    # syllables
+    my \$SBase = $SBase_string;
+    my \$LBase = $LBase_string;
+    my \$VBase = $VBase_string;
+    my \$TBase = $TBase_string;
+    my \$SCount = $SCount;
+    my \$LCount = $LCount;
+    my \$VCount = $VCount;
+    my \$TCount = $TCount;
+    my \$NCount = \$VCount * \$TCount;
+END
+    } # End of has Jamos
+
+    push @name, << 'END';
+
+    sub name_to_code_point_special {
+        my ($name, $loose) = @_;
+
+        # Returns undef if not one of the specially handled names; otherwise
+        # returns the code point equivalent to the input name
+        # $loose is non-zero if to use loose matching, 'name' in that case
+        # must be input as upper case with all blanks and dashes squeezed out.
+END
+    if ($has_hangul_syllables) {
+        push @name, << 'END';
+
+        if ((! $loose && $name =~ s/$HANGUL_SYLLABLE//)
+            || ($loose && $name =~ s/$loose_HANGUL_SYLLABLE//))
+        {
+            return if $name !~ qr/^$syllable_re$/;
+            my $L = $Jamo_L{$1};
+            my $V = $Jamo_V{$2};
+            my $T = (defined $3) ? $Jamo_T{$3} : 0;
+            return ($L * $VCount + $V) * $TCount + $T + $SBase;
+        }
+END
+    }
+    push @name, << 'END';
+
+        # Name must end in 'code_point' for this to handle.
+        return if (($loose && $name !~ /^ (.*?) ($run_on_code_point_re) $/x)
+                   || (! $loose && $name !~ /^ (.*) ($code_point_re) $/x));
+
+        my $base = $1;
+        my $code_point = CORE::hex $2;
+        my $names_ref;
+
+        if ($loose) {
+            $names_ref = \%loose_names_ending_in_code_point;
+        }
+        else {
+            return if $base !~ s/-$//;
+            $names_ref = \%names_ending_in_code_point;
+        }
+
+        # Name must be one of the ones which has the code point in it.
+        return if ! $names_ref->{$base};
+
+        # Look through the list of ranges that apply to this name to see if
+        # the code point is in one of them.
+        for (my $i = 0; $i < scalar @{$names_ref->{$base}{'low'}}; $i++) {
+            return if $names_ref->{$base}{'low'}->[$i] > $code_point;
+            next if $names_ref->{$base}{'high'}->[$i] < $code_point;
+
+            # Here, the code point is in the range.
+            return $code_point;
+        }
+
+        # Here, looked like the name had a code point number in it, but
+        # did not match one of the valid ones.
+        return;
+    }
+
+    sub code_point_to_name_special {
+        my $code_point = shift;
+
+        # Returns the name of a code point if algorithmically determinable;
+        # undef if not
+END
+    if ($has_hangul_syllables) {
+        push @name, << 'END';
+
+        # If in the Hangul range, calculate the name based on Unicode's
+        # algorithm
+        if ($code_point >= $SBase && $code_point <= $SBase + $SCount -1) {
+            use integer;
+            my $SIndex = $code_point - $SBase;
+            my $L = $LBase + $SIndex / $NCount;
+            my $V = $VBase + ($SIndex % $NCount) / $TCount;
+            my $T = $TBase + $SIndex % $TCount;
+            my $name = "$HANGUL_SYLLABLE$Jamo{$L}$Jamo{$V}";
+            $name .= $Jamo{$T} if $T != $TBase;
+            return $name;
+        }
+END
+    }
+    push @name, << 'END';
+
+        # Look through list of these code points for one in range.
+        foreach my $hash (@code_points_ending_in_code_point) {
+            return if $code_point < $hash->{'low'};
+            if ($code_point <= $hash->{'high'}) {
+                return sprintf("%s-%04X", $hash->{'name'}, $code_point);
+            }
+        }
+        return;            # None found
+    }
+} # End closure
+
+1;
+END
+
+    main::write("Name.pm", 0, \@name);  # The 0 means no utf8.
+    return;
+}
+
+
  sub write_all_tables() {
      # Write out all the tables generated by this program to files, as well as
      # the supporting data structures, pod file, and .t file.
@@ -14138,7 +14287,7 @@ sub write_all_tables() {
                          # this one.
                          next if $i == 0
                                  || ! defined $pod_directory
-                                || ! $alias->make_pod_entry;
+                                || ! $alias->make_re_pod_entry;
  
                          my $rhs = $full_property_name;
                          if ($property != $perl && $table->perl_extension) {
@@ -14212,8 +14361,9 @@ sub write_all_tables() {
      # Write out the pod file
      make_pod;
  
-    # And Heavy.pl
+    # And Heavy.pl, Name.pm
      make_Heavy;
+    make_Name_pm;
  
      make_property_test_script() if $make_test_script;
      return;
@@ -14522,7 +14672,7 @@ sub make_property_test_script() {
              # pre-existing one.
              push @property_aliases, map { Alias->new("Is_" . $_->name,
                                                      $_->loose_match,
-                                                    $_->make_pod_entry,
+                                                    $_->make_re_pod_entry,
                                                      $_->externally_ok,
                                                      $_->status)
                                           } @property_aliases;
@@ -14814,7 +14964,7 @@ my @input_file_objects = (
                      Property => 'Bidi_Mirroring_Glyph',
                      ),
      Input_file->new("NormalizationTest.txt", v3.0.1,
-                    Skip => 1,
+                    Skip => 'Validation Tests',
                      ),
      Input_file->new('CaseFolding.txt', v3.0.1,
                      Pre_Handler => \&setup_case_folding,
@@ -14856,13 +15006,13 @@ my @input_file_objects = (
                      Handler => \&process_GCB_test,
                      ),
      Input_file->new("$AUXILIARY/LBTest.txt", v4.1.0,
-                    Skip => 1,
+                    Skip => 'Validation Tests',
                      ),
      Input_file->new("$AUXILIARY/SBTest.txt", v4.1.0,
-                    Skip => 1,
+                    Skip => 'Validation Tests',
                      ),
      Input_file->new("$AUXILIARY/WBTest.txt", v4.1.0,
-                    Skip => 1,
+                    Skip => 'Validation Tests',
                      ),
      Input_file->new("$AUXILIARY/SentenceBreakProperty.txt", v4.1.0,
                      Property => 'Sentence_Break',
@@ -14873,9 +15023,12 @@ my @input_file_objects = (
                      ),
      Input_file->new('NameAliases.txt', v5.0.0,
                      Property => 'Name_Alias',
+                    Pre_Handler => ($v_version ge v6.0.0)
+                                   ? \&setup_v6_name_alias
+                                   : undef,
                      ),
      Input_file->new("BidiTest.txt", v5.2.0,
-                    Skip => 1,
+                    Skip => 'Validation Tests',
                      ),
      Input_file->new('UnihanIndicesDictionary.txt', v5.2.0,
                      Optional => 1,