mktables: Comments, white-space and typo in message text only

[perl5.git] / lib / unicore / mktables
diff --git a/lib/unicore/mktables b/lib/unicore/mktables

index 82207bc..d2b0930 100644 (file)
--- a/lib/unicore/mktables
+++ b/lib/unicore/mktables
@@ -716,8 +716,8 @@ usage: $0 [-c|-p|-q|-v|-w] [-C dir] [-L filelist] [ -P pod_dir ]
    -makelist   : Rewrite the file list $file_list based on current setup
    -annotate   : Output an annotation for each character in the table files;
                  useful for debugging mktables, looking at diffs; but is slow,
-                memory intensive; resulting tables are usable but slow and
-                very large.
+                memory intensive; resulting tables are usable but are slow and
+                very large (and currently fail the Unicode::UCD.t tests).
    -check A B  : Executes $0 only if A and B are the same
  END
      }
@@ -768,6 +768,9 @@ push @tables_that_may_be_empty, 'Script=Katakana_Or_Hiragana'
                                                      if $v_version ge v4.1.0;
  push @tables_that_may_be_empty, 'Script_Extensions=Katakana_Or_Hiragana'
                                                      if $v_version ge v6.0.0;
+push @tables_that_may_be_empty, 'Grapheme_Cluster_Break=Prepend'
+                                                    if $v_version ge v6.1.0;
+push @tables_that_may_be_empty, '_stc';
  
  # The lists below are hashes, so the key is the item in the list, and the
  # value is the reason why it is in the list.  This makes generation of
@@ -1186,9 +1189,11 @@ my $YES = 1;
  my $IF_NOT_EQUIVALENT = 1; # Replace only under certain conditions; details in
                             # the comments at the subroutine definition.
  my $UNCONDITIONALLY = 2;   # Replace without conditions.
-my $MULTIPLE = 4;          # Don't replace, but add a duplicate record if
+my $MULTIPLE_BEFORE = 4;   # Don't replace, but add a duplicate record if
                             # already there
-my $CROAK = 5;             # Die with an error if is already there
+my $MULTIPLE_AFTER = 5;    # Don't replace, but add a duplicate record after
+                           # any already there
+my $CROAK = 6;             # Die with an error if is already there
  
  # Flags to give property statuses.  The phrases are to remind maintainers that
  # if the flag is changed, the indefinite article referring to it in the
@@ -2244,7 +2249,7 @@ sub trace { return main::trace(@_); }
              # its name
              if ($seen_non_extracted_non_age) {
                  if ($file =~ /$EXTRACTED/i) {
-                    Carp::my_carp_bug(join_lines(<<END
+                    Carp::my_carp_bug(main::join_lines(<<END
  $file should be processed just after the 'Prop...Alias' files, and before
  anything not in the $EXTRACTED_DIR directory.  Proceeding, but the results may
  have subtle problems
@@ -2432,7 +2437,8 @@ END
                          || @defaults > 2
                          || ($default =~ /^</
                              && $default !~ /^<code *point>$/i
-                            && $default !~ /^<none>$/i))
+                            && $default !~ /^<none>$/i
+                            && $default !~ /^<script>$/i))
                      {
                          $self->carp_bad_line("Unrecognized \@missing line: $_.  Assuming no missing entries");
                      }
@@ -2453,6 +2459,15 @@ END
                          elsif ($default =~ /^<code *point>$/i) {
                              $default = $CODE_POINT;
                          }
+                        elsif ($default =~ /^<script>$/i) {
+
+                            # Special case this one.  Currently is from
+                            # ScriptExtensions.txt, and means for all unlisted
+                            # code points, use their Script property values.
+                            # For the code points not listed in that file, the
+                            # default value is 'Unknown'.
+                            $default = "Unknown";
+                        }
  
                          # Store them as a sub-arrays with both components.
                          push @{$missings{$addr}}, [ $default, $property ];
@@ -3000,7 +3015,7 @@ sub trace { return main::trace(@_); }
          # either a constructor or a method.  If called as a method, the result
          # will be a new() instance of the calling object, containing the union
          # of that object with the other parameter's code points;  if called as
-        # a constructor, the first parameter gives the class the new object
+        # a constructor, the first parameter gives the class that the new object
          # should be, and the second parameter gives the code points to go into
          # it.
          # In either case, there are two parameters looked at by this routine;
@@ -3019,7 +3034,6 @@ sub trace { return main::trace(@_); }
          # routine therefore belongs in a derived class, but it was moved here
          # to avoid duplication of code.  The failure to overload this in this
          # class keeps it safe.
-        #
  
          my $self;
          my @args;   # Arguments to pass to the constructor
@@ -3093,7 +3107,7 @@ sub trace { return main::trace(@_); }
          for my $set (@records) {
              my $start = $set->start;
              my $end   = $set->end;
-            my $value   = $set->value;
+            my $value = $set->value;
              if ($start > $new->max) {
                  $new->_add_delete('+', $start, $end, $value);
              }
@@ -3335,7 +3349,7 @@ sub trace { return main::trace(@_); }
          #                         new and old values are identical, the
          #                         replacement is skipped to save cycles
          #       => $IF_NOT_EQUIVALENT means to replace the existing values
-        #                         with this one if they are not equivalent.
+        #          (the default)  with this one if they are not equivalent.
          #                         Ranges are equivalent if their types are the
          #                         same, and they are the same string; or if
          #                         both are type 0 ranges, if their Unicode
@@ -3349,13 +3363,16 @@ sub trace { return main::trace(@_); }
          #                         style when the pre-existing and replacement
          #                         standard forms are the same, we can move to
          #                         the modern style
-        #       => $MULTIPLE      means that if this range duplicates an
+        #       => $MULTIPLE_BEFORE means that if this range duplicates an
          #                         existing one, but has a different value,
          #                         don't replace the existing one, but insert
          #                         this, one so that the same range can occur
          #                         multiple times.  They are stored LIFO, so
          #                         that the final one inserted is the first one
          #                         returned in an ordered search of the table.
+        #       => $MULTIPLE_AFTER is like $MULTIPLE_BEFORE, but is stored
+        #                         FIFO, so that this one is inserted after all
+        #                         others that currently exist.
          #       => anything else  is the same as => $IF_NOT_EQUIVALENT
          #
          # "same value" means identical for non-type-0 ranges, and it means
@@ -3587,19 +3604,22 @@ sub trace { return main::trace(@_); }
          # Here, we have taken care of the case where $replace is $NO.
          # Remember that here, r[$i-1]->end < $start <= r[$i]->end
          # If inserting a multiple record, this is where it goes, before the
-        # first (if any) existing one.  This implies an insertion, and no
-        # change to any existing ranges.  Note that $i can be -1 if this new
-        # range doesn't actually duplicate any existing, and comes at the
-        # beginning of the list.
-        if ($replace == $MULTIPLE) {
+        # first (if any) existing one if inserting LIFO.  (If this is to go
+        # afterwards, FIFO, the pointer is moved there further below.)  These
+        # imply an insertion, and no change to any existing ranges.  Note that
+        # $i can be -1 if this new range doesn't actually duplicate any
+        # existing one, and comes at the beginning of the list.
+        if ($replace == $MULTIPLE_BEFORE || $replace == $MULTIPLE_AFTER) {
  
              if ($start != $end) {
                  Carp::my_carp_bug("$owner_name_of{$addr}Can't cope with adding a multiple record when the range ($start..$end) contains more than one code point.  No action taken.");
                  return;
              }
  
-            # Don't add an exact duplicate, as it isn't really a multiple
+            # If the new code point is within a current range ...
              if ($end >= $r->[$i]->start) {
+
+                # Don't add an exact duplicate, as it isn't really a multiple
                  my $existing_value = $r->[$i]->value;
                  my $existing_type = $r->[$i]->type;
                  return if $value eq $existing_value && $type eq $existing_type;
@@ -3616,11 +3636,27 @@ sub trace { return main::trace(@_); }
                  # pre-existing code point, which will again be a single code
                  # point range.  Because 'i' likely will have changed as a
                  # result of these operations, we can't just continue on, but
-                # do this operation recursively as well.
+                # do this operation recursively as well.  If we are inserting
+                # LIFO, the pre-existing code point needs to go after the new
+                # one, so use MULTIPLE_AFTER; and vice versa.
                  if ($r->[$i]->start != $r->[$i]->end) {
                      $self->_add_delete('-', $start, $end, "");
                      $self->_add_delete('+', $start, $end, $value, Type => $type);
-                    return $self->_add_delete('+', $start, $end, $existing_value, Type => $existing_type, Replace => $MULTIPLE);
+                    return $self->_add_delete('+',
+                            $start, $end,
+                            $existing_value,
+                            Type => $existing_type,
+                            Replace => ($replace == $MULTIPLE_BEFORE)
+                                       ? $MULTIPLE_AFTER
+                                       : $MULTIPLE_BEFORE);
+                }
+            }
+
+            # If this new record is to be placed after, move beyond all
+            # existing ones.
+            if ($replace == $MULTIPLE_AFTER) {
+                while ($i < @$r && $r->[$i]->start == $start) {
+                    $i++;
                  }
              }
  
@@ -3644,8 +3680,8 @@ sub trace { return main::trace(@_); }
              return @return;
          }
  
-        # Here, we have taken care of $NO and $MULTIPLE replaces.  This leaves
-        # delete, insert, and replace either unconditionally or if not
+        # Here, we have taken care of $NO and $MULTIPLE_foo replaces.  This
+        # leaves delete, insert, and replace either unconditionally or if not
          # equivalent.  $i still points to the first potential affected range.
          # Now find the highest range affected, which will determine the length
          # parameter to splice.  (The input range can span multiple existing
@@ -3756,7 +3792,7 @@ sub trace { return main::trace(@_); }
          $j--;        # $j now points to the highest affected range.
          trace "Final affected range is $j: $r->[$j]" if main::DEBUG && $to_trace;
  
-        # Here, have taken care of $NO and $MULTIPLE replaces.
+        # Here, have taken care of $NO and $MULTIPLE_foo replaces.
          # $j points to the highest affected range.  But it can be < $i or even
          # -1.  These happen only if the insertion is entirely in the gap
          # between r[$i-1] and r[$i].  Here's why: j < i means that the j loop
@@ -4398,7 +4434,7 @@ sub trace { return main::trace(@_); }
          for my $try_hard (0, 1) {
  
              # Look through all the ranges for a usable code point.
-            for my $set ($self->ranges) {
+            for my $set (reverse $self->ranges) {
  
                  # Try the edge cases first, starting with the end point of the
                  # range.
@@ -4457,10 +4493,12 @@ use base '_Range_List_Base';
          my $self = shift;
          my $code_point = shift;
          my $value = shift;
-        Carp::carp_extra_args(\@_) if main::DEBUG && @_;
+        my %args = @_;
+        my $replace = delete $args{'Replace'} // $MULTIPLE_BEFORE;
+        Carp::carp_extra_args(\%args) if main::DEBUG && %args;
  
          return $self->add_map($code_point, $code_point,
-                                $value, Replace => $MULTIPLE);
+                                $value, Replace => $replace);
      }
  } # End of closure for package Range_Map
  
@@ -4580,7 +4618,8 @@ sub trace { return main::trace(@_); }
      my %format;
      # The format of the entries of the table.  This is calculated from the
      # data in the table (or passed in the constructor).  This is an enum e.g.,
-    # $STRING_FORMAT
+    # $STRING_FORMAT.  It is marked protected as it should not be generally
+    # used to override calculations.
      main::set_access('format', \%format, 'r', 'p_s');
  
      sub new {
@@ -5164,7 +5203,7 @@ END
  
              if ($annotate) {
  
-                # if annotating each code point, must print 1 per line.
+                # If annotating each code point, must print 1 per line.
                  # The variable could point to a subroutine, and we don't want
                  # to lose that fact, so only set if not set already
                  $range_size_1 = 1 if ! $range_size_1;
@@ -6186,7 +6225,7 @@ END
          return unless defined $name;
  
          if (defined $swash_keys{$name}) {
-            Carp::my_carp(join_lines(<<END
+            Carp::my_carp(main::join_lines(<<END
  Already created a swash name '$name' for $swash_keys{$name}.  This means that
  the same name desired for $self shouldn't be used.  Bad News.  This must be
  fixed before production use, but proceeding anyway
@@ -6226,7 +6265,7 @@ END
  \$utf8::SwashInfo{'To$name'}{'format'} = '$format'; # $map_table_formats{$format}
  END
          if ($specials_name) {
-        $return .= <<END;
+            $return .= <<END;
  \$utf8::SwashInfo{'To$name'}{'specials_name'} = '$specials_name'; # Name of hash of special mappings
  END
          }
@@ -6308,8 +6347,13 @@ END
                                  if $format eq $FLOAT_FORMAT
                                      && $map !~ / ^ -? [0-9]+ \. [0-9]* $ /x;
                              $format = $HEX_FORMAT
-                            if $format eq $RATIONAL_FORMAT
-                                && $map !~ / ^ -? [0-9]+ ( \/ [0-9]+ )? $ /x;
+                                if ($format eq $RATIONAL_FORMAT
+                                       && $map !~
+                                           m/ ^ -? [0-9]+ ( \/ [0-9]+ )? $ /x)
+                                        # Assume a leading zero means hex,
+                                        # even if all digits are 0-9
+                                    || ($format eq $INTEGER_FORMAT
+                                        && $map =~ /^0/);
                              $format = $STRING_FORMAT if $format eq $HEX_FORMAT
                                                         && $map =~ /[^0-9A-F]/;
                          }
@@ -7877,6 +7921,7 @@ sub trace { return main::trace(@_) if main::DEBUG && $to_trace }
                      set_default_map
                      set_file_path
                      set_final_comment
+                    _set_format
                      set_range_size_1
                      set_status
                      set_to_output_map
@@ -8556,15 +8601,6 @@ sub finish_property_setup {
          }
      }
  
-    # This entry is still missing as of 6.0, perhaps because no short name for
-    # it.
-    if (-e 'NameAliases.txt') {
-        my $aliases = property_ref('Name_Alias');
-        if (! defined $aliases) {
-            $aliases = Property->new('Name_Alias');
-        }
-    }
-
      # These are used so much, that we set globals for them.
      $gc = property_ref('General_Category');
      $block = property_ref('Block');
@@ -9782,6 +9818,7 @@ END
      my $input_field_count = $i;
  
      # This routine in addition outputs these extra fields:
+
      my $DECOMP_TYPE = $i++; # Decomposition type
  
      # These fields are modifications of ones above, and are usually
@@ -9901,7 +9938,7 @@ END
                         Range_Size_1 => \&output_perl_charnames_line,
                         Type => $STRING,
                         );
-        $perl_charname->set_proxy_for('Name', 'Name_Alias');
+        $perl_charname->set_proxy_for('Name');
  
          my $Perl_decomp = Property->new('Perl_Decomposition_Mapping',
                                          Directory => File::Spec->curdir(),
@@ -10756,28 +10793,51 @@ END
              return;
          }
  
-        $_ = "$fields[0]; lc; $fields[1]";
-        $file->insert_adjusted_lines("$fields[0]; tc; $fields[2]");
-        $file->insert_adjusted_lines("$fields[0]; uc; $fields[3]");
+        my $decimal_code_point = hex $fields[0];
  
-        # Copy any simple case change to the special tables constructed if
-        # being overridden by a multi-character case change.
-        if ($fields[1] ne $fields[0]
-            && (my $value = $lc->value_of(hex $fields[0])) ne $CODE_POINT)
-        {
-            $file->insert_adjusted_lines("$fields[0]; _slc; $value");
-        }
-        if ($fields[2] ne $fields[0]
-            && (my $value = $tc->value_of(hex $fields[0])) ne $CODE_POINT)
-        {
-            $file->insert_adjusted_lines("$fields[0]; _stc; $value");
-        }
-        if ($fields[3] ne $fields[0]
-            && (my $value = $uc->value_of(hex $fields[0])) ne $CODE_POINT)
-        {
-            $file->insert_adjusted_lines("$fields[0]; _suc; $value");
+        # Loop to handle each of the three mappings in the input line, in
+        # order, with $i indicating the current field number.
+        my $i = 0;
+        for my $object ($lc, $tc, $uc) {
+            $i++;   # First time through, $i = 1 ... 3rd time = 3
+
+            my $value = $object->value_of($decimal_code_point);
+            $value = ($value eq $CODE_POINT)
+                      ? $decimal_code_point
+                      : hex $value;
+
+            # If this isn't a multi-character mapping, it should already have
+            # been read in.
+            if ($fields[$i] !~ / /) {
+                if ($value != hex $fields[$i]) {
+                    Carp::my_carp("Bad news. UnicodeData.txt thinks "
+                                  . $object->name
+                                  . "(0x$fields[0]) is $value"
+                                  . " and SpecialCasing.txt thinks it is "
+                                  . hex $fields[$i]
+                                  . ".  Good luck.  Proceeding anyway.");
+                }
+            }
+            else {
+                $file->insert_adjusted_lines("$fields[0]; "
+                                             . $object->full_name
+                                             . "; $fields[$i]");
+
+                # Copy any simple case change to the special tables
+                # constructed if being overridden by a multi-character case
+                # change.
+                if ($value != $decimal_code_point) {
+                    $file->insert_adjusted_lines(sprintf("%s; _s%s; %04X",
+                                                 $fields[0],
+                                                 $object->name,
+                                                 $value));
+                }
+            }
          }
  
+        # Everything has been handled by the insert_adjusted_lines()
+        $_ = "";
+
          return;
      }
  }
@@ -10818,6 +10878,7 @@ sub filter_old_style_case_folding {
      # Create the map for simple only if are going to output it, for otherwise
      # it takes no part in anything we do.
      my $to_output_simple;
+    my $non_final_folds;
  
      sub setup_case_folding($) {
          # Read in the case foldings in CaseFolding.txt.  This handles both
@@ -10830,6 +10891,12 @@ sub filter_old_style_case_folding {
              property_ref('Case_Folding')->set_proxy_for('Simple_Case_Folding');
          }
  
+        $non_final_folds = $perl->add_match_table("_Perl_Non_Final_Folds",
+                           Perl_Extension => 1,
+                           Fate => $INTERNAL_ONLY,
+                           Description => "Code points that particpate in a multi-char fold and are not the final character of said fold",
+                           );
+
          # If we ever wanted to show that these tables were combined, a new
          # property method could be created, like set_combined_props()
          property_ref('Case_Folding')->add_comment(join_lines( <<END
@@ -10880,7 +10947,14 @@ END
          # so that _swash_inversion_hash() is able to construct closures
          # without having to worry about F mappings.
          if ($type eq 'C' || $type eq 'F' || $type eq 'I' || $type eq 'S') {
-            $_ = "$range; Case_Folding; $CMD_DELIM$REPLACE_CMD=$MULTIPLE$CMD_DELIM$map";
+            $_ = "$range; Case_Folding; "
+                 . "$CMD_DELIM$REPLACE_CMD=$MULTIPLE_BEFORE$CMD_DELIM$map";
+            if ($type eq 'F') {
+                my @string = split " ", $map;
+                for my $i (0 .. @string  - 1 -1) {
+                    $non_final_folds->add_range(hex $string[$i], hex $string[$i]);
+                }
+            }
          }
          else {
              $_ = "";
@@ -11307,13 +11381,13 @@ sub setup_script_extensions {
      # The Script_Extensions property starts out with a clone of the Script
      # property.
  
-    my $sc = property_ref("Script");
-    my $scx = Property->new("scx", Full_Name => "Script_Extensions",
-                  Initialize => $sc,
-                  Default_Map => $sc->default_map,
-                  Pre_Declared_Maps => 0,
-                  Format => $STRING_WHITE_SPACE_LIST,
-                  );
+    my $scx = property_ref("Script_Extensions");
+    $scx = Property->new("scx", Full_Name => "Script_Extensions")
+                                                            if ! defined $scx;
+    $scx->_set_format($STRING_WHITE_SPACE_LIST);
+    $scx->initialize($script);
+    $scx->set_default_map($script->default_map);
+    $scx->set_pre_declared_maps(0);     # PropValueAliases doesn't list these
      $scx->add_comment(join_lines( <<END
  The values for code points that appear in one script are just the same as for
  the 'Script' property.  Likewise the values for those that appear in many
@@ -11323,8 +11397,8 @@ of those scripts.
  END
      ));
  
-    # Make the scx's tables and aliases for them the same as sc's
-    foreach my $table ($sc->tables) {
+    # Initialize scx's tables and the aliases for them to be the same as sc's
+    foreach my $table ($script->tables) {
          my $scx_table = $scx->add_match_table($table->name,
                                  Full_Name => $table->full_name);
          foreach my $alias ($table->aliases) {
@@ -11352,8 +11426,44 @@ sub  filter_script_extensions_line {
      return;
  }
  
-sub setup_v6_name_alias {
-        property_ref('Name_Alias')->add_map(7, 7, "ALERT");
+sub setup_early_name_alias {
+    my $aliases = property_ref('Name_Alias');
+    $aliases = Property->new('Name_Alias') if ! defined $aliases;
+
+    # Only 6.0 needs this alias added by hand: before 6.0 it wasn't a
+    # problem, and after it, the alias is part of the Unicode-delivered file.
+    $aliases->add_map(7, 7, "ALERT: control") if $v_version eq v6.0.0;
+    return;
+}
+
+sub filter_later_version_name_alias_line {
+
+    # This file has an extra entry per line for the alias type.  This is
+    # handled by creating a compound entry: "$alias: $type";  First, split
+    # the line into components.
+    my ($range, $alias, $type, @remainder)
+        = split /\s*;\s*/, $_, -1; # -1 => retain trailing null fields
+
+    # This file contains multiple entries for some components, so tell the
+    # downstream code to allow this in our internal tables; the
+    # $MULTIPLE_AFTER preserves the input ordering.
+    $_ = join ";", $range, $CMD_DELIM
+                           . $REPLACE_CMD
+                           . '='
+                           . $MULTIPLE_AFTER
+                           . $CMD_DELIM
+                           . "$alias: $type",
+                   @remainder;
+    return;
+}
+
+sub filter_early_version_name_alias_line {
+
+    # Early versions did not have the trailing alias type field; implicitly it
+    # was 'correction'
+    $_ .= "; correction";
+    filter_later_version_name_alias_line;
+    return;
  }
  
  sub finish_Unicode() {
@@ -11366,7 +11476,8 @@ sub finish_Unicode() {
      # 3) Calculates all the regular expression match tables based on the
      #    mappings.
      # 3) Calculates and adds the tables which are defined by Unicode, but
-    #    which aren't derived by them
+    #    which aren't derived by them, and certain derived tables that Perl
+    #    uses.
  
      # For each property, fill in any missing mappings, and calculate the re
      # match tables.  If a property has more than one missing mapping, the
@@ -11604,7 +11715,8 @@ END
                              Lowercase_Mapping
                              Titlecase_Mapping
                              Case_Folding
-                        } ) {
+                        } )
+    {
          my $full = property_ref($map);
          if ($full->is_empty) {
              my $simple = property_ref('Simple_' . $map);
@@ -12166,20 +12278,71 @@ sub compile_perl() {
      my $alias = property_ref('Name_Alias');
      if (defined $alias) {
          push @composition, 'Name_Alias';
+        $perl_charname->set_proxy_for('Name_Alias');
+        my $unicode_1 = property_ref('Unicode_1_Name');
+        my %abbreviations;
+
+        # Add each entry in Name_Alias to Perl_Charnames.  Where these go with
+        # respect to any existing entry depends on the entry type.
+        # Corrections go before said entry, as they should be returned in
+        # preference over the existing entry.  (A correction to a correction
+        # should be later in the Name_Alias table, so it will correctly
+        # precede the erroneous correction in Perl_Charnames.)
+        #
+        # Abbreviations go after everything else, so they are saved
+        # temporarily in a hash for later.
+        #
+        # Controls are currently added afterwards.  This is because Perl has
+        # previously used the Unicode1 name, and so should still use that.
+        # (Most of them will be the same anyway, in which case we don't add a
+        # duplicate)
+
          $alias->reset_each_range;
          while (my ($range) = $alias->each_range) {
              next if $range->value eq "";
-            if ($range->start != $range->end) {
-                Carp::my_carp("Expecting only one code point in the range $range.  Just to keep going, using just the first code point;");
+            my $code_point = $range->start;
+            if ($code_point != $range->end) {
+                Carp::my_carp_bug("Bad News.  Expecting only one code point in the range $range.  Just to keep going, using only the first code point;");
+            }
+            my ($value, $type) = split ': ', $range->value;
+            my $replace_type;
+            if ($type eq 'correction') {
+                $replace_type = $MULTIPLE_BEFORE;
+            }
+            elsif ($type eq 'abbreviation') {
+
+                # Save for later
+                $abbreviations{$value} = $code_point;
+                next;
              }
-            $perl_charname->add_duplicate($range->start, $range->value);
+            elsif ($type eq 'control') {
+                my $unicode_1_value = $unicode_1->value_of($code_point);
+                next if $unicode_1_value eq $value;
+                $replace_type = $MULTIPLE_AFTER;
+            }
+            else {
+                $replace_type = $MULTIPLE_AFTER;
+            }
+
+            # Actually add; before or after current entry(ies) as determined
+            # above.
+            $perl_charname->add_duplicate($code_point, $value, Replace => $replace_type);
+        }
+
+        # Now that everything else has been added, append the abbreviations
+        # after all other entries.
+        foreach my $value (keys %abbreviations) {
+            $perl_charname->add_duplicate($abbreviations{$value}, $value, Replace => $MULTIPLE_AFTER);
          }
          $alias_sentence = <<END;
-The Name_Alias property adds duplicate code point entries with a corrected
-name.  The original (less correct, but still valid) name will be physically
-last.
+The Name_Alias property adds duplicate code point entries that are
+alternatives to the original name.  If an addition is a corrected
+name, it will be physically first in the table.  The original (less correct,
+but still valid) name will be next; then any alternatives, in no particular
+order; and finally any abbreviations, again in no particular order.
  END
      }
+
      my $comment;
      if (@composition <= 2) { # Always at least 2
          $comment = join " and ", @composition;
@@ -12191,8 +12354,8 @@ END
  
      $perl_charname->add_comment(join_lines( <<END
  This file is for charnames.pm.  It is the union of the $comment properties.
-Unicode_1_Name entries are used only for otherwise nameless code
-points.
+Unicode_1_Name entries are used only for nameless code points in the Name
+property.
  $alias_sentence
  This file doesn't include the algorithmically determinable names.  For those,
  use 'unicore/Name.pm'
@@ -13400,7 +13563,7 @@ sub make_ucd_table_pod_entries {
                      || $ucd_pod{$standard}{'perl_extension'} == $perl_extension
                      || $output_this == $perl_extension)
                  {
-                    Carp::my_carp("Bad news.  $property and $ucd_pod{$standard}->{'property'} have unexpected output statuss and perl-extension combinations.  Proceeding anyway.");
+                    Carp::my_carp("Bad news.  $property and $ucd_pod{$standard}->{'property'} have unexpected output status and perl-extension combinations.  Proceeding anyway.");
                  }
  
                  # We modifiy the info column of the one being output to
@@ -15585,7 +15748,7 @@ sub make_property_test_script() {
  # others except DAge.txt (as data in an extracted file can be over-ridden by
  # the non-extracted.  Some other files depend on data derived from an earlier
  # file, like UnicodeData requires data from Jamo, and the case changing and
-# folding requires data from Unicode.  Mostly, it safest to order by first
+# folding requires data from Unicode.  Mostly, it is safest to order by first
  # version releases in (except the Jamo).  DAge.txt is read before the
  # extracted ones because of the rarely used feature $compare_versions.  In the
  # unlikely event that there were ever an extracted file that contained the Age
@@ -15790,9 +15953,12 @@ my @input_file_objects = (
                      ),
      Input_file->new('NameAliases.txt', v5.0.0,
                      Property => 'Name_Alias',
-                    Pre_Handler => ($v_version ge v6.0.0)
-                                   ? \&setup_v6_name_alias
+                    Pre_Handler => ($v_version le v6.0.0)
+                                   ? \&setup_early_name_alias
                                     : undef,
+                    Each_Line_Handler => ($v_version le v6.0.0)
+                                   ? \&filter_early_version_name_alias_line
+                                   : \&filter_later_version_name_alias_line,
                      ),
      Input_file->new("BidiTest.txt", v5.2.0,
                      Skip => 'Validation Tests',
@@ -15834,6 +16000,9 @@ my @input_file_objects = (
                      Property => 'Script_Extensions',
                      Pre_Handler => \&setup_script_extensions,
                      Each_Line_Handler => \&filter_script_extensions_line,
+                    Has_Missings_Defaults => (($v_version le v6.0.0)
+                                            ? $NO_DEFAULTS
+                                            : $IGNORED),
                      ),
      # The two Indic files are actually available starting in v6.0.0, but their
      # property values are missing from PropValueAliases.txt in that release,