X-Git-Url: https://perl5.git.perl.org/perl5.git/blobdiff_plain/8f2f18b96726d6606523dad32338ebe9e36c8a5f..1de764bcebedbbdca109d0dad476505038e0e18d:/lib/unicore/mktables

diff --git a/lib/unicore/mktables b/lib/unicore/mktables
index 90de370..40ea057 100644
--- a/lib/unicore/mktables
+++ b/lib/unicore/mktables
@@ -16,6 +16,13 @@
 # that instituted the change to main::objaddr, and subsequent commits that
 # changed 0+$self to pack 'J', $self.)
 
+my $start_time;
+BEGIN { # Get the time the script started running; do it at compiliation to
+        # get it as close as possible
+    $start_time= time;
+}
+
+
 require 5.010_001;
 use strict;
 use warnings;
@@ -712,7 +719,7 @@ END
 
 # Stores the most-recently changed file.  If none have changed, can skip the
 # build
-my $youngest = -M $0;   # Do this before the chdir!
+my $most_recent = (stat $0)[9];   # Do this before the chdir!
 
 # Change directories now, because need to read 'version' early.
 if ($use_directory) {
@@ -1135,7 +1142,8 @@ my %status_past_participles = (
     $DEPRECATED => 'deprecated',
 );
 
-# The format of the values of the map tables:
+# The format of the values of the tables:
+my $EMPTY_FORMAT = "";
 my $BINARY_FORMAT = 'b';
 my $DECIMAL_FORMAT = 'd';
 my $FLOAT_FORMAT = 'f';
@@ -1143,6 +1151,7 @@ my $INTEGER_FORMAT = 'i';
 my $HEX_FORMAT = 'x';
 my $RATIONAL_FORMAT = 'r';
 my $STRING_FORMAT = 's';
+my $DECOMP_STRING_FORMAT = 'c';
 
 my %map_table_formats = (
     $BINARY_FORMAT => 'binary',
@@ -1151,7 +1160,8 @@ my %map_table_formats = (
     $INTEGER_FORMAT => 'integer',
     $HEX_FORMAT => 'positive hex whole number; a code point',
     $RATIONAL_FORMAT => 'rational: an integer or a fraction',
-    $STRING_FORMAT => 'arbitrary string',
+    $STRING_FORMAT => 'string',
+    $DECOMP_STRING_FORMAT => 'Perl\'s internal (Normalize.pm) decompostion mapping',
 );
 
 # Unicode didn't put such derived files in a separate directory at first.
@@ -2847,8 +2857,8 @@ sub trace { return main::trace(@_); }
         return $i + 1;
     }
 
-    sub value_of {
-        # Returns the value associated with the code point, undef if none
+    sub containing_range {
+        # Returns the range object that contains the code point, undef if none
 
         my $self = shift;
         my $codepoint = shift;
@@ -2859,7 +2869,34 @@ sub trace { return main::trace(@_); }
 
         # contains() returns 1 beyond where we should look
         no overloading;
-        return $ranges{pack 'J', $self}->[$i-1]->value;
+        return $ranges{pack 'J', $self}->[$i-1];
+    }
+
+    sub value_of {
+        # Returns the value associated with the code point, undef if none
+
+        my $self = shift;
+        my $codepoint = shift;
+        Carp::carp_extra_args(\@_) if main::DEBUG && @_;
+
+        my $range = $self->containing_range($codepoint);
+        return unless defined $range;
+
+        return $range->value;
+    }
+
+    sub type_of {
+        # Returns the type of the range containing the code point, undef if
+        # the code point is not in the table
+
+        my $self = shift;
+        my $codepoint = shift;
+        Carp::carp_extra_args(\@_) if main::DEBUG && @_;
+
+        my $range = $self->containing_range($codepoint);
+        return unless defined $range;
+
+        return $range->type;
     }
 
     sub _search_ranges {
@@ -4184,6 +4221,12 @@ sub trace { return main::trace(@_); }
     # The constructor can override the global flag of the same name.
     main::set_access('output_range_counts', \%output_range_counts, 'r');
 
+    my %format;
+    # The format of the entries of the table.  This is calculated from the
+    # data in the table (or passed in the constructor).  This is an enum e.g.,
+    # $STRING_FORMAT
+    main::set_access('format', \%format, 'r', 'p_s');
+
     sub new {
         # All arguments are key => value pairs, which you can see below, most
         # of which match fields documented above.  Otherwise: Pod_Entry,
@@ -4204,6 +4247,7 @@ sub trace { return main::trace(@_); }
         $full_name{$addr} = delete $args{'Full_Name'};
         my $complete_name = $complete_name{$addr}
                           = delete $args{'Complete_Name'};
+        $format{$addr} = delete $args{'Format'};
         $internal_only{$addr} = delete $args{'Internal_Only_Warning'} || 0;
         $output_range_counts{$addr} = delete $args{'Output_Range_Counts'};
         $property{$addr} = delete $args{'_Property'};
@@ -4321,12 +4365,18 @@ sub trace { return main::trace(@_); }
     # Here are the methods that are required to be defined by any derived
     # class
     for my $sub (qw(
+                    handle_special_range
                     append_to_body
                     pre_body
                 ))
-                # append_to_body and pre_body are called in the write() method
-                # to add stuff after the main body of the table, but before
-                # its close; and to prepend stuff before the beginning of the
+                # write() knows how to write out normal ranges, but it calls
+                # handle_special_range() when it encounters a non-normal one.
+                # append_to_body() is called by it after it has handled all
+                # ranges to add anything after the main portion of the table.
+                # And finally, pre_body() is called after all this to build up
+                # anything that should appear before the main portion of the
+                # table.  Doing it this way allows things in the middle to
+                # affect what should appear before the main portion of the
                 # table.
     {
         no strict "refs";
@@ -4633,7 +4683,10 @@ sub trace { return main::trace(@_); }
     }
 
     sub write {
-        # Write a representation of the table to its file.
+        # Write a representation of the table to its file.  It calls several
+        # functions furnished by sub-classes of this abstract base class to
+        # handle non-normal ranges, to add stuff before the table, and at its
+        # end.
 
         my $self = shift;
         my $tab_stops = shift;       # The number of tab stops over to put any
@@ -4646,17 +4699,18 @@ sub trace { return main::trace(@_); }
         my $addr = do { no overloading; pack 'J', $self; };
 
         # Start with the header
-        my @OUT = $self->header;
+        my @HEADER = $self->header;
 
         # Then the comments
-        push @OUT, "\n", main::simple_fold($comment{$addr}, '# '), "\n"
+        push @HEADER, "\n", main::simple_fold($comment{$addr}, '# '), "\n"
                                                         if $comment{$addr};
 
-        # Then any pre-body stuff.
-        my $pre_body = $self->pre_body;
-        push @OUT, $pre_body, "\n" if $pre_body;
+        # Things discovered processing the main body of the document may
+        # affect what gets output before it, therefore pre_body() isn't called
+        # until after all other processing of the table is done.
 
-        # The main body looks like a 'here' document
+        # The main body looks like a 'here' document.
+        my @OUT;
         push @OUT, "return <<'END';\n";
 
         if ($range_list{$addr}->is_empty) {
@@ -4671,13 +4725,18 @@ sub trace { return main::trace(@_); }
             my $range_size_1 = $range_size_1{$addr};
 
             # Output each range as part of the here document.
+            RANGE:
             for my $set ($range_list{$addr}->ranges) {
+                if ($set->type != 0) {
+                    $self->handle_special_range($set);
+                    next RANGE;
+                }
                 my $start = $set->start;
                 my $end   = $set->end;
                 my $value  = $set->value;
 
                 # Don't output ranges whose value is the one to suppress
-                next if defined $suppress_value && $value eq $suppress_value;
+                next RANGE if defined $suppress_value && $value eq $suppress_value;
 
                 # If has or wants a single point range output
                 if ($start == $end || $range_size_1) {
@@ -4734,10 +4793,15 @@ sub trace { return main::trace(@_); }
         # And finish the here document.
         push @OUT, "END\n";
 
+        # Done with the main portion of the body.  Can now figure out what
+        # should appear before it in the file.
+        my $pre_body = $self->pre_body;
+        push @HEADER, $pre_body, "\n" if $pre_body;
+
         # All these files have a .pl suffix
         $file_path{$addr}->[-1] .= '.pl';
 
-        main::write($file_path{$addr}, \@OUT);
+        main::write($file_path{$addr}, \@HEADER, \@OUT);
         return;
     }
 
@@ -4809,6 +4873,7 @@ sub trace { return main::trace(@_); }
     # Accessors for the range list stored in this table.  First for
     # unconditional
     for my $sub (qw(
+                    containing_range
                     contains
                     count
                     each_range
@@ -4818,6 +4883,7 @@ sub trace { return main::trace(@_); }
                     min
                     range_count
                     reset_each_range
+                    type_of
                     value_of
                 ))
     {
@@ -4897,26 +4963,11 @@ sub trace { return main::trace(@_); }
                     \%anomalous_entries,
                     'readable_array');
 
-    my %format;
-    # The format of the entries of the table.  This is calculated from the
-    # data in the table (or passed in the constructor).  This is an enum e.g.,
-    # $STRING_FORMAT
-    main::set_access('format', \%format);
-
     my %core_access;
     # This is a string, solely for documentation, indicating how one can get
     # access to this property via the Perl core.
     main::set_access('core_access', \%core_access, 'r', 's');
 
-    my %has_specials;
-    # Boolean set when non-zero map-type ranges are added to this table,
-    # which happens in only a few tables.  This is purely for performance, to
-    # avoid having to search through every table upon output, so if all the
-    # non-zero maps got deleted before output, this would remain set, and the
-    # only penalty would be performance.  Currently, most map tables that get
-    # output have specials in them, so this doesn't help that much anyway.
-    main::set_access('has_specials', \%has_specials);
-
     my %to_output_map;
     # Boolean as to whether or not to write out this map table
     main::set_access('to_output_map', \%to_output_map, 's');
@@ -4933,7 +4984,6 @@ sub trace { return main::trace(@_); }
 
         my $core_access = delete $args{'Core_Access'};
         my $default_map = delete $args{'Default_Map'};
-        my $format = delete $args{'Format'};
         my $property = delete $args{'_Property'};
         my $full_name = delete $args{'Full_Name'};
         # Rest of parameters passed on
@@ -4953,7 +5003,6 @@ sub trace { return main::trace(@_); }
         $anomalous_entries{$addr} = [];
         $core_access{$addr} = $core_access;
         $default_map{$addr} = $default_map;
-        $format{$addr} = $format;
 
         $self->initialize($initialize) if defined $initialize;
 
@@ -5002,8 +5051,6 @@ sub trace { return main::trace(@_); }
 
         my $addr = do { no overloading; pack 'J', $self; };
 
-        $has_specials{$addr} = 1 if $type;
-
         $self->_range_list->add_map($lower, $upper,
                                     $string,
                                     @_,
@@ -5063,11 +5110,6 @@ sub trace { return main::trace(@_); }
                                           Replace => $UNCONDITIONALLY);
         }
 
-        # Copy the specials information from the other table to $self
-        if ($has_specials{$other_addr}) {
-            $has_specials{$addr} = 1;
-        }
-
         return;
     }
 
@@ -5331,12 +5373,117 @@ END
 
     my %swash_keys; # Makes sure don't duplicate swash names.
 
+    # The remaining variables are temporaries used while writing each table,
+    # to output special ranges.
+    my $has_hangul_syllables;
+    my @multi_code_point_maps;  # Map is to more than one code point.
+
+    # The key is the base name of the code point, and the value is an
+    # array giving all the ranges that use this base name.  Each range
+    # is actually a hash giving the 'low' and 'high' values of it.
+    my %names_ending_in_code_point;
+
+    # Inverse mapping.  The list of ranges that have these kinds of
+    # names.  Each element contains the low, high, and base names in a
+    # hash.
+    my @code_points_ending_in_code_point;
+
+    sub handle_special_range {
+        # Called in the middle of write when it finds a range it doesn't know
+        # how to handle.
+
+        my $self = shift;
+        my $range = shift;
+        Carp::carp_extra_args(\@_) if main::DEBUG && @_;
+
+        my $addr = do { no overloading; pack 'J', $self; };
+
+        my $type = $range->type;
+
+        my $low = $range->start;
+        my $high = $range->end;
+        my $map = $range->value;
+
+        # No need to output the range if it maps to the default.
+        return if $map eq $default_map{$addr};
+
+        # Switch based on the map type...
+        if ($type == $HANGUL_SYLLABLE) {
+
+            # These are entirely algorithmically determinable based on
+            # some constants furnished by Unicode; for now, just set a
+            # flag to indicate that have them.  After everything is figured
+            # out, we will output the code that does the algorithm.
+            $has_hangul_syllables = 1;
+        }
+        elsif ($type == $CP_IN_NAME) {
+
+            # Code points whose the name ends in their code point are also
+            # algorithmically determinable, but need information about the map
+            # to do so.  Both the map and its inverse are stored in data
+            # structures output in the file.
+            push @{$names_ending_in_code_point{$map}->{'low'}}, $low;
+            push @{$names_ending_in_code_point{$map}->{'high'}}, $high;
+
+            push @code_points_ending_in_code_point, { low => $low,
+                                                        high => $high,
+                                                        name => $map
+                                                    };
+        }
+        elsif ($range->type == $MULTI_CP || $range->type == $NULL) {
+
+            # Multi-code point maps and null string maps have an entry
+            # for each code point in the range.  They use the same
+            # output format.
+            for my $code_point ($low .. $high) {
+
+                # The pack() below can't cope with surrogates.
+                if ($code_point >= 0xD800 && $code_point <= 0xDFFF) {
+                    Carp::my_carp("Surrogage code point '$code_point' in mapping to '$map' in $self.  No map created");
+                    next;
+                }
+
+                # Generate the hash entries for these in the form that
+                # utf8.c understands.
+                my $tostr = "";
+                my $to_name = "";
+                my $to_chr = "";
+                foreach my $to (split " ", $map) {
+                    if ($to !~ /^$code_point_re$/) {
+                        Carp::my_carp("Illegal code point '$to' in mapping '$map' from $code_point in $self.  No map created");
+                        next;
+                    }
+                    $tostr .= sprintf "\\x{%s}", $to;
+                    $to = CORE::hex $to;
+                }
+
+                # I (khw) have never waded through this line to
+                # understand it well enough to comment it.
+                my $utf8 = sprintf(qq["%s" => "$tostr",],
+                        join("", map { sprintf "\\x%02X", $_ }
+                            unpack("U0C*", pack("U", $code_point))));
+
+                # Add a comment so that a human reader can more easily
+                # see what's going on.
+                push @multi_code_point_maps,
+                        sprintf("%-45s # U+%04X", $utf8, $code_point);
+                $multi_code_point_maps[-1] .= " => $map";
+            }
+        }
+        else {
+            Carp::my_carp("Unrecognized map type '$range->type' in '$range' in $self.  Not written");
+        }
+
+        return;
+    }
+
     sub pre_body {
         # Returns the string that should be output in the file before the main
-        # body of this table.  This includes some hash entries identifying the
-        # format of the body, and what the single value should be for all
-        # ranges missing from it.  It also includes any code points which have
-        # map_types that don't go in the main table.
+        # body of this table.  It isn't called until the main body is
+        # calculated, saving a pass.  The string includes some hash entries
+        # identifying the format of the body, and what the single value should
+        # be for all ranges missing from it.  It also includes any code points
+        # which have map_types that don't go in the main table.
 
         my $self = shift;
         Carp::carp_extra_args(\@_) if main::DEBUG && @_;
@@ -5355,119 +5502,13 @@ END
         }
         $swash_keys{$name} = "$self";
 
-        my $default_map = $default_map{$addr};
-
         my $pre_body = "";
-        if ($has_specials{$addr}) {
-
-            # Here, some maps with non-zero type have been added to the table.
-            # Go through the table and handle each of them.  None will appear
-            # in the body of the table, so delete each one as we go.  The
-            # code point count has already been calculated, so ok to delete
-            # now.
-
-            my @multi_code_point_maps;
-            my $has_hangul_syllables = 0;
-
-            # The key is the base name of the code point, and the value is an
-            # array giving all the ranges that use this base name.  Each range
-            # is actually a hash giving the 'low' and 'high' values of it.
-            my %names_ending_in_code_point;
-
-            # Inverse mapping.  The list of ranges that have these kinds of
-            # names.  Each element contains the low, high, and base names in a
-            # hash.
-            my @code_points_ending_in_code_point;
-
-            my $range_map = $self->_range_list;
-            foreach my $range ($range_map->ranges) {
-                next unless $range->type != 0;
-                my $low = $range->start;
-                my $high = $range->end;
-                my $map = $range->value;
-                my $type = $range->type;
-
-                # No need to output the range if it maps to the default.  And
-                # the write method won't output it either, so no need to
-                # delete it to keep it from being output, and is faster to
-                # skip than to delete anyway.
-                next if $map eq $default_map;
-
-                # Delete the range to keep write() from trying to output it
-                $range_map->delete_range($low, $high);
-
-                # Switch based on the map type...
-                if ($type == $HANGUL_SYLLABLE) {
-
-                    # These are entirely algorithmically determinable based on
-                    # some constants furnished by Unicode; for now, just set a
-                    # flag to indicate that have them.  Below we will output
-                    # the code that does the algorithm.
-                    $has_hangul_syllables = 1;
-                }
-                elsif ($type == $CP_IN_NAME) {
-
-                    # If the name ends in the code point it represents, are
-                    # also algorithmically determinable, but need information
-                    # about the map to do so.  Both the map and its inverse
-                    # are stored in data structures output in the file.
-                    push @{$names_ending_in_code_point{$map}->{'low'}}, $low;
-                    push @{$names_ending_in_code_point{$map}->{'high'}}, $high;
-
-                    push @code_points_ending_in_code_point, { low => $low,
-                                                              high => $high,
-                                                              name => $map
-                                                            };
-                }
-                elsif ($range->type == $MULTI_CP || $range->type == $NULL) {
 
-                    # Multi-code point maps and null string maps have an entry
-                    # for each code point in the range.  They use the same
-                    # output format.
-                    for my $code_point ($low .. $high) {
-
-                        # The pack() below can't cope with surrogates.
-                        if ($code_point >= 0xD800 && $code_point <= 0xDFFF) {
-                            Carp::my_carp("Surrogage code point '$code_point' in mapping to '$map' in $self.  No map created");
-                            next;
-                        }
-
-                        # Generate the hash entries for these in the form that
-                        # utf8.c understands.
-                        my $tostr = "";
-                        foreach my $to (split " ", $map) {
-                            if ($to !~ /^$code_point_re$/) {
-                                Carp::my_carp("Illegal code point '$to' in mapping '$map' from $code_point in $self.  No map created");
-                                next;
-                            }
-                            $tostr .= sprintf "\\x{%s}", $to;
-                        }
-
-                        # I (khw) have never waded through this line to
-                        # understand it well enough to comment it.
-                        my $utf8 = sprintf(qq["%s" => "$tostr",],
-                                join("", map { sprintf "\\x%02X", $_ }
-                                    unpack("U0C*", pack("U", $code_point))));
-
-                        # Add a comment so that a human reader can more easily
-                        # see what's going on.
-                        push @multi_code_point_maps,
-                                sprintf("%-45s # U+%04X => %s", $utf8,
-                                                                $code_point,
-                                                                $map);
-                    }
-                }
-                else {
-                    Carp::my_carp("Unrecognized map type '$range->type' in '$range' in $self.  Using type 0 instead");
-                    $range_map->add_map($low, $high, $map, Replace => $UNCONDITIONALLY, Type => 0);
-                }
-            } # End of loop through all ranges
-
-            # Here have gone through the whole file.  If actually generated
-            # anything for each map type, add its respective header and
-            # trailer
-            if (@multi_code_point_maps) {
-                $pre_body .= <<END;
+        # Here we assume we were called after have gone through the whole
+        # file.  If we actually generated anything for each map type, add its
+        # respective header and trailer
+        if (@multi_code_point_maps) {
+            $pre_body .= <<END;
 
 # Some code points require special handling because their mappings are each to
 # multiple code points.  These do not appear in the main body, but are defined
@@ -5478,59 +5519,59 @@ END
 # under "use bytes").  Each value is the UTF-8 of the translation, for speed.
 %utf8::ToSpec$name = (
 END
-                $pre_body .= join("\n", @multi_code_point_maps) . "\n);\n";
-            }
-
-            if ($has_hangul_syllables || @code_points_ending_in_code_point) {
-
-                # Convert these structures to output format.
-                my $code_points_ending_in_code_point =
-                    main::simple_dumper(\@code_points_ending_in_code_point,
-                                        ' ' x 8);
-                my $names = main::simple_dumper(\%names_ending_in_code_point,
-                                                ' ' x 8);
-
-                # Do the same with the Hangul names,
-                my $jamo;
-                my $jamo_l;
-                my $jamo_v;
-                my $jamo_t;
-                my $jamo_re;
-                if ($has_hangul_syllables) {
-
-                    # Construct a regular expression of all the possible
-                    # combinations of the Hangul syllables.
-                    my @L_re;   # Leading consonants
-                    for my $i ($LBase .. $LBase + $LCount - 1) {
-                        push @L_re, $Jamo{$i}
-                    }
-                    my @V_re;   # Middle vowels
-                    for my $i ($VBase .. $VBase + $VCount - 1) {
-                        push @V_re, $Jamo{$i}
-                    }
-                    my @T_re;   # Trailing consonants
-                    for my $i ($TBase + 1 .. $TBase + $TCount - 1) {
-                        push @T_re, $Jamo{$i}
-                    }
-
-                    # The whole re is made up of the L V T combination.
-                    $jamo_re = '('
-                               . join ('|', sort @L_re)
-                               . ')('
-                               . join ('|', sort @V_re)
-                               . ')('
-                               . join ('|', sort @T_re)
-                               . ')?';
-
-                    # These hashes needed by the algorithm were generated
-                    # during reading of the Jamo.txt file
-                    $jamo = main::simple_dumper(\%Jamo, ' ' x 8);
-                    $jamo_l = main::simple_dumper(\%Jamo_L, ' ' x 8);
-                    $jamo_v = main::simple_dumper(\%Jamo_V, ' ' x 8);
-                    $jamo_t = main::simple_dumper(\%Jamo_T, ' ' x 8);
+            $pre_body .= join("\n", @multi_code_point_maps) . "\n);\n";
+        }
+
+        if ($has_hangul_syllables || @code_points_ending_in_code_point) {
+
+            # Convert these structures to output format.
+            my $code_points_ending_in_code_point =
+                main::simple_dumper(\@code_points_ending_in_code_point,
+                                    ' ' x 8);
+            my $names = main::simple_dumper(\%names_ending_in_code_point,
+                                            ' ' x 8);
+
+            # Do the same with the Hangul names,
+            my $jamo;
+            my $jamo_l;
+            my $jamo_v;
+            my $jamo_t;
+            my $jamo_re;
+            if ($has_hangul_syllables) {
+
+                # Construct a regular expression of all the possible
+                # combinations of the Hangul syllables.
+                my @L_re;   # Leading consonants
+                for my $i ($LBase .. $LBase + $LCount - 1) {
+                    push @L_re, $Jamo{$i}
+                }
+                my @V_re;   # Middle vowels
+                for my $i ($VBase .. $VBase + $VCount - 1) {
+                    push @V_re, $Jamo{$i}
+                }
+                my @T_re;   # Trailing consonants
+                for my $i ($TBase + 1 .. $TBase + $TCount - 1) {
+                    push @T_re, $Jamo{$i}
                 }
 
-                $pre_body .= <<END;
+                # The whole re is made up of the L V T combination.
+                $jamo_re = '('
+                            . join ('|', sort @L_re)
+                            . ')('
+                            . join ('|', sort @V_re)
+                            . ')('
+                            . join ('|', sort @T_re)
+                            . ')?';
+
+                # These hashes needed by the algorithm were generated
+                # during reading of the Jamo.txt file
+                $jamo = main::simple_dumper(\%Jamo, ' ' x 8);
+                $jamo_l = main::simple_dumper(\%Jamo_L, ' ' x 8);
+                $jamo_v = main::simple_dumper(\%Jamo_V, ' ' x 8);
+                $jamo_t = main::simple_dumper(\%Jamo_T, ' ' x 8);
+            }
+
+            $pre_body .= <<END;
 
 # To achieve significant memory savings when this file is read in,
 # algorithmically derivable code points are omitted from the main body below.
@@ -5557,10 +5598,10 @@ $names
 $code_points_ending_in_code_point
     );
 END
-                # Earlier releases didn't have Jamos.  No sense outputting
-                # them unless will be used.
-                if ($has_hangul_syllables) {
-                    $pre_body .= <<END;
+            # Earlier releases didn't have Jamos.  No sense outputting
+            # them unless will be used.
+            if ($has_hangul_syllables) {
+                $pre_body .= <<END;
 
     # Convert from code point to Jamo short name for use in composing Hangul
     # syllable names
@@ -5602,9 +5643,9 @@ $jamo_t
     my \$TCount = $TCount;
     my \$NCount = \$VCount * \$TCount;
 END
-                } # End of has Jamos
+            } # End of has Jamos
 
-                $pre_body .= << 'END';
+            $pre_body .= << 'END';
 
     sub name_to_code_point_special {
         my $name = shift;
@@ -5612,8 +5653,8 @@ END
         # Returns undef if not one of the specially handled names; otherwise
         # returns the code point equivalent to the input name
 END
-                if ($has_hangul_syllables) {
-                    $pre_body .= << 'END';
+            if ($has_hangul_syllables) {
+                $pre_body .= << 'END';
 
         if (substr($name, 0, $HANGUL_SYLLABLE_LENGTH) eq $HANGUL_SYLLABLE) {
             $name = substr($name, $HANGUL_SYLLABLE_LENGTH);
@@ -5624,8 +5665,8 @@ END
             return ($L * $VCount + $V) * $TCount + $T + $SBase;
         }
 END
-                }
-                $pre_body .= << 'END';
+            }
+            $pre_body .= << 'END';
 
         # Name must end in '-code_point' for this to handle.
         if ($name !~ /^ (.*) - ($code_point_re) $/x) {
@@ -5659,8 +5700,8 @@ END
         # Returns the name of a code point if algorithmically determinable;
         # undef if not
 END
-                if ($has_hangul_syllables) {
-                    $pre_body .= << 'END';
+            if ($has_hangul_syllables) {
+                $pre_body .= << 'END';
 
         # If in the Hangul range, calculate the name based on Unicode's
         # algorithm
@@ -5675,8 +5716,8 @@ END
             return $name;
         }
 END
-                }
-                $pre_body .= << 'END';
+            }
+            $pre_body .= << 'END';
 
         # Look through list of these code points for one in range.
         foreach my $hash (@code_points_ending_in_code_point) {
@@ -5690,13 +5731,50 @@ END
 } # End closure
 
 END
-            } # End of has hangul or code point in name maps.
-        } # End of has specials
+        } # End of has hangul or code point in name maps.
+
+        my $format = $self->format;
+
+        my $return = <<END;
+# The name this swash is to be known by, with the format of the mappings in
+# the main body of the table, and what all code points missing from this file
+# map to.
+\$utf8::SwashInfo{'To$name'}{'format'} = '$format'; # $map_table_formats{$format}
+END
+        my $default_map = $default_map{$addr};
+        $return .= "\$utf8::SwashInfo{'To$name'}{'missing'} = '$default_map';";
+
+        if ($default_map eq $CODE_POINT) {
+            $return .= ' # code point maps to itself';
+        }
+        elsif ($default_map eq "") {
+            $return .= ' # code point maps to the null string';
+        }
+        $return .= "\n";
+
+        $return .= $pre_body;
+
+        return $return;
+    }
+
+    sub write {
+        # Write the table to the file.
+
+        my $self = shift;
+        Carp::carp_extra_args(\@_) if main::DEBUG && @_;
+
+        my $addr = do { no overloading; pack 'J', $self; };
+
+        # Clear the temporaries
+        $has_hangul_syllables = 0;
+        undef @multi_code_point_maps;
+        undef %names_ending_in_code_point;
+        undef @code_points_ending_in_code_point;
 
         # Calculate the format of the table if not already done.
-        my $format = $format{$addr};
-        my $property = $self->property;
-        my $type = $property->type;
+        my $format = $self->format;
+        my $type = $self->property->type;
+        my $default_map = $self->default_map;
         if (! defined $format) {
             if ($type == $BINARY) {
 
@@ -5727,6 +5805,8 @@ END
                     # most restrictive, and so on.
                     $format = $DECIMAL_FORMAT;
                     foreach my $range (@ranges) {
+                        next if $range->type != 0;  # Non-normal ranges don't
+                                                    # affect the main body
                         my $map = $range->value;
                         if ($map ne $default_map) {
                             last if $format eq $STRING_FORMAT;  # already at
@@ -5752,47 +5832,21 @@ END
             }
         } # end of calculating format
 
-        my $return = <<END;
-# The name this swash is to be known by, with the format of the mappings in
-# the main body of the table, and what all code points missing from this file
-# map to.
-\$utf8::SwashInfo{'To$name'}{'format'} = '$format'; # $map_table_formats{$format}
-END
-        my $missing = $default_map;
-        if ($missing eq $CODE_POINT
+        if ($default_map eq $CODE_POINT
             && $format ne $HEX_FORMAT
-            && ! defined $format{$addr})    # Is expected if was manually set
+            && ! defined $self->format)    # manual settings are always
+                                           # considered ok
         {
             Carp::my_carp_bug("Expecting hex format for mapping table for $self, instead got '$format'")
         }
-        $format{$addr} = $format;
-        $return .= "\$utf8::SwashInfo{'To$name'}{'missing'} = '$missing';";
-        if ($missing eq $CODE_POINT) {
-            $return .= ' # code point maps to itself';
-        }
-        elsif ($missing eq "") {
-            $return .= ' # code point maps to the null string';
-        }
-        $return .= "\n";
-
-        $return .= $pre_body;
-
-        return $return;
-    }
-
-    sub write {
-        # Write the table to the file.
-
-        my $self = shift;
-        Carp::carp_extra_args(\@_) if main::DEBUG && @_;
 
-        my $addr = do { no overloading; pack 'J', $self; };
+        $self->_set_format($format);
 
         return $self->SUPER::write(
             ($self->property == $block)
                 ? 7     # block file needs more tab stops
                 : 3,
-            $default_map{$addr});   # don't write defaulteds
+            $default_map);   # don't write defaulteds
     }
 
     # Accessors for the underlying list that should fail if locked.
@@ -5910,6 +5964,7 @@ sub trace { return main::trace(@_); }
         # Optional
         my $initialize = delete $args{'Initialize'};
         my $matches_all = delete $args{'Matches_All'} || 0;
+        my $format = delete $args{'Format'};
         # Rest of parameters passed on.
 
         my $range_list = Range_List->new(Initialize => $initialize,
@@ -5932,6 +5987,7 @@ sub trace { return main::trace(@_); }
                                       Full_Name => $full_name,
                                       _Property => $property,
                                       _Range_List => $range_list,
+                                      Format => $EMPTY_FORMAT,
                                       );
         my $addr = do { no overloading; pack 'J', $self; };
 
@@ -5942,6 +5998,10 @@ sub trace { return main::trace(@_); }
         $leader{$addr} = $self;
         $parent{$addr} = $self;
 
+        if (defined $format && $format ne $EMPTY_FORMAT) {
+            Carp::my_carp_bug("'Format' must be '$EMPTY_FORMAT' in a match table instead of '$format'.  Using '$EMPTY_FORMAT'");
+        }
+
         return $self;
     }
 
@@ -6070,7 +6130,7 @@ sub trace { return main::trace(@_); }
         return;
     }
 
-    sub is_equivalent_to {
+    sub is_set_equivalent_to {
         # Return boolean of whether or not the other object is a table of this
         # type and has been marked equivalent to this one.
 
@@ -6083,7 +6143,7 @@ sub trace { return main::trace(@_); }
         unless ($other->isa(__PACKAGE__)) {
             my $ref_other = ref $other;
             my $ref_self = ref $self;
-            Carp::my_carp_bug("Argument to 'is_equivalent_to' must be another $ref_self, not a '$ref_other'.  $other not set equivalent to $self.");
+            Carp::my_carp_bug("Argument to 'is_set_equivalent_to' must be another $ref_self, not a '$ref_other'.  $other not set equivalent to $self.");
             return 0;
         }
 
@@ -6101,13 +6161,6 @@ sub trace { return main::trace(@_); }
         my $other = shift;
         Carp::carp_extra_args(\@_) if main::DEBUG && @_;
 
-        unless ($other->isa(__PACKAGE__)) {
-            my $ref_other = ref $other;
-            my $ref_self = ref $self;
-            Carp::my_carp_bug("Argument to 'matches_identically_to' must be another $ref_self, not a '$ref_other'.  $other not set equivalent to $self.");
-            return 0;
-        }
-
         # These are ordered in increasing real time to figure out (at least
         # until a patch changes that and doesn't change this)
         return 0 if $self->max != $other->max;
@@ -6160,7 +6213,7 @@ sub trace { return main::trace(@_); }
 
         # If already are equivalent, no need to re-do it;  if subroutine
         # returns null, it found an error, also do nothing
-        my $are_equivalent = $self->is_equivalent_to($other);
+        my $are_equivalent = $self->is_set_equivalent_to($other);
         return if ! defined $are_equivalent || $are_equivalent;
 
         my $addr = do { no overloading; pack 'J', $self; };
@@ -7145,6 +7198,7 @@ sub trace { return main::trace(@_) if main::DEBUG && $to_trace }
                     aliases
                     comment
                     complete_name
+                    containing_range
                     core_access
                     count
                     default_map
@@ -7177,6 +7231,7 @@ sub trace { return main::trace(@_) if main::DEBUG && $to_trace }
                     status
                     status_info
                     to_output_map
+                    type_of
                     value_of
                     write
                 ))
@@ -7453,19 +7508,14 @@ sub force_unlink ($) {
     return;
 }
 
-sub write ($\@) {
-    # Given a filename and a reference to an array of lines, write the lines
-    # to the file
+sub write ($@) {
+    # Given a filename and references to arrays of lines, write the lines of
+    # each array to the file
     # Filename can be given as an arrayref of directory names
 
-    my $file  = shift;
-    my $lines_ref = shift;
-    Carp::carp_extra_args(\@_) if main::DEBUG && @_;
+    return Carp::carp_too_few_args(\@_, 2) if main::DEBUG && @_ < 2;
 
-    if (! defined $lines_ref) {
-        Carp::my_carp("Missing lines to write parameter for $file.  Writing skipped;");
-        return;
-    }
+    my $file  = shift;
 
     # Get into a single string if an array, and get rid of, in Unix terms, any
     # leading '.'
@@ -7478,10 +7528,6 @@ sub write ($\@) {
 
     push @files_actually_output, $file;
 
-    unless (@$lines_ref) {
-        Carp::my_carp("Output file '$file' is empty; writing it anyway;");
-    }
-
     force_unlink ($file);
 
     my $OUT;
@@ -7490,7 +7536,13 @@ sub write ($\@) {
         return;
     }
 
-    print $OUT @$lines_ref or die Carp::my_carp("write to '$file' failed: $!");
+    while (defined (my $lines_ref = shift)) {
+        unless (@$lines_ref) {
+            Carp::my_carp("An array of lines for writing to file '$file' is empty; writing it anyway;");
+        }
+
+        print $OUT @$lines_ref or die Carp::my_carp("write to '$file' failed: $!");
+    }
     close $OUT or die Carp::my_carp("close '$file' failed: $!");
 
     print "$file written.\n" if $verbosity >= $VERBOSE;
@@ -9151,7 +9203,7 @@ END
         my $Perl_decomp = Property->new('Perl_Decomposition_Mapping',
                                         Directory => File::Spec->curdir(),
                                         File => 'Decomposition',
-                                        Format => $STRING_FORMAT,
+                                        Format => $DECOMP_STRING_FORMAT,
                                         Internal_Only_Warning => 1,
                                         Perl_Extension => 1,
                                         Default_Map => $CODE_POINT,
@@ -9587,7 +9639,6 @@ END
             # essentially be this code.)  This uses the algorithm published by
             # Unicode.
             if (property_ref('Decomposition_Mapping')->to_output_map) {
-        local $to_trace = 1 if main::DEBUG;
                 for (my $S = $SBase; $S < $SBase + $SCount; $S++) {
                     use integer;
                     my $SIndex = $S - $SBase;
@@ -9982,6 +10033,7 @@ sub filter_old_style_case_folding {
     # it takes no part in anything we do.
     my $to_output_simple;
 
+    # XXX
     # These are experimental, perhaps will need these to pass to regcomp.c to
     # handle the cases where for example the Kelvin sign character folds to k,
     # and in regcomp, we need to know which of the characters can have a
@@ -10050,7 +10102,7 @@ sub filter_old_style_case_folding {
             $file->insert_adjusted_lines("$range; Simple_Case_Folding; $map");
         }
 
-        # Experimental, see comment above
+        # XXX Experimental, see comment above
         if ($type ne 'S' && hex($range) >= 256) {   # assumes range is 1 point
             my @folded = split ' ', $map;
             if (hex $folded[0] < 256 && @folded == 1) {
@@ -10065,7 +10117,7 @@ sub filter_old_style_case_folding {
     }
 
     sub post_fold {
-        # Experimental, see comment above
+        # XXX Experimental, see comment above
         return;
 
         #local $to_trace = 1 if main::DEBUG;
@@ -11472,7 +11524,7 @@ sub add_perl_synonyms() {
                 # name.  We could be in trouble, but not if this is just a
                 # synonym for another table that we have already made a child
                 # of the pre-existing one.
-                if ($pre_existing->is_equivalent_to($actual)) {
+                if ($pre_existing->is_set_equivalent_to($actual)) {
                     trace "$pre_existing is already equivalent to $actual; adding alias perl=$proposed_name to it" if main::DEBUG && $to_trace;
                     $pre_existing->add_alias($proposed_name);
                     next;
@@ -12689,7 +12741,7 @@ L<perlunicode>
 END
 
     # And write it.
-    main::write([ $pod_directory, "$pod_file.pod" ], @OUT);
+    main::write([ $pod_directory, "$pod_file.pod" ], \@OUT);
     return;
 }
 
@@ -12750,7 +12802,7 @@ END
 1;
 END
 
-    main::write("Heavy.pl", @heavy);
+    main::write("Heavy.pl", \@heavy);
     return;
 }
 
@@ -13801,6 +13853,7 @@ File::Find::find({
 }, File::Spec->curdir());
 
 my @mktables_list_output_files;
+my $old_start_time = 0;
 
 if (! -e $file_list) {
     print "'$file_list' doesn't exist, so forcing rebuild.\n" if $verbosity >= $VERBOSE;
@@ -13823,6 +13876,9 @@ else {
         for my $list ( \@input, \@mktables_list_output_files ) {
             while (<$file_handle>) {
                 s/^ \s+ | \s+ $//xg;
+                if (/^ \s* \# .* Autogenerated\ starting\ on\ (\d+)/x) {
+                    $old_start_time = $1;
+                }
                 next if /^ \s* (?: \# .* )? $/x;
                 last if /^ =+ $/x;
                 my ( $file ) = split /\t/;
@@ -13930,12 +13986,12 @@ if ( $verbosity >= $VERBOSE ) {
          "Checking ".scalar( @mktables_list_output_files )." output files.\n";
 }
 
-# We set $youngest to be the most recently changed input file, including this
-# program itself (done much earlier in this file)
+# We set $most_recent to be the most recently changed input file, including
+# this program itself (done much earlier in this file)
 foreach my $in (@input_files) {
-    my $age = -M $in;
-    next unless defined $age;        # Keep going even if missing a file
-    $youngest = $age if $age < $youngest;
+    next unless -e $in;        # Keep going even if missing a file
+    my $mod_time = (stat $in)[9];
+    $most_recent = $mod_time if $mod_time > $most_recent;
 
     # See that the input files have distinct names, to warn someone if they
     # are adding a new one
@@ -13948,29 +14004,30 @@ foreach my $in (@input_files) {
     }
 }
 
-my $ok = ! $write_unchanged_files
-        && scalar @mktables_list_output_files;        # If none known, rebuild
+my $rebuild = $write_unchanged_files    # Rebuild: if unconditional rebuild
+              || ! scalar @mktables_list_output_files  # or if no outputs known
+              || $old_start_time < $most_recent;       # or out-of-date
 
 # Now we check to see if any output files are older than youngest, if
 # they are, we need to continue on, otherwise we can presumably bail.
-if ($ok) {
+if (! $rebuild) {
     foreach my $out (@mktables_list_output_files) {
         if ( ! file_exists($out)) {
             print "'$out' is missing.\n" if $verbosity >= $VERBOSE;
-            $ok = 0;
+            $rebuild = 1;
             last;
          }
         #local $to_trace = 1 if main::DEBUG;
-        trace $youngest, -M $out if main::DEBUG && $to_trace;
-        if ( -M $out > $youngest ) {
-            #trace "$out: age: ", -M $out, ", youngest: $youngest\n" if main::DEBUG && $to_trace;
+        trace $most_recent, (stat $out)[9] if main::DEBUG && $to_trace;
+        if ( (stat $out)[9] <= $most_recent ) {
+            #trace "$out:  most recent mod time: ", (stat $out)[9], ", youngest: $most_recent\n" if main::DEBUG && $to_trace;
             print "'$out' is too old.\n" if $verbosity >= $VERBOSE;
-            $ok = 0;
+            $rebuild = 1;
             last;
         }
     }
 }
-if ($ok) {
+if (! $rebuild) {
     print "Files seem to be ok, not bothering to rebuild.  Add '-w' option to force build\n";
     exit(0);
 }
@@ -14014,11 +14071,12 @@ if ( $file_list and $make_list ) {
         return
     }
     else {
+        my $localtime = localtime $start_time;
         print $ofh <<"END";
 #
 # $file_list -- File list for $0.
 #
-#   Autogenerated on @{[scalar localtime]}
+#   Autogenerated starting on $start_time ($localtime)
 #
 # - First section is input files
 #   ($0 itself is not listed but is automatically considered an input)