# that instituted the change to main::objaddr, and subsequent commits that
# changed 0+$self to pack 'J', $self.)
+my $start_time;
+BEGIN { # Get the time the script started running; do it at compiliation to
+ # get it as close as possible
+ $start_time= time;
+}
+
+
require 5.010_001;
use strict;
use warnings;
# Stores the most-recently changed file. If none have changed, can skip the
# build
-my $youngest = -M $0; # Do this before the chdir!
+my $most_recent = (stat $0)[9]; # Do this before the chdir!
# Change directories now, because need to read 'version' early.
if ($use_directory) {
$DEPRECATED => 'deprecated',
);
-# The format of the values of the map tables:
+# The format of the values of the tables:
+my $EMPTY_FORMAT = "";
my $BINARY_FORMAT = 'b';
my $DECIMAL_FORMAT = 'd';
my $FLOAT_FORMAT = 'f';
my $HEX_FORMAT = 'x';
my $RATIONAL_FORMAT = 'r';
my $STRING_FORMAT = 's';
+my $DECOMP_STRING_FORMAT = 'c';
my %map_table_formats = (
$BINARY_FORMAT => 'binary',
$INTEGER_FORMAT => 'integer',
$HEX_FORMAT => 'positive hex whole number; a code point',
$RATIONAL_FORMAT => 'rational: an integer or a fraction',
- $STRING_FORMAT => 'arbitrary string',
+ $STRING_FORMAT => 'string',
+ $DECOMP_STRING_FORMAT => 'Perl\'s internal (Normalize.pm) decompostion mapping',
);
# Unicode didn't put such derived files in a separate directory at first.
return $i + 1;
}
- sub value_of {
- # Returns the value associated with the code point, undef if none
+ sub containing_range {
+ # Returns the range object that contains the code point, undef if none
my $self = shift;
my $codepoint = shift;
# contains() returns 1 beyond where we should look
no overloading;
- return $ranges{pack 'J', $self}->[$i-1]->value;
+ return $ranges{pack 'J', $self}->[$i-1];
+ }
+
+ sub value_of {
+ # Returns the value associated with the code point, undef if none
+
+ my $self = shift;
+ my $codepoint = shift;
+ Carp::carp_extra_args(\@_) if main::DEBUG && @_;
+
+ my $range = $self->containing_range($codepoint);
+ return unless defined $range;
+
+ return $range->value;
+ }
+
+ sub type_of {
+ # Returns the type of the range containing the code point, undef if
+ # the code point is not in the table
+
+ my $self = shift;
+ my $codepoint = shift;
+ Carp::carp_extra_args(\@_) if main::DEBUG && @_;
+
+ my $range = $self->containing_range($codepoint);
+ return unless defined $range;
+
+ return $range->type;
}
sub _search_ranges {
# The constructor can override the global flag of the same name.
main::set_access('output_range_counts', \%output_range_counts, 'r');
+ my %format;
+ # The format of the entries of the table. This is calculated from the
+ # data in the table (or passed in the constructor). This is an enum e.g.,
+ # $STRING_FORMAT
+ main::set_access('format', \%format, 'r', 'p_s');
+
sub new {
# All arguments are key => value pairs, which you can see below, most
# of which match fields documented above. Otherwise: Pod_Entry,
$full_name{$addr} = delete $args{'Full_Name'};
my $complete_name = $complete_name{$addr}
= delete $args{'Complete_Name'};
+ $format{$addr} = delete $args{'Format'};
$internal_only{$addr} = delete $args{'Internal_Only_Warning'} || 0;
$output_range_counts{$addr} = delete $args{'Output_Range_Counts'};
$property{$addr} = delete $args{'_Property'};
# Here are the methods that are required to be defined by any derived
# class
for my $sub (qw(
+ handle_special_range
append_to_body
pre_body
))
- # append_to_body and pre_body are called in the write() method
- # to add stuff after the main body of the table, but before
- # its close; and to prepend stuff before the beginning of the
+ # write() knows how to write out normal ranges, but it calls
+ # handle_special_range() when it encounters a non-normal one.
+ # append_to_body() is called by it after it has handled all
+ # ranges to add anything after the main portion of the table.
+ # And finally, pre_body() is called after all this to build up
+ # anything that should appear before the main portion of the
+ # table. Doing it this way allows things in the middle to
+ # affect what should appear before the main portion of the
# table.
{
no strict "refs";
}
sub write {
- # Write a representation of the table to its file.
+ # Write a representation of the table to its file. It calls several
+ # functions furnished by sub-classes of this abstract base class to
+ # handle non-normal ranges, to add stuff before the table, and at its
+ # end.
my $self = shift;
my $tab_stops = shift; # The number of tab stops over to put any
my $addr = do { no overloading; pack 'J', $self; };
# Start with the header
- my @OUT = $self->header;
+ my @HEADER = $self->header;
# Then the comments
- push @OUT, "\n", main::simple_fold($comment{$addr}, '# '), "\n"
+ push @HEADER, "\n", main::simple_fold($comment{$addr}, '# '), "\n"
if $comment{$addr};
- # Then any pre-body stuff.
- my $pre_body = $self->pre_body;
- push @OUT, $pre_body, "\n" if $pre_body;
+ # Things discovered processing the main body of the document may
+ # affect what gets output before it, therefore pre_body() isn't called
+ # until after all other processing of the table is done.
- # The main body looks like a 'here' document
+ # The main body looks like a 'here' document.
+ my @OUT;
push @OUT, "return <<'END';\n";
if ($range_list{$addr}->is_empty) {
my $range_size_1 = $range_size_1{$addr};
# Output each range as part of the here document.
+ RANGE:
for my $set ($range_list{$addr}->ranges) {
+ if ($set->type != 0) {
+ $self->handle_special_range($set);
+ next RANGE;
+ }
my $start = $set->start;
my $end = $set->end;
my $value = $set->value;
# Don't output ranges whose value is the one to suppress
- next if defined $suppress_value && $value eq $suppress_value;
+ next RANGE if defined $suppress_value && $value eq $suppress_value;
# If has or wants a single point range output
if ($start == $end || $range_size_1) {
# And finish the here document.
push @OUT, "END\n";
+ # Done with the main portion of the body. Can now figure out what
+ # should appear before it in the file.
+ my $pre_body = $self->pre_body;
+ push @HEADER, $pre_body, "\n" if $pre_body;
+
# All these files have a .pl suffix
$file_path{$addr}->[-1] .= '.pl';
- main::write($file_path{$addr}, \@OUT);
+ main::write($file_path{$addr}, \@HEADER, \@OUT);
return;
}
# Accessors for the range list stored in this table. First for
# unconditional
for my $sub (qw(
+ containing_range
contains
count
each_range
min
range_count
reset_each_range
+ type_of
value_of
))
{
\%anomalous_entries,
'readable_array');
- my %format;
- # The format of the entries of the table. This is calculated from the
- # data in the table (or passed in the constructor). This is an enum e.g.,
- # $STRING_FORMAT
- main::set_access('format', \%format);
-
my %core_access;
# This is a string, solely for documentation, indicating how one can get
# access to this property via the Perl core.
main::set_access('core_access', \%core_access, 'r', 's');
- my %has_specials;
- # Boolean set when non-zero map-type ranges are added to this table,
- # which happens in only a few tables. This is purely for performance, to
- # avoid having to search through every table upon output, so if all the
- # non-zero maps got deleted before output, this would remain set, and the
- # only penalty would be performance. Currently, most map tables that get
- # output have specials in them, so this doesn't help that much anyway.
- main::set_access('has_specials', \%has_specials);
-
my %to_output_map;
# Boolean as to whether or not to write out this map table
main::set_access('to_output_map', \%to_output_map, 's');
my $core_access = delete $args{'Core_Access'};
my $default_map = delete $args{'Default_Map'};
- my $format = delete $args{'Format'};
my $property = delete $args{'_Property'};
my $full_name = delete $args{'Full_Name'};
# Rest of parameters passed on
$anomalous_entries{$addr} = [];
$core_access{$addr} = $core_access;
$default_map{$addr} = $default_map;
- $format{$addr} = $format;
$self->initialize($initialize) if defined $initialize;
my $addr = do { no overloading; pack 'J', $self; };
- $has_specials{$addr} = 1 if $type;
-
$self->_range_list->add_map($lower, $upper,
$string,
@_,
Replace => $UNCONDITIONALLY);
}
- # Copy the specials information from the other table to $self
- if ($has_specials{$other_addr}) {
- $has_specials{$addr} = 1;
- }
-
return;
}
my %swash_keys; # Makes sure don't duplicate swash names.
+ # The remaining variables are temporaries used while writing each table,
+ # to output special ranges.
+ my $has_hangul_syllables;
+ my @multi_code_point_maps; # Map is to more than one code point.
+
+ # The key is the base name of the code point, and the value is an
+ # array giving all the ranges that use this base name. Each range
+ # is actually a hash giving the 'low' and 'high' values of it.
+ my %names_ending_in_code_point;
+
+ # Inverse mapping. The list of ranges that have these kinds of
+ # names. Each element contains the low, high, and base names in a
+ # hash.
+ my @code_points_ending_in_code_point;
+
+ sub handle_special_range {
+ # Called in the middle of write when it finds a range it doesn't know
+ # how to handle.
+
+ my $self = shift;
+ my $range = shift;
+ Carp::carp_extra_args(\@_) if main::DEBUG && @_;
+
+ my $addr = do { no overloading; pack 'J', $self; };
+
+ my $type = $range->type;
+
+ my $low = $range->start;
+ my $high = $range->end;
+ my $map = $range->value;
+
+ # No need to output the range if it maps to the default.
+ return if $map eq $default_map{$addr};
+
+ # Switch based on the map type...
+ if ($type == $HANGUL_SYLLABLE) {
+
+ # These are entirely algorithmically determinable based on
+ # some constants furnished by Unicode; for now, just set a
+ # flag to indicate that have them. After everything is figured
+ # out, we will output the code that does the algorithm.
+ $has_hangul_syllables = 1;
+ }
+ elsif ($type == $CP_IN_NAME) {
+
+ # Code points whose the name ends in their code point are also
+ # algorithmically determinable, but need information about the map
+ # to do so. Both the map and its inverse are stored in data
+ # structures output in the file.
+ push @{$names_ending_in_code_point{$map}->{'low'}}, $low;
+ push @{$names_ending_in_code_point{$map}->{'high'}}, $high;
+
+ push @code_points_ending_in_code_point, { low => $low,
+ high => $high,
+ name => $map
+ };
+ }
+ elsif ($range->type == $MULTI_CP || $range->type == $NULL) {
+
+ # Multi-code point maps and null string maps have an entry
+ # for each code point in the range. They use the same
+ # output format.
+ for my $code_point ($low .. $high) {
+
+ # The pack() below can't cope with surrogates.
+ if ($code_point >= 0xD800 && $code_point <= 0xDFFF) {
+ Carp::my_carp("Surrogage code point '$code_point' in mapping to '$map' in $self. No map created");
+ next;
+ }
+
+ # Generate the hash entries for these in the form that
+ # utf8.c understands.
+ my $tostr = "";
+ my $to_name = "";
+ my $to_chr = "";
+ foreach my $to (split " ", $map) {
+ if ($to !~ /^$code_point_re$/) {
+ Carp::my_carp("Illegal code point '$to' in mapping '$map' from $code_point in $self. No map created");
+ next;
+ }
+ $tostr .= sprintf "\\x{%s}", $to;
+ $to = CORE::hex $to;
+ }
+
+ # I (khw) have never waded through this line to
+ # understand it well enough to comment it.
+ my $utf8 = sprintf(qq["%s" => "$tostr",],
+ join("", map { sprintf "\\x%02X", $_ }
+ unpack("U0C*", pack("U", $code_point))));
+
+ # Add a comment so that a human reader can more easily
+ # see what's going on.
+ push @multi_code_point_maps,
+ sprintf("%-45s # U+%04X", $utf8, $code_point);
+ $multi_code_point_maps[-1] .= " => $map";
+ }
+ }
+ else {
+ Carp::my_carp("Unrecognized map type '$range->type' in '$range' in $self. Not written");
+ }
+
+ return;
+ }
+
sub pre_body {
# Returns the string that should be output in the file before the main
- # body of this table. This includes some hash entries identifying the
- # format of the body, and what the single value should be for all
- # ranges missing from it. It also includes any code points which have
- # map_types that don't go in the main table.
+ # body of this table. It isn't called until the main body is
+ # calculated, saving a pass. The string includes some hash entries
+ # identifying the format of the body, and what the single value should
+ # be for all ranges missing from it. It also includes any code points
+ # which have map_types that don't go in the main table.
my $self = shift;
Carp::carp_extra_args(\@_) if main::DEBUG && @_;
}
$swash_keys{$name} = "$self";
- my $default_map = $default_map{$addr};
-
my $pre_body = "";
- if ($has_specials{$addr}) {
-
- # Here, some maps with non-zero type have been added to the table.
- # Go through the table and handle each of them. None will appear
- # in the body of the table, so delete each one as we go. The
- # code point count has already been calculated, so ok to delete
- # now.
-
- my @multi_code_point_maps;
- my $has_hangul_syllables = 0;
-
- # The key is the base name of the code point, and the value is an
- # array giving all the ranges that use this base name. Each range
- # is actually a hash giving the 'low' and 'high' values of it.
- my %names_ending_in_code_point;
-
- # Inverse mapping. The list of ranges that have these kinds of
- # names. Each element contains the low, high, and base names in a
- # hash.
- my @code_points_ending_in_code_point;
-
- my $range_map = $self->_range_list;
- foreach my $range ($range_map->ranges) {
- next unless $range->type != 0;
- my $low = $range->start;
- my $high = $range->end;
- my $map = $range->value;
- my $type = $range->type;
-
- # No need to output the range if it maps to the default. And
- # the write method won't output it either, so no need to
- # delete it to keep it from being output, and is faster to
- # skip than to delete anyway.
- next if $map eq $default_map;
-
- # Delete the range to keep write() from trying to output it
- $range_map->delete_range($low, $high);
-
- # Switch based on the map type...
- if ($type == $HANGUL_SYLLABLE) {
-
- # These are entirely algorithmically determinable based on
- # some constants furnished by Unicode; for now, just set a
- # flag to indicate that have them. Below we will output
- # the code that does the algorithm.
- $has_hangul_syllables = 1;
- }
- elsif ($type == $CP_IN_NAME) {
-
- # If the name ends in the code point it represents, are
- # also algorithmically determinable, but need information
- # about the map to do so. Both the map and its inverse
- # are stored in data structures output in the file.
- push @{$names_ending_in_code_point{$map}->{'low'}}, $low;
- push @{$names_ending_in_code_point{$map}->{'high'}}, $high;
-
- push @code_points_ending_in_code_point, { low => $low,
- high => $high,
- name => $map
- };
- }
- elsif ($range->type == $MULTI_CP || $range->type == $NULL) {
- # Multi-code point maps and null string maps have an entry
- # for each code point in the range. They use the same
- # output format.
- for my $code_point ($low .. $high) {
-
- # The pack() below can't cope with surrogates.
- if ($code_point >= 0xD800 && $code_point <= 0xDFFF) {
- Carp::my_carp("Surrogage code point '$code_point' in mapping to '$map' in $self. No map created");
- next;
- }
-
- # Generate the hash entries for these in the form that
- # utf8.c understands.
- my $tostr = "";
- foreach my $to (split " ", $map) {
- if ($to !~ /^$code_point_re$/) {
- Carp::my_carp("Illegal code point '$to' in mapping '$map' from $code_point in $self. No map created");
- next;
- }
- $tostr .= sprintf "\\x{%s}", $to;
- }
-
- # I (khw) have never waded through this line to
- # understand it well enough to comment it.
- my $utf8 = sprintf(qq["%s" => "$tostr",],
- join("", map { sprintf "\\x%02X", $_ }
- unpack("U0C*", pack("U", $code_point))));
-
- # Add a comment so that a human reader can more easily
- # see what's going on.
- push @multi_code_point_maps,
- sprintf("%-45s # U+%04X => %s", $utf8,
- $code_point,
- $map);
- }
- }
- else {
- Carp::my_carp("Unrecognized map type '$range->type' in '$range' in $self. Using type 0 instead");
- $range_map->add_map($low, $high, $map, Replace => $UNCONDITIONALLY, Type => 0);
- }
- } # End of loop through all ranges
-
- # Here have gone through the whole file. If actually generated
- # anything for each map type, add its respective header and
- # trailer
- if (@multi_code_point_maps) {
- $pre_body .= <<END;
+ # Here we assume we were called after have gone through the whole
+ # file. If we actually generated anything for each map type, add its
+ # respective header and trailer
+ if (@multi_code_point_maps) {
+ $pre_body .= <<END;
# Some code points require special handling because their mappings are each to
# multiple code points. These do not appear in the main body, but are defined
# under "use bytes"). Each value is the UTF-8 of the translation, for speed.
%utf8::ToSpec$name = (
END
- $pre_body .= join("\n", @multi_code_point_maps) . "\n);\n";
- }
-
- if ($has_hangul_syllables || @code_points_ending_in_code_point) {
-
- # Convert these structures to output format.
- my $code_points_ending_in_code_point =
- main::simple_dumper(\@code_points_ending_in_code_point,
- ' ' x 8);
- my $names = main::simple_dumper(\%names_ending_in_code_point,
- ' ' x 8);
-
- # Do the same with the Hangul names,
- my $jamo;
- my $jamo_l;
- my $jamo_v;
- my $jamo_t;
- my $jamo_re;
- if ($has_hangul_syllables) {
-
- # Construct a regular expression of all the possible
- # combinations of the Hangul syllables.
- my @L_re; # Leading consonants
- for my $i ($LBase .. $LBase + $LCount - 1) {
- push @L_re, $Jamo{$i}
- }
- my @V_re; # Middle vowels
- for my $i ($VBase .. $VBase + $VCount - 1) {
- push @V_re, $Jamo{$i}
- }
- my @T_re; # Trailing consonants
- for my $i ($TBase + 1 .. $TBase + $TCount - 1) {
- push @T_re, $Jamo{$i}
- }
-
- # The whole re is made up of the L V T combination.
- $jamo_re = '('
- . join ('|', sort @L_re)
- . ')('
- . join ('|', sort @V_re)
- . ')('
- . join ('|', sort @T_re)
- . ')?';
-
- # These hashes needed by the algorithm were generated
- # during reading of the Jamo.txt file
- $jamo = main::simple_dumper(\%Jamo, ' ' x 8);
- $jamo_l = main::simple_dumper(\%Jamo_L, ' ' x 8);
- $jamo_v = main::simple_dumper(\%Jamo_V, ' ' x 8);
- $jamo_t = main::simple_dumper(\%Jamo_T, ' ' x 8);
+ $pre_body .= join("\n", @multi_code_point_maps) . "\n);\n";
+ }
+
+ if ($has_hangul_syllables || @code_points_ending_in_code_point) {
+
+ # Convert these structures to output format.
+ my $code_points_ending_in_code_point =
+ main::simple_dumper(\@code_points_ending_in_code_point,
+ ' ' x 8);
+ my $names = main::simple_dumper(\%names_ending_in_code_point,
+ ' ' x 8);
+
+ # Do the same with the Hangul names,
+ my $jamo;
+ my $jamo_l;
+ my $jamo_v;
+ my $jamo_t;
+ my $jamo_re;
+ if ($has_hangul_syllables) {
+
+ # Construct a regular expression of all the possible
+ # combinations of the Hangul syllables.
+ my @L_re; # Leading consonants
+ for my $i ($LBase .. $LBase + $LCount - 1) {
+ push @L_re, $Jamo{$i}
+ }
+ my @V_re; # Middle vowels
+ for my $i ($VBase .. $VBase + $VCount - 1) {
+ push @V_re, $Jamo{$i}
+ }
+ my @T_re; # Trailing consonants
+ for my $i ($TBase + 1 .. $TBase + $TCount - 1) {
+ push @T_re, $Jamo{$i}
}
- $pre_body .= <<END;
+ # The whole re is made up of the L V T combination.
+ $jamo_re = '('
+ . join ('|', sort @L_re)
+ . ')('
+ . join ('|', sort @V_re)
+ . ')('
+ . join ('|', sort @T_re)
+ . ')?';
+
+ # These hashes needed by the algorithm were generated
+ # during reading of the Jamo.txt file
+ $jamo = main::simple_dumper(\%Jamo, ' ' x 8);
+ $jamo_l = main::simple_dumper(\%Jamo_L, ' ' x 8);
+ $jamo_v = main::simple_dumper(\%Jamo_V, ' ' x 8);
+ $jamo_t = main::simple_dumper(\%Jamo_T, ' ' x 8);
+ }
+
+ $pre_body .= <<END;
# To achieve significant memory savings when this file is read in,
# algorithmically derivable code points are omitted from the main body below.
$code_points_ending_in_code_point
);
END
- # Earlier releases didn't have Jamos. No sense outputting
- # them unless will be used.
- if ($has_hangul_syllables) {
- $pre_body .= <<END;
+ # Earlier releases didn't have Jamos. No sense outputting
+ # them unless will be used.
+ if ($has_hangul_syllables) {
+ $pre_body .= <<END;
# Convert from code point to Jamo short name for use in composing Hangul
# syllable names
my \$TCount = $TCount;
my \$NCount = \$VCount * \$TCount;
END
- } # End of has Jamos
+ } # End of has Jamos
- $pre_body .= << 'END';
+ $pre_body .= << 'END';
sub name_to_code_point_special {
my $name = shift;
# Returns undef if not one of the specially handled names; otherwise
# returns the code point equivalent to the input name
END
- if ($has_hangul_syllables) {
- $pre_body .= << 'END';
+ if ($has_hangul_syllables) {
+ $pre_body .= << 'END';
if (substr($name, 0, $HANGUL_SYLLABLE_LENGTH) eq $HANGUL_SYLLABLE) {
$name = substr($name, $HANGUL_SYLLABLE_LENGTH);
return ($L * $VCount + $V) * $TCount + $T + $SBase;
}
END
- }
- $pre_body .= << 'END';
+ }
+ $pre_body .= << 'END';
# Name must end in '-code_point' for this to handle.
if ($name !~ /^ (.*) - ($code_point_re) $/x) {
# Returns the name of a code point if algorithmically determinable;
# undef if not
END
- if ($has_hangul_syllables) {
- $pre_body .= << 'END';
+ if ($has_hangul_syllables) {
+ $pre_body .= << 'END';
# If in the Hangul range, calculate the name based on Unicode's
# algorithm
return $name;
}
END
- }
- $pre_body .= << 'END';
+ }
+ $pre_body .= << 'END';
# Look through list of these code points for one in range.
foreach my $hash (@code_points_ending_in_code_point) {
} # End closure
END
- } # End of has hangul or code point in name maps.
- } # End of has specials
+ } # End of has hangul or code point in name maps.
+
+ my $format = $self->format;
+
+ my $return = <<END;
+# The name this swash is to be known by, with the format of the mappings in
+# the main body of the table, and what all code points missing from this file
+# map to.
+\$utf8::SwashInfo{'To$name'}{'format'} = '$format'; # $map_table_formats{$format}
+END
+ my $default_map = $default_map{$addr};
+ $return .= "\$utf8::SwashInfo{'To$name'}{'missing'} = '$default_map';";
+
+ if ($default_map eq $CODE_POINT) {
+ $return .= ' # code point maps to itself';
+ }
+ elsif ($default_map eq "") {
+ $return .= ' # code point maps to the null string';
+ }
+ $return .= "\n";
+
+ $return .= $pre_body;
+
+ return $return;
+ }
+
+ sub write {
+ # Write the table to the file.
+
+ my $self = shift;
+ Carp::carp_extra_args(\@_) if main::DEBUG && @_;
+
+ my $addr = do { no overloading; pack 'J', $self; };
+
+ # Clear the temporaries
+ $has_hangul_syllables = 0;
+ undef @multi_code_point_maps;
+ undef %names_ending_in_code_point;
+ undef @code_points_ending_in_code_point;
# Calculate the format of the table if not already done.
- my $format = $format{$addr};
- my $property = $self->property;
- my $type = $property->type;
+ my $format = $self->format;
+ my $type = $self->property->type;
+ my $default_map = $self->default_map;
if (! defined $format) {
if ($type == $BINARY) {
# most restrictive, and so on.
$format = $DECIMAL_FORMAT;
foreach my $range (@ranges) {
+ next if $range->type != 0; # Non-normal ranges don't
+ # affect the main body
my $map = $range->value;
if ($map ne $default_map) {
last if $format eq $STRING_FORMAT; # already at
}
} # end of calculating format
- my $return = <<END;
-# The name this swash is to be known by, with the format of the mappings in
-# the main body of the table, and what all code points missing from this file
-# map to.
-\$utf8::SwashInfo{'To$name'}{'format'} = '$format'; # $map_table_formats{$format}
-END
- my $missing = $default_map;
- if ($missing eq $CODE_POINT
+ if ($default_map eq $CODE_POINT
&& $format ne $HEX_FORMAT
- && ! defined $format{$addr}) # Is expected if was manually set
+ && ! defined $self->format) # manual settings are always
+ # considered ok
{
Carp::my_carp_bug("Expecting hex format for mapping table for $self, instead got '$format'")
}
- $format{$addr} = $format;
- $return .= "\$utf8::SwashInfo{'To$name'}{'missing'} = '$missing';";
- if ($missing eq $CODE_POINT) {
- $return .= ' # code point maps to itself';
- }
- elsif ($missing eq "") {
- $return .= ' # code point maps to the null string';
- }
- $return .= "\n";
-
- $return .= $pre_body;
-
- return $return;
- }
-
- sub write {
- # Write the table to the file.
-
- my $self = shift;
- Carp::carp_extra_args(\@_) if main::DEBUG && @_;
- my $addr = do { no overloading; pack 'J', $self; };
+ $self->_set_format($format);
return $self->SUPER::write(
($self->property == $block)
? 7 # block file needs more tab stops
: 3,
- $default_map{$addr}); # don't write defaulteds
+ $default_map); # don't write defaulteds
}
# Accessors for the underlying list that should fail if locked.
# Optional
my $initialize = delete $args{'Initialize'};
my $matches_all = delete $args{'Matches_All'} || 0;
+ my $format = delete $args{'Format'};
# Rest of parameters passed on.
my $range_list = Range_List->new(Initialize => $initialize,
Full_Name => $full_name,
_Property => $property,
_Range_List => $range_list,
+ Format => $EMPTY_FORMAT,
);
my $addr = do { no overloading; pack 'J', $self; };
$leader{$addr} = $self;
$parent{$addr} = $self;
+ if (defined $format && $format ne $EMPTY_FORMAT) {
+ Carp::my_carp_bug("'Format' must be '$EMPTY_FORMAT' in a match table instead of '$format'. Using '$EMPTY_FORMAT'");
+ }
+
return $self;
}
return;
}
- sub is_equivalent_to {
+ sub is_set_equivalent_to {
# Return boolean of whether or not the other object is a table of this
# type and has been marked equivalent to this one.
unless ($other->isa(__PACKAGE__)) {
my $ref_other = ref $other;
my $ref_self = ref $self;
- Carp::my_carp_bug("Argument to 'is_equivalent_to' must be another $ref_self, not a '$ref_other'. $other not set equivalent to $self.");
+ Carp::my_carp_bug("Argument to 'is_set_equivalent_to' must be another $ref_self, not a '$ref_other'. $other not set equivalent to $self.");
return 0;
}
my $other = shift;
Carp::carp_extra_args(\@_) if main::DEBUG && @_;
- unless ($other->isa(__PACKAGE__)) {
- my $ref_other = ref $other;
- my $ref_self = ref $self;
- Carp::my_carp_bug("Argument to 'matches_identically_to' must be another $ref_self, not a '$ref_other'. $other not set equivalent to $self.");
- return 0;
- }
-
# These are ordered in increasing real time to figure out (at least
# until a patch changes that and doesn't change this)
return 0 if $self->max != $other->max;
# If already are equivalent, no need to re-do it; if subroutine
# returns null, it found an error, also do nothing
- my $are_equivalent = $self->is_equivalent_to($other);
+ my $are_equivalent = $self->is_set_equivalent_to($other);
return if ! defined $are_equivalent || $are_equivalent;
my $addr = do { no overloading; pack 'J', $self; };
aliases
comment
complete_name
+ containing_range
core_access
count
default_map
status
status_info
to_output_map
+ type_of
value_of
write
))
return;
}
-sub write ($\@) {
- # Given a filename and a reference to an array of lines, write the lines
- # to the file
+sub write ($@) {
+ # Given a filename and references to arrays of lines, write the lines of
+ # each array to the file
# Filename can be given as an arrayref of directory names
- my $file = shift;
- my $lines_ref = shift;
- Carp::carp_extra_args(\@_) if main::DEBUG && @_;
+ return Carp::carp_too_few_args(\@_, 2) if main::DEBUG && @_ < 2;
- if (! defined $lines_ref) {
- Carp::my_carp("Missing lines to write parameter for $file. Writing skipped;");
- return;
- }
+ my $file = shift;
# Get into a single string if an array, and get rid of, in Unix terms, any
# leading '.'
push @files_actually_output, $file;
- unless (@$lines_ref) {
- Carp::my_carp("Output file '$file' is empty; writing it anyway;");
- }
-
force_unlink ($file);
my $OUT;
return;
}
- print $OUT @$lines_ref or die Carp::my_carp("write to '$file' failed: $!");
+ while (defined (my $lines_ref = shift)) {
+ unless (@$lines_ref) {
+ Carp::my_carp("An array of lines for writing to file '$file' is empty; writing it anyway;");
+ }
+
+ print $OUT @$lines_ref or die Carp::my_carp("write to '$file' failed: $!");
+ }
close $OUT or die Carp::my_carp("close '$file' failed: $!");
print "$file written.\n" if $verbosity >= $VERBOSE;
my $Perl_decomp = Property->new('Perl_Decomposition_Mapping',
Directory => File::Spec->curdir(),
File => 'Decomposition',
- Format => $STRING_FORMAT,
+ Format => $DECOMP_STRING_FORMAT,
Internal_Only_Warning => 1,
Perl_Extension => 1,
Default_Map => $CODE_POINT,
# essentially be this code.) This uses the algorithm published by
# Unicode.
if (property_ref('Decomposition_Mapping')->to_output_map) {
- local $to_trace = 1 if main::DEBUG;
for (my $S = $SBase; $S < $SBase + $SCount; $S++) {
use integer;
my $SIndex = $S - $SBase;
# it takes no part in anything we do.
my $to_output_simple;
+ # XXX
# These are experimental, perhaps will need these to pass to regcomp.c to
# handle the cases where for example the Kelvin sign character folds to k,
# and in regcomp, we need to know which of the characters can have a
$file->insert_adjusted_lines("$range; Simple_Case_Folding; $map");
}
- # Experimental, see comment above
+ # XXX Experimental, see comment above
if ($type ne 'S' && hex($range) >= 256) { # assumes range is 1 point
my @folded = split ' ', $map;
if (hex $folded[0] < 256 && @folded == 1) {
}
sub post_fold {
- # Experimental, see comment above
+ # XXX Experimental, see comment above
return;
#local $to_trace = 1 if main::DEBUG;
# name. We could be in trouble, but not if this is just a
# synonym for another table that we have already made a child
# of the pre-existing one.
- if ($pre_existing->is_equivalent_to($actual)) {
+ if ($pre_existing->is_set_equivalent_to($actual)) {
trace "$pre_existing is already equivalent to $actual; adding alias perl=$proposed_name to it" if main::DEBUG && $to_trace;
$pre_existing->add_alias($proposed_name);
next;
END
# And write it.
- main::write([ $pod_directory, "$pod_file.pod" ], @OUT);
+ main::write([ $pod_directory, "$pod_file.pod" ], \@OUT);
return;
}
1;
END
- main::write("Heavy.pl", @heavy);
+ main::write("Heavy.pl", \@heavy);
return;
}
}, File::Spec->curdir());
my @mktables_list_output_files;
+my $old_start_time = 0;
if (! -e $file_list) {
print "'$file_list' doesn't exist, so forcing rebuild.\n" if $verbosity >= $VERBOSE;
for my $list ( \@input, \@mktables_list_output_files ) {
while (<$file_handle>) {
s/^ \s+ | \s+ $//xg;
+ if (/^ \s* \# .* Autogenerated\ starting\ on\ (\d+)/x) {
+ $old_start_time = $1;
+ }
next if /^ \s* (?: \# .* )? $/x;
last if /^ =+ $/x;
my ( $file ) = split /\t/;
"Checking ".scalar( @mktables_list_output_files )." output files.\n";
}
-# We set $youngest to be the most recently changed input file, including this
-# program itself (done much earlier in this file)
+# We set $most_recent to be the most recently changed input file, including
+# this program itself (done much earlier in this file)
foreach my $in (@input_files) {
- my $age = -M $in;
- next unless defined $age; # Keep going even if missing a file
- $youngest = $age if $age < $youngest;
+ next unless -e $in; # Keep going even if missing a file
+ my $mod_time = (stat $in)[9];
+ $most_recent = $mod_time if $mod_time > $most_recent;
# See that the input files have distinct names, to warn someone if they
# are adding a new one
}
}
-my $ok = ! $write_unchanged_files
- && scalar @mktables_list_output_files; # If none known, rebuild
+my $rebuild = $write_unchanged_files # Rebuild: if unconditional rebuild
+ || ! scalar @mktables_list_output_files # or if no outputs known
+ || $old_start_time < $most_recent; # or out-of-date
# Now we check to see if any output files are older than youngest, if
# they are, we need to continue on, otherwise we can presumably bail.
-if ($ok) {
+if (! $rebuild) {
foreach my $out (@mktables_list_output_files) {
if ( ! file_exists($out)) {
print "'$out' is missing.\n" if $verbosity >= $VERBOSE;
- $ok = 0;
+ $rebuild = 1;
last;
}
#local $to_trace = 1 if main::DEBUG;
- trace $youngest, -M $out if main::DEBUG && $to_trace;
- if ( -M $out > $youngest ) {
- #trace "$out: age: ", -M $out, ", youngest: $youngest\n" if main::DEBUG && $to_trace;
+ trace $most_recent, (stat $out)[9] if main::DEBUG && $to_trace;
+ if ( (stat $out)[9] <= $most_recent ) {
+ #trace "$out: most recent mod time: ", (stat $out)[9], ", youngest: $most_recent\n" if main::DEBUG && $to_trace;
print "'$out' is too old.\n" if $verbosity >= $VERBOSE;
- $ok = 0;
+ $rebuild = 1;
last;
}
}
}
-if ($ok) {
+if (! $rebuild) {
print "Files seem to be ok, not bothering to rebuild. Add '-w' option to force build\n";
exit(0);
}
return
}
else {
+ my $localtime = localtime $start_time;
print $ofh <<"END";
#
# $file_list -- File list for $0.
#
-# Autogenerated on @{[scalar localtime]}
+# Autogenerated starting on $start_time ($localtime)
#
# - First section is input files
# ($0 itself is not listed but is automatically considered an input)