+sub handle_compare_versions () {
+ # This fixes things up for the $compare_versions capability, where we
+ # compare Unicode version X with version Y (with Y > X), and we are
+ # running it on the Unicode Data for version Y.
+ #
+ # It works by calculating the code points whose meaning has been specified
+ # after release X, by using the Age property. The complement of this set
+ # is the set of code points whose meaning is unchanged between the
+ # releases. This is the set the program restricts itself to. It includes
+ # everything whose meaning has been specified by the time version X came
+ # along, plus those still unassigned by the time of version Y. (We will
+ # continue to use the word 'assigned' to mean 'meaning has been
+ # specified', as it's shorter and is accurate in all cases except the
+ # Noncharacter code points.)
+ #
+ # This function is run after all the properties specified by Unicode have
+ # been calculated for release Y. This makes sure we get all the nuances
+ # of Y's rules. (It is done before the Perl extensions are calculated, as
+ # those are based entirely on the Unicode ones.) But doing it after the
+ # Unicode table calculations means we have to fix up the Unicode tables.
+ # We do this by subtracting the code points that have been assigned since
+ # X (which is actually done by ANDing each table of assigned code points
+ # with the set of unchanged code points). Most Unicode properties are of
+ # the form such that all unassigned code points have a default, grab-bag,
+ # property value which is changed when the code point gets assigned. For
+ # these, we just remove the changed code points from the table for the
+ # latter property value, and add them back in to the grab-bag one. A few
+ # other properties are not entirely of this form and have values for some
+ # or all unassigned code points that are not the grab-bag one. These have
+ # to be handled specially, and are hard-coded in to this routine based on
+ # manual inspection of the Unicode character database. A list of the
+ # outlier code points is made for each of these properties, and those
+ # outliers are excluded from adding and removing from tables.
+ #
+ # Note that there are glitches when comparing against Unicode 1.1, as some
+ # Hangul syllables in it were later ripped out and eventually replaced
+ # with other things.
+
+ print "Fixing up for version comparison\n" if $verbosity >= $PROGRESS;
+
+ my $after_first_version = "All matching code points were added after "
+ . "Unicode $string_compare_versions";
+
+ # Calculate the delta as those code points that have been newly assigned
+ # since the first compare version.
+ my $delta = Range_List->new();
+ foreach my $table ($age->tables) {
+ use version;
+ next if $table == $age->table('Unassigned');
+ next if version->parse($table->name)
+ le version->parse($string_compare_versions);
+ $delta += $table;
+ }
+ if ($delta->is_empty) {
+ die ("No changes; perhaps you need a 'DAge.txt' file?");
+ }
+
+ my $unchanged = ~ $delta;
+
+ calculate_Assigned() if ! defined $Assigned;
+ $Assigned &= $unchanged;
+
+ # $Assigned now contains the code points that were assigned as of Unicode
+ # version X.
+
+ # A block is all or nothing. If nothing is assigned in it, it all goes
+ # back to the No_Block pool; but if even one code point is assigned, the
+ # block is retained.
+ my $no_block = $block->table('No_Block');
+ foreach my $this_block ($block->tables) {
+ next if $this_block == $no_block
+ || ! ($this_block & $Assigned)->is_empty;
+ $this_block->set_fate($SUPPRESSED, $after_first_version);
+ foreach my $range ($this_block->ranges) {
+ $block->replace_map($range->start, $range->end, 'No_Block')
+ }
+ $no_block += $this_block;
+ }
+
+ my @special_delta_properties; # List of properties that have to be
+ # handled specially.
+ my %restricted_delta; # Keys are the entries in
+ # @special_delta_properties; values
+ # are the range list of the code points
+ # that behave normally when they get
+ # assigned.
+
+ # In the next three properties, the Default Ignorable code points are
+ # outliers.
+ calculate_DI();
+ $DI &= $unchanged;
+
+ push @special_delta_properties, property_ref('_Perl_GCB');
+ $restricted_delta{$special_delta_properties[-1]} = ~ $DI;
+
+ if (defined (my $cwnfkcc = property_ref('Changes_When_NFKC_Casefolded')))
+ {
+ push @special_delta_properties, $cwnfkcc;
+ $restricted_delta{$special_delta_properties[-1]} = ~ $DI;
+ }
+
+ calculate_NChar(); # Non-character code points
+ $NChar &= $unchanged;
+
+ # This may have to be updated from time-to-time to get the most accurate
+ # results.
+ my $default_BC_non_LtoR = Range_List->new(Initialize =>
+ # These came from the comments in v8.0 DBidiClass.txt
+ [ # AL
+ 0x0600 .. 0x07BF,
+ 0x08A0 .. 0x08FF,
+ 0xFB50 .. 0xFDCF,
+ 0xFDF0 .. 0xFDFF,
+ 0xFE70 .. 0xFEFF,
+ 0x1EE00 .. 0x1EEFF,
+ # R
+ 0x0590 .. 0x05FF,
+ 0x07C0 .. 0x089F,
+ 0xFB1D .. 0xFB4F,
+ 0x10800 .. 0x10FFF,
+ 0x1E800 .. 0x1EDFF,
+ 0x1EF00 .. 0x1EFFF,
+ # ET
+ 0x20A0 .. 0x20CF,
+ ]
+ );
+ $default_BC_non_LtoR += $DI + $NChar;
+ push @special_delta_properties, property_ref('BidiClass');
+ $restricted_delta{$special_delta_properties[-1]} = ~ $default_BC_non_LtoR;
+
+ if (defined (my $eaw = property_ref('East_Asian_Width'))) {
+
+ my $default_EA_width_W = Range_List->new(Initialize =>
+ # From comments in v8.0 EastAsianWidth.txt
+ [
+ 0x3400 .. 0x4DBF,
+ 0x4E00 .. 0x9FFF,
+ 0xF900 .. 0xFAFF,
+ 0x20000 .. 0x2A6DF,
+ 0x2A700 .. 0x2B73F,
+ 0x2B740 .. 0x2B81F,
+ 0x2B820 .. 0x2CEAF,
+ 0x2F800 .. 0x2FA1F,
+ 0x20000 .. 0x2FFFD,
+ 0x30000 .. 0x3FFFD,
+ ]
+ );
+ push @special_delta_properties, $eaw;
+ $restricted_delta{$special_delta_properties[-1]}
+ = ~ $default_EA_width_W;
+
+ # Line break came along in the same release as East_Asian_Width, and
+ # the non-grab-bag default set is a superset of the EAW one.
+ if (defined (my $lb = property_ref('Line_Break'))) {
+ my $default_LB_non_XX = Range_List->new(Initialize =>
+ # From comments in v8.0 LineBreak.txt
+ [ 0x20A0 .. 0x20CF ]);
+ $default_LB_non_XX += $default_EA_width_W;
+ push @special_delta_properties, $lb;
+ $restricted_delta{$special_delta_properties[-1]}
+ = ~ $default_LB_non_XX;
+ }
+ }
+
+ # Go through every property, skipping those we've already worked on, those
+ # that are immutable, and the perl ones that will be calculated after this
+ # routine has done its fixup.
+ foreach my $property (property_ref('*')) {
+ next if $property == $perl # Done later in the program
+ || $property == $block # Done just above
+ || $property == $DI # Done just above
+ || $property == $NChar # Done just above
+
+ # The next two are invariant across Unicode versions
+ || $property == property_ref('Pattern_Syntax')
+ || $property == property_ref('Pattern_White_Space');
+
+ # Find the grab-bag value.
+ my $default_map = $property->default_map;
+
+ if (! $property->to_create_match_tables) {
+
+ # Here there aren't any match tables. So far, all such properties
+ # have a default map, and don't require special handling. Just
+ # change each newly assigned code point back to the default map,
+ # as if they were unassigned.
+ foreach my $range ($delta->ranges) {
+ $property->add_map($range->start,
+ $range->end,
+ $default_map,
+ Replace => $UNCONDITIONALLY);
+ }
+ }
+ else { # Here there are match tables. Find the one (if any) for the
+ # grab-bag value that unassigned code points go to.
+ my $default_table;
+ if (defined $default_map) {
+ $default_table = $property->table($default_map);
+ }
+
+ # If some code points don't go back to the the grab-bag when they
+ # are considered unassigned, exclude them from the list that does
+ # that.
+ my $this_delta = $delta;
+ my $this_unchanged = $unchanged;
+ if (grep { $_ == $property } @special_delta_properties) {
+ $this_delta = $delta & $restricted_delta{$property};
+ $this_unchanged = ~ $this_delta;
+ }
+
+ # Fix up each match table for this property.
+ foreach my $table ($property->tables) {
+ if (defined $default_table && $table == $default_table) {
+
+ # The code points assigned after release X (the ones we
+ # are excluding in this routine) go back on to the default
+ # (grab-bag) table. However, some of these tables don't
+ # actually exist, but are specified solely by the other
+ # tables. (In a binary property, we don't need to
+ # actually have an 'N' table, as it's just the complement
+ # of the 'Y' table.) Such tables will be locked, so just
+ # skip those.
+ $table += $this_delta unless $table->locked;
+ }
+ else {
+
+ # Here the table is not for the default value. We need to
+ # subtract the code points we are ignoring for this
+ # comparison (the deltas) from it. But if the table
+ # started out with nothing, no need to exclude anything,
+ # and want to skip it here anyway, so it gets listed
+ # properly in the pod.
+ next if $table->is_empty;
+
+ # Save the deltas for later, before we do the subtraction
+ my $deltas = $table & $this_delta;
+
+ $table &= $this_unchanged;
+
+ # Suppress the table if the subtraction left it with
+ # nothing in it
+ if ($table->is_empty) {
+ if ($property->type == $BINARY) {
+ push @tables_that_may_be_empty, $table->complete_name;
+ }
+ else {
+ $table->set_fate($SUPPRESSED, $after_first_version);
+ }
+ }
+
+ # Now we add the removed code points to the property's
+ # map, as they should now map to the grab-bag default
+ # property (which they did in the first comparison
+ # version). But we don't have to do this if the map is
+ # only for internal use.
+ if (defined $default_map && $property->to_output_map) {
+
+ # The gc property has pseudo property values whose names
+ # have length 1. These are the union of all the
+ # property values whose name is longer than 1 and
+ # whose first letter is all the same. The replacement
+ # is done once for the longer-named tables.
+ next if $property == $gc && length $table->name == 1;
+
+ foreach my $range ($deltas->ranges) {
+ $property->add_map($range->start,
+ $range->end,
+ $default_map,
+ Replace => $UNCONDITIONALLY);
+ }
+ }
+ }
+ }
+ }
+ }
+
+ # The above code doesn't work on 'gc=C', as it is a superset of the default
+ # ('Cn') table. It's easiest to just special case it here.
+ my $C = $gc->table('C');
+ $C += $gc->table('Cn');
+
+ return;
+}
+