This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
mktables: Allow strictly named map tables
authorKarl Williamson <khw@cpan.org>
Fri, 26 Jun 2015 17:50:18 +0000 (11:50 -0600)
committerKarl Williamson <khw@cpan.org>
Wed, 29 Jul 2015 04:15:54 +0000 (22:15 -0600)
There are several types of tables generated by mktables.  Most are
binary (match) tables, but another class is mapping tables.  The names
for these may be loosely matched, but until this commit only the match
tables could have strict matching applied.

Strict matching is used for certain table names where loose could be
ambiguous, and for all names that aren't to be used by anything except
the perl core.

charclass_invlists.h
lib/unicore/mktables
lib/utf8_heavy.pl
regcharclass.h

index 1722c41..ca01032 100644 (file)
@@ -99521,7 +99521,7 @@ static const UV XPosixXDigit_invlist[] = { /* for EBCDIC POSIX-BC */
  * 1a0687fb9c6c4567e853913549df0944fe40821279a3e9cdaa6ab8679bc286fd lib/unicore/extracted/DLineBreak.txt
  * 40bcfed3ca727c19e1331f6c33806231d5f7eeeabd2e6a9e06a3740c85d0c250 lib/unicore/extracted/DNumType.txt
  * a18d502bad39d527ac5586d7bc93e29f565859e3bcc24ada627eff606d6f5fed lib/unicore/extracted/DNumValues.txt
- * e4922dd43f4c37e32a02451581c90af7bc2c6351a0fb23f3f06313a618a6abb0 lib/unicore/mktables
+ * 571e444a844d3c9bbea922442d675b686aa453b65a3a71235e95982cd1aa5e61 lib/unicore/mktables
  * 462c9aaa608fb2014cd9649af1c5c009485c60b9c8b15b89401fdc10cf6161c6 lib/unicore/version
  * c6884f4d629f04d1316f3476cb1050b6a1b98ca30c903262955d4eae337c6b1e regen/charset_translations.pl
  * f199f92c0b5f87882b0198936ea8ef3dc43627b57a77ac3eb9250bd2664bbd88 regen/mk_invlists.pl
index 5135274..7ca6a4c 100644 (file)
@@ -1375,6 +1375,7 @@ my %loose_to_file_of;       # loosely maps table names to their respective
                             # files
 my %stricter_to_file_of;    # same; but for stricter mapping.
 my %loose_property_to_file_of; # Maps a loose property name to its map file
+my %strict_property_to_file_of; # Same, but strict
 my @inline_definitions = "V0"; # Each element gives a definition of a unique
                             # inversion list.  When a definition is inlined,
                             # its value in the hash it's in (one of the two
@@ -1387,6 +1388,8 @@ my %nv_floating_to_rational; # maps numeric values floating point numbers to
                              # their rational equivalent
 my %loose_property_name_of; # Loosely maps (non_string) property names to
                             # standard form
+my %strict_property_name_of; # Strictly maps (non_string) property names to
+                            # standard form
 my %string_property_loose_to_name; # Same, for string properties.
 my %loose_defaults;         # keys are of form "prop=value", where 'prop' is
                             # the property name in standard loose form, and
@@ -14867,7 +14870,12 @@ sub register_file_for_name($$$) {
         # property's map table
         foreach my $alias ($table->aliases) {
             my $name = $alias->name;
-            $loose_property_to_file_of{standardize($name)} = $file;
+            if ($name =~ /^_/) {
+                $strict_property_to_file_of{lc $name} = $file;
+            }
+            else {
+                $loose_property_to_file_of{standardize($name)} = $file;
+            }
         }
 
         # And a way for utf8_heavy to find the proper key in the SwashInfo
@@ -16387,6 +16395,10 @@ sub make_Heavy () {
                            = simple_dumper(\%loose_property_name_of, ' ' x 4);
     chomp $loose_property_name_of;
 
+    my $strict_property_name_of
+                           = simple_dumper(\%strict_property_name_of, ' ' x 4);
+    chomp $strict_property_name_of;
+
     my $stricter_to_file_of = simple_dumper(\%stricter_to_file_of, ' ' x 4);
     chomp $stricter_to_file_of;
 
@@ -16425,6 +16437,10 @@ sub make_Heavy () {
                         = simple_dumper(\%loose_property_to_file_of, ' ' x 4);
     chomp $loose_property_to_file_of;
 
+    my $strict_property_to_file_of
+                        = simple_dumper(\%strict_property_to_file_of, ' ' x 4);
+    chomp $strict_property_to_file_of;
+
     my $file_to_swash_name = simple_dumper(\%file_to_swash_name, ' ' x 4);
     chomp $file_to_swash_name;
 
@@ -16440,6 +16456,11 @@ $INTERNAL_ONLY_HEADER
 $loose_property_name_of
 );
 
+# Same, but strict names
+\%utf8::strict_property_name_of = (
+$strict_property_name_of
+);
+
 # Gives the definitions (in the form of inversion lists) for those properties
 # whose definitions aren't kept in files
 \@utf8::inline_definitions = (
@@ -16488,6 +16509,11 @@ $caseless_equivalent_to
 $loose_property_to_file_of
 );
 
+# Property names to mapping files
+\%utf8::strict_property_to_file_of = (
+$strict_property_to_file_of
+);
+
 # Files to the swash names within them.
 \%utf8::file_to_swash_name = (
 $file_to_swash_name
@@ -17309,12 +17335,14 @@ sub write_all_tables() {
                         }
                     }
                     else {
-                        if (exists ($loose_property_name_of{$alias_standard}))
-                        {
-                            Carp::my_carp("There already is a property with the same standard name as $alias_name: $loose_property_name_of{$alias_standard}.  Old name is retained");
+                        my $hash_ref = ($alias_standard =~ /^_/)
+                                       ? \%strict_property_name_of
+                                       : \%loose_property_name_of;
+                        if (exists $hash_ref->{$alias_standard}) {
+                            Carp::my_carp("There already is a property with the same standard name as $alias_name: $hash_ref->{$alias_standard}.  Old name is retained");
                         }
                         else {
-                            $loose_property_name_of{$alias_standard}
+                            $hash_ref->{$alias_standard}
                                                 = $standard_property_name;
                         }
 
index 1ba73b2..89b146c 100644 (file)
@@ -404,7 +404,11 @@ sub _loose_name ($) {
                 # If didn't find it, try again with looser matching by editing
                 # out the applicable characters on the rhs and looking up
                 # again.
+                my $strict_property_and_table;
                 if (! defined $file) {
+
+                    # This isn't used unless the name begins with 'to'
+                    $strict_property_and_table = $property_and_table =~  s/^to//r;
                     $table = _loose_name($table);
                     $property_and_table = "$prefix$table";
                     print STDERR __LINE__, ": $property_and_table\n" if DEBUG;
@@ -444,10 +448,19 @@ sub _loose_name ($) {
                 ##
                 # Only check if caller wants non-binary
                 my $retried = 0;
-                if ($minbits != 1 && $property_and_table =~ s/^to//) {{
+                if ($minbits != 1) {
+                    if ($property_and_table =~ s/^to//) {
                     # Look input up in list of properties for which we have
-                    # mapping files.
-                    if (defined ($file =
+                    # mapping files.  First do it with the strict approach
+                        if (defined ($file =
+                            $utf8::strict_property_to_file_of{$strict_property_and_table}))
+                        {
+                            $type = $utf8::file_to_swash_name{$file};
+                            print STDERR __LINE__, ": type set to $type\n" if DEBUG;
+                            $file = "$unicore_dir/$file.pl";
+                            last GETFILE;
+                        }
+                        elsif (defined ($file =
                           $utf8::loose_property_to_file_of{$property_and_table}))
                     {
                         $type = $utf8::file_to_swash_name{$file};
@@ -497,7 +510,8 @@ sub _loose_name ($) {
                         $file = "$unicore_dir/lib/$file.pl" unless $file =~ m!^#/!;
                         last GETFILE;
                     }
-                } }
+                }
+                }
 
                 ##
                 ## If we reach this line, it's because we couldn't figure
index e9f4832..71f10ab 100644 (file)
  * 1a0687fb9c6c4567e853913549df0944fe40821279a3e9cdaa6ab8679bc286fd lib/unicore/extracted/DLineBreak.txt
  * 40bcfed3ca727c19e1331f6c33806231d5f7eeeabd2e6a9e06a3740c85d0c250 lib/unicore/extracted/DNumType.txt
  * a18d502bad39d527ac5586d7bc93e29f565859e3bcc24ada627eff606d6f5fed lib/unicore/extracted/DNumValues.txt
- * e4922dd43f4c37e32a02451581c90af7bc2c6351a0fb23f3f06313a618a6abb0 lib/unicore/mktables
+ * 571e444a844d3c9bbea922442d675b686aa453b65a3a71235e95982cd1aa5e61 lib/unicore/mktables
  * 462c9aaa608fb2014cd9649af1c5c009485c60b9c8b15b89401fdc10cf6161c6 lib/unicore/version
  * c6884f4d629f04d1316f3476cb1050b6a1b98ca30c903262955d4eae337c6b1e regen/charset_translations.pl
  * 8b29da548b7ad90659de234b5061a8c9fb0f40322a256d60fc5e9385ae4ece0e regen/regcharclass.pl