Upgrade podlators from version 4.10 to 4.11

[perl5.git] / regen / mk_invlists.pl
diff --git a/regen/mk_invlists.pl b/regen/mk_invlists.pl

index 6f6fba3..fbec6c7 100644 (file)
--- a/regen/mk_invlists.pl
+++ b/regen/mk_invlists.pl
@@ -8,10 +8,12 @@ use Unicode::UCD qw(prop_aliases
                      prop_invlist
                      prop_invmap search_invlist
                      charprop
+                    num
                     );
  require './regen/regen_lib.pl';
  require './regen/charset_translations.pl';
  require './lib/unicore/Heavy.pl';
+use re "/aa";
  
  # This program outputs charclass_invlists.h, which contains various inversion
  # lists in the form of C arrays that are to be used as-is for inversion lists.
@@ -24,13 +26,13 @@ require './lib/unicore/Heavy.pl';
  # out-of-sync, or the wrong data structure being passed.  Currently that
  # random number is:
  
-# charclass_invlists.h now also has a partial implementation of inversion
-# maps; enough to generate tables for the line break properties, such as GCB
+# charclass_invlists.h now also contains inversion maps and enum definitions
+# for those maps that have a finite number of possible values
  
  my $VERSION_DATA_STRUCTURE_TYPE = 148565664;
  
  # integer or float
-my $numeric_re = qr/ ^ -? \d+ (:? \. \d+ )? $ /ax;
+my $numeric_re = qr/ ^ -? \d+ (?: \. \d+ )? $ /x;  # \d is ASCII-only via "use re '/aa'"
  
  my %keywords;
  my $table_name_prefix = "PL_";
@@ -92,7 +94,14 @@ foreach my $name (sort keys %utf8::loose_property_name_of) {
  my %keep_together = (
                          assigned => 1,
                          ascii => 1,
+                        upper => 1,
+                        lower => 1,
+                        title => 1,
                          cased => 1,
+                        uppercaseletter => 1,
+                        lowercaseletter => 1,
+                        titlecaseletter => 1,
+                        casedletter => 1,
                          vertspace => 1,
                          xposixalnum => 1,
                          xposixalpha => 1,
@@ -621,7 +630,7 @@ sub output_invmap ($$$$$$$) {
              # that.
              for (my $i = 0; $i < @decimals_invlist; $i += 2) {
                  my $code_point = $decimals_invlist[$i];
-                next if chr($code_point) !~ /\p{Nv=0}/;
+                next if num(chr($code_point)) ne '0';
  
                  # Turn the scripts this zero is in into a list.
                  my @scripts = split ",",
@@ -832,7 +841,7 @@ sub _Perl_IVCF {
      #    other.  This situation happens in Unicode 3.0.1, but probably no
      #    other version.
      foreach my $fold (keys %new) {
-        my $folds_to_string = $fold =~ /\D/a;
+        my $folds_to_string = $fold =~ /\D/;
  
          # If the bucket contains only one element, convert from an array to a
          # scalar
@@ -1030,7 +1039,7 @@ sub output_table_common {
  
      for my $i (0 .. $size - 1) {
          no warnings 'numeric';
-        $has_placeholder = 1 if $names_ref->[$i] =~ / ^ [[:lower:]] $ /ax;
+        $has_placeholder = 1 if $names_ref->[$i] =~ / ^ [[:lower:]] $ /x;
          $spacers[$i] = " " x (length($names_ref->[$i]) - $column_width);
      }
  
@@ -2182,6 +2191,27 @@ my %enums;
  my @deprecated_messages = "";   # Element [0] is a placeholder
  my %deprecated_tags;
  
+my $float_e_format = qr/ ^ -? \d \. \d+ e [-+] \d+ $ /x;  # e.g. 5.00e-01
+
+# Create another hash that maps the floating point x.yyEzz representation of
+# a numeric value to whatever %stricter_to_file_of maps the equivalent
+# rational to.  A typical entry in the latter hash is
+#
+#    'nv=1/2' => 'Nv/1_2',
+#
+# From that, this loop creates an entry keyed by just the float (the 'nv='
+# property name is prepended later, when the combined key list is processed):
+#    '5.00e-01' => 'Nv/1_2',
+#
+# %stricter_to_file_of contains far more than just the rationals, so rather
+# than iterate over it we use %utf8::nv_floating_to_rational, which should
+# have an entry for each nv in the former hash.
+my %floating_to_file_of;
+foreach my $key (keys %utf8::nv_floating_to_rational) {
+    my $value = $utf8::nv_floating_to_rational{$key};  # rational form, e.g. 1/2
+    $floating_to_file_of{$key} = $utf8::stricter_to_file_of{"nv=$value"};
+}
+
  # Collect all the binary properties from data in lib/unicore
  # Sort so that complements come after the main table, and the shortest
  # names first, finally alphabetically.  Also, sort together the tables we want
@@ -2191,11 +2221,13 @@ foreach my $property (sort
          {   exists $keep_together{lc $b} <=> exists $keep_together{lc $a}
           or $b =~ /posix/i <=> $a =~ /posix/i
           or $b =~ /perl/i <=> $a =~ /perl/i
+         or $a =~ $float_e_format <=> $b =~ $float_e_format
           or $a =~ /!/ <=> $b =~ /!/
           or length $a <=> length $b
           or $a cmp $b
          }   keys %utf8::loose_to_file_of,
-            keys %utf8::stricter_to_file_of
+            keys %utf8::stricter_to_file_of,
+            keys %floating_to_file_of
  ) {
  
      # These two hashes map properties to values that can be considered to
@@ -2203,11 +2235,15 @@ foreach my $property (sort
      # identical entries.  Otherwise they differ in some way.
      my $tag = $utf8::loose_to_file_of{$property};
      $tag = $utf8::stricter_to_file_of{$property} unless defined $tag;
+    $tag = $floating_to_file_of{$property} unless defined $tag;
  
      # The tag may contain an '!' meaning it is identical to the one formed
      # by removing the !, except that it is inverted.
      my $inverted = $tag =~ s/!//;
  
+    # This hash is lacking the property name
+    $property = "nv=$property" if $property =~ $float_e_format;
+
      # The list of 'prop=value' entries that this single entry expands to
      my @this_entries;
  
@@ -2408,9 +2444,10 @@ foreach my $prop (@props) {
          # 255 because a re-ordering could cause 256 to need to be in the same
          # range as 255.)
          if (       (@invmap && $maps_to_code_point)
-            || (   ($invlist[0] < 256
+            || (    @invlist
+                &&  $invlist[0] < 256
                  && (    $invlist[0] != 0
-                    || (scalar @invlist != 1 && $invlist[1] < 256)))))
+                    || (scalar @invlist != 1 && $invlist[1] < 256))))
          {
              $same_in_all_code_pages = 0;
              if (! @invmap) {    # Straight inversion list
@@ -2616,6 +2653,10 @@ foreach my $prop (@props) {
                  unshift @invlist, @new_invlist;
              }
          }
+        elsif (@invmap) {   # inversion maps can't cope with this variable
+                            # being true, even if it could be true
+            $same_in_all_code_pages = 0;
+        }
          else {
              $same_in_all_code_pages = 1;
          }
@@ -2787,7 +2828,7 @@ require './regen/mph.pl';
  sub token_name
  {
      my $name = sanitize_name(shift);
-    warn "$name contains non-word" if $name =~ /\W/a;
+    warn "$name contains non-word" if $name =~ /\W/;  # \W stays ASCII-only: file-wide "use re '/aa'"
  
      return "$table_name_prefix\U$name"
  }
@@ -2796,6 +2837,13 @@ my $keywords_fh = open_new('uni_keywords.h', '>',
                   {style => '*', by => 'regen/mk_invlists.pl',
                    from => "mph.pl"});
  
+no warnings 'once';   # presumably for $utf8::e_precision, named only here -- confirm
+print $keywords_fh <<"EOF";
+/* The precision to use in "%.*e" formats */
+#define PL_E_FORMAT_PRECISION $utf8::e_precision
+
+EOF
+
  my ($second_level, $seed1, $length_all_keys, $smart_blob, $rows) = MinimalPerfectHash::make_mph_from_hash(\%keywords);
  print $keywords_fh MinimalPerfectHash::make_algo($second_level, $seed1, $length_all_keys, $smart_blob, $rows, undef, undef, undef, 'match_uniprop' );