This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
regen/mk_invlists.pl; White space, comments only
author Karl Williamson <khw@cpan.org>
Thu, 30 Jan 2020 18:11:58 +0000 (11:11 -0700)
committer Karl Williamson <khw@cpan.org>
Thu, 30 Jan 2020 21:39:32 +0000 (14:39 -0700)
charclass_invlists.h
lib/unicore/uni_keywords.pl
regen/mk_invlists.pl
uni_keywords.h

index f4c7dbd..56f8f04 100644 (file)
@@ -389118,7 +389118,7 @@ const char * const deprecated_property_msgs[] = {
 #define UNI_LB__SG (UNI_LB__SG_perl_aux + (MAX_UNI_KEYWORD_INDEX * 1))
 
 typedef enum {
-       PERL_BIN_PLACEHOLDER = 0,  /* So no real value is zero */
+       PERL_BIN_PLACEHOLDER = 0, /* So no real value is zero */
        UNI_ADLM,
        UNI_AEGEANNUMBERS,
        UNI_AGE__10,
@@ -395171,5 +395171,5 @@ static const U8 WB_table[23][23] = {
  * a712c758275b460d18fa77a26ed3589689bb3f69dcc1ea99b913e32db92a5cd2 lib/unicore/version
  * 2680b9254eb236c5c090f11b149605043e8c8433661b96efc4a42fb4709342a5 regen/charset_translations.pl
  * 6bbad21de0848e0236b02f34f5fa0edd3cdae9ba8173cc9469a5513936b9e728 regen/mk_PL_charclass.pl
- * eb6b8e366260282e03108163fbf907367474c469310d716a4a5c7b244d88ce45 regen/mk_invlists.pl
+ * 1ff99adce69b8c7dee89b9251a429512650b2418cf54f0523d62d3d348849cf8 regen/mk_invlists.pl
  * ex: set ro: */
index 701e4c2..c3b556e 100644 (file)
 # a712c758275b460d18fa77a26ed3589689bb3f69dcc1ea99b913e32db92a5cd2 lib/unicore/version
 # 2680b9254eb236c5c090f11b149605043e8c8433661b96efc4a42fb4709342a5 regen/charset_translations.pl
 # 6bbad21de0848e0236b02f34f5fa0edd3cdae9ba8173cc9469a5513936b9e728 regen/mk_PL_charclass.pl
-# eb6b8e366260282e03108163fbf907367474c469310d716a4a5c7b244d88ce45 regen/mk_invlists.pl
+# 1ff99adce69b8c7dee89b9251a429512650b2418cf54f0523d62d3d348849cf8 regen/mk_invlists.pl
 # ex: set ro:
index 21e08ed..2a1efcf 100644 (file)
@@ -394,7 +394,8 @@ sub output_invmap ($$$$$$$) {
     my $name_prefix;
 
     if ($input_format =~ / ^ [as] l? $ /x) {
-        $prop_name = (prop_aliases($prop_name))[1] // $prop_name =~ s/^_Perl_//r; # Get full name
+        $prop_name = (prop_aliases($prop_name))[1]
+     // $prop_name =~ s/^_Perl_//r; # Get full name
         my $short_name = (prop_aliases($prop_name))[0] // $prop_name;
         my @input_enums;
 
@@ -588,10 +589,10 @@ sub output_invmap ($$$$$$$) {
             }
         }
 
-        # The short names tend to be two lower case letters, but it looks
-        # better for those if they are upper. XXX
+        # The short property names tend to be two lower case letters, but it
+        # looks better for those if they are upper. XXX
         $short_name = uc($short_name) if length($short_name) < 3
-                                      || substr($short_name, 0, 1) =~ /[[:lower:]]/;
+                                || substr($short_name, 0, 1) =~ /[[:lower:]]/;
         $name_prefix = "${short_name}_";
 
         # Start the enum definition for this map
@@ -739,10 +740,11 @@ sub output_invmap ($$$$$$$) {
             foreach my $table_number (@sorted_table_list) {
                 my $table = $inverted_mults{$table_number};
                 output_table_header($out_fh,
-                                       $aux_declaration_type,
-                                       "$name_prefix$aux_table_prefix$table_number");
+                                $aux_declaration_type,
+                                "$name_prefix$aux_table_prefix$table_number");
 
-                # Earlier, we joined the elements of this table together with a comma
+                # Earlier, we joined the elements of this table together with
+                # a comma
                 my @elements = split ",", $table;
 
                 $aux_counts[$table_number] = scalar @elements;
@@ -780,8 +782,9 @@ sub output_invmap ($$$$$$$) {
                                    "${name_prefix}${aux_table_prefix}lengths");
             print $out_fh "\t0,\t/* Placeholder */\n";
             for my $i (1 .. @sorted_table_list) {
-                print $out_fh  ",\n" if $i > 1;
-                print $out_fh  "\t$aux_counts[$i]\t/* $name_prefix$aux_table_prefix$i */";
+                print $out_fh ",\n" if $i > 1;
+                print $out_fh
+                    "\t$aux_counts[$i]\t/* $name_prefix$aux_table_prefix$i */";
             }
             print $out_fh "\n";
             output_table_trailer();
@@ -1213,7 +1216,8 @@ sub output_table_common {
         $spacers[$i] = " " x (length($names_ref->[$i]) - $column_width);
     }
 
-    output_table_header($out_fh, $table_type, "${property}_table", undef, $size, $size);
+    output_table_header($out_fh, $table_type, "${property}_table", undef,
+                        $size, $size);
 
     # Calculate the column heading line
     my $header_line = "/* "
@@ -2397,9 +2401,29 @@ push @props, sort { prop_name_for_cmp($a) cmp prop_name_for_cmp($b) } qw(
                 );
                 # NOTE that the convention is that extra enum values come
                 # after the property name, separated by commas, with the enums
-                # that aren't ever defined by Unicode coming last, at least 4
-                # all-uppercase characters.  The others are enum names that
-                # are needed by perl, but aren't in all Unicode releases.
+                # that aren't ever defined by Unicode (with some exceptions)
+                # containing at least 4 all-uppercase characters.
+                #
+                # Some of the enums are current official property values that
+                # are needed for the rules in constructing certain tables in
+                # this file, and perhaps in regexec.c as well.  They are here
+                # so that things don't crash when compiled on earlier Unicode
+                # releases where they don't exist.  Thus the rules that use
+                # them still get compiled, but no code point actually uses
+                # them, hence they won't get exercized on such Unicode
+                # versions, but the code will still compile and run, though
+                # may not give the precise results that those versions would
+                # expect, but reasonable results nonetheless.
+                #
+                # Other enums are due to the fact that Unicode has in more
+                # recent versions added criteria to the rules in these extra
+                # tables that are based on factors outside the property
+                # values.  And those have to be accounted for, essentially by
+                # here splitting certain enum equivalence classes based on
+                # those extra rules.
+                #
+                # EDGE is supposed to be a boundary between some types of
+                # enums, but khw thinks that isn't valid any more.
 
 my @bin_props;
 my @perl_prop_synonyms;
@@ -2556,7 +2580,7 @@ foreach my $property (sort
     }
 }
 
-@bin_props = sort {  exists $keep_together{lc $b} <=> exists $keep_together{lc $a}
+@bin_props = sort { exists $keep_together{lc $b} <=> exists $keep_together{lc $a}
                    or $a cmp $b
                   } @bin_props;
 @perl_prop_synonyms = sort(uniques(@perl_prop_synonyms));
@@ -2583,7 +2607,8 @@ foreach my $prop (@props) {
     $extra_enums = $1 if $prop_name =~ s/, ( .* ) //x;
     my $lookup_prop = $prop_name;
     $prop_name = sanitize_name($prop_name);
-    $prop_name = $table_name_prefix . $prop_name if grep { lc $lookup_prop eq lc $_ } @bin_props;
+    $prop_name = $table_name_prefix . $prop_name
+                                if grep { lc $lookup_prop eq lc $_ } @bin_props;
     my $l1_only = ($lookup_prop =~ s/^L1Posix/XPosix/
                     or $lookup_prop =~ s/^L1//);
     my $nonl1_only = 0;
@@ -2738,7 +2763,8 @@ foreach my $prop (@props) {
 
                     # This shouldn't actually happen, as prop_invmap() returns
                     # an extra element at the end that is beyond $upper_limit
-                    die "inversion map (for $prop_name) that extends to infinity is unimplemented" unless @invlist > 1;
+                    die "inversion map (for $prop_name) that extends to"
+                      . " infinity is unimplemented" unless @invlist > 1;
 
                     my $bucket;
 
@@ -2819,7 +2845,8 @@ foreach my $prop (@props) {
                     @{$mapped_lists{$bucket}}
                                     = sort{ $a <=> $b} @{$mapped_lists{$bucket}};
                     @{$mapped_lists{$bucket}}
-                     = mk_invlist_from_sorted_cp_list(\@{$mapped_lists{$bucket}});
+                     = mk_invlist_from_sorted_cp_list(
+                                                    \@{$mapped_lists{$bucket}});
 
                     # Add each even-numbered range in the bucket to %xlated;
                     # so that the keys of %xlated become the range start code
@@ -2835,9 +2862,10 @@ foreach my $prop (@props) {
                             # so that later the adjusting doesn't think the
                             # subsequent items can go away because of the
                             # adjusting.
-                            my $range_end = ($to_adjust && $bucket != $map_default)
-                                             ? $mapped_lists{$bucket}->[1] - 1
-                                             : $range_start;
+                            my $range_end = (     $to_adjust
+                                               && $bucket != $map_default)
+                                            ? $mapped_lists{$bucket}->[1] - 1
+                                            : $range_start;
                             for my $i ($range_start .. $range_end) {
                                 $xlated{$i} = $bucket;
                             }
@@ -2880,8 +2908,8 @@ foreach my $prop (@props) {
                     unshift @invmap, $xlated{$start};
                 }
 
-                # Finally prepend the inversion list we have just constructed to the
-                # one that contains anything we didn't process.
+                # Finally prepend the inversion list we have just constructed
+                # to the one that contains anything we didn't process.
                 unshift @invlist, @new_invlist;
             }
         }
@@ -2910,12 +2938,13 @@ foreach my $prop (@props) {
                     # odd-numbered give ones that begin ranges that don't match.
                     # If $i is odd, we are at the first code point above 255 that
                     # doesn't match, which means the range it is ending does
-                    # match, and crosses the 255/256 boundary.  We want to include
-                    # this ending point, so increment $i, so the splice below
-                    # includes it.  Conversely, if $i is even, it is the first
-                    # code point above 255 that matches, which means there was no
-                    # matching range that crossed the boundary, and we don't want
-                    # to include this code point, so splice before it.
+                    # match, and crosses the 255/256 boundary.  We want to
+                    # include this ending point, so increment $i, so the
+                    # splice below includes it.  Conversely, if $i is even, it
+                    # is the first code point above 255 that matches, which
+                    # means there was no matching range that crossed the
+                    # boundary, and we don't want to include this code point,
+                    # so splice before it.
                     $i++ if $i % 2 != 0;
 
                     # Remove everything past this.
@@ -3006,7 +3035,8 @@ if (scalar keys %deprecated_tags) {
     }
 }
 
-print $out_fh "\ntypedef enum {\n\tPERL_BIN_PLACEHOLDER = 0,  /* So no real value is zero */\n\t";
+print $out_fh "\ntypedef enum {\n\tPERL_BIN_PLACEHOLDER = 0,",
+              " /* So no real value is zero */\n\t";
 print $out_fh join ",\n\t", @enums;
 print $out_fh "\n";
 print $out_fh "} binary_invlist_enum;\n";
@@ -3253,8 +3283,12 @@ print $keywords_fh <<"EOF";
 
 EOF
 
-my ($second_level, $seed1, $length_all_keys, $smart_blob, $rows) = MinimalPerfectHash::make_mph_from_hash(\%keywords);
-print $keywords_fh MinimalPerfectHash::make_algo($second_level, $seed1, $length_all_keys, $smart_blob, $rows, undef, undef, undef, 'match_uniprop' );
+my ($second_level, $seed1, $length_all_keys, $smart_blob, $rows)
+                        = MinimalPerfectHash::make_mph_from_hash(\%keywords);
+print $keywords_fh MinimalPerfectHash::make_algo($second_level, $seed1,
+                                                 $length_all_keys, $smart_blob,
+                                                 $rows, undef, undef, undef,
+                                                 'match_uniprop' );
 
 push @sources, 'regen/mph.pl';
 read_only_bottom_close_and_rename($keywords_fh, \@sources);
index 485a05f..34b9376 100644 (file)
@@ -7287,6 +7287,6 @@ MPH_VALt match_uniprop( const unsigned char * const key, const U16 key_len ) {
  * a712c758275b460d18fa77a26ed3589689bb3f69dcc1ea99b913e32db92a5cd2 lib/unicore/version
  * 2680b9254eb236c5c090f11b149605043e8c8433661b96efc4a42fb4709342a5 regen/charset_translations.pl
  * 6bbad21de0848e0236b02f34f5fa0edd3cdae9ba8173cc9469a5513936b9e728 regen/mk_PL_charclass.pl
- * eb6b8e366260282e03108163fbf907367474c469310d716a4a5c7b244d88ce45 regen/mk_invlists.pl
+ * 1ff99adce69b8c7dee89b9251a429512650b2418cf54f0523d62d3d348849cf8 regen/mk_invlists.pl
  * cf1d68efb7d919d302c4005641eae8d36da6d7850816ad374b0c00b45e609f43 regen/mph.pl
  * ex: set ro: */