From: Karl Williamson Date: Thu, 30 Jan 2020 18:11:58 +0000 (-0700) Subject: regen/mk_invlists.pl; White space, comments only X-Git-Tag: v5.31.9~106 X-Git-Url: https://perl5.git.perl.org/perl5.git/commitdiff_plain/cf2cd61916d700e50f4854db59d6c298b9f133e8 regen/mk_invlists.pl; White space, comments only --- diff --git a/charclass_invlists.h b/charclass_invlists.h index f4c7dbd..56f8f04 100644 --- a/charclass_invlists.h +++ b/charclass_invlists.h @@ -389118,7 +389118,7 @@ const char * const deprecated_property_msgs[] = { #define UNI_LB__SG (UNI_LB__SG_perl_aux + (MAX_UNI_KEYWORD_INDEX * 1)) typedef enum { - PERL_BIN_PLACEHOLDER = 0, /* So no real value is zero */ + PERL_BIN_PLACEHOLDER = 0, /* So no real value is zero */ UNI_ADLM, UNI_AEGEANNUMBERS, UNI_AGE__10, @@ -395171,5 +395171,5 @@ static const U8 WB_table[23][23] = { * a712c758275b460d18fa77a26ed3589689bb3f69dcc1ea99b913e32db92a5cd2 lib/unicore/version * 2680b9254eb236c5c090f11b149605043e8c8433661b96efc4a42fb4709342a5 regen/charset_translations.pl * 6bbad21de0848e0236b02f34f5fa0edd3cdae9ba8173cc9469a5513936b9e728 regen/mk_PL_charclass.pl - * eb6b8e366260282e03108163fbf907367474c469310d716a4a5c7b244d88ce45 regen/mk_invlists.pl + * 1ff99adce69b8c7dee89b9251a429512650b2418cf54f0523d62d3d348849cf8 regen/mk_invlists.pl * ex: set ro: */ diff --git a/lib/unicore/uni_keywords.pl b/lib/unicore/uni_keywords.pl index 701e4c2..c3b556e 100644 --- a/lib/unicore/uni_keywords.pl +++ b/lib/unicore/uni_keywords.pl @@ -1264,5 +1264,5 @@ # a712c758275b460d18fa77a26ed3589689bb3f69dcc1ea99b913e32db92a5cd2 lib/unicore/version # 2680b9254eb236c5c090f11b149605043e8c8433661b96efc4a42fb4709342a5 regen/charset_translations.pl # 6bbad21de0848e0236b02f34f5fa0edd3cdae9ba8173cc9469a5513936b9e728 regen/mk_PL_charclass.pl -# eb6b8e366260282e03108163fbf907367474c469310d716a4a5c7b244d88ce45 regen/mk_invlists.pl +# 1ff99adce69b8c7dee89b9251a429512650b2418cf54f0523d62d3d348849cf8 regen/mk_invlists.pl # ex: set ro: diff --git a/regen/mk_invlists.pl 
b/regen/mk_invlists.pl index 21e08ed..2a1efcf 100644 --- a/regen/mk_invlists.pl +++ b/regen/mk_invlists.pl @@ -394,7 +394,8 @@ sub output_invmap ($$$$$$$) { my $name_prefix; if ($input_format =~ / ^ [as] l? $ /x) { - $prop_name = (prop_aliases($prop_name))[1] // $prop_name =~ s/^_Perl_//r; # Get full name + $prop_name = (prop_aliases($prop_name))[1] + // $prop_name =~ s/^_Perl_//r; # Get full name my $short_name = (prop_aliases($prop_name))[0] // $prop_name; my @input_enums; @@ -588,10 +589,10 @@ sub output_invmap ($$$$$$$) { } } - # The short names tend to be two lower case letters, but it looks - # better for those if they are upper. XXX + # The short property names tend to be two lower case letters, but it + # looks better for those if they are upper. XXX $short_name = uc($short_name) if length($short_name) < 3 - || substr($short_name, 0, 1) =~ /[[:lower:]]/; + || substr($short_name, 0, 1) =~ /[[:lower:]]/; $name_prefix = "${short_name}_"; # Start the enum definition for this map @@ -739,10 +740,11 @@ sub output_invmap ($$$$$$$) { foreach my $table_number (@sorted_table_list) { my $table = $inverted_mults{$table_number}; output_table_header($out_fh, - $aux_declaration_type, - "$name_prefix$aux_table_prefix$table_number"); + $aux_declaration_type, + "$name_prefix$aux_table_prefix$table_number"); - # Earlier, we joined the elements of this table together with a comma + # Earlier, we joined the elements of this table together with + # a comma my @elements = split ",", $table; $aux_counts[$table_number] = scalar @elements; @@ -780,8 +782,9 @@ sub output_invmap ($$$$$$$) { "${name_prefix}${aux_table_prefix}lengths"); print $out_fh "\t0,\t/* Placeholder */\n"; for my $i (1 .. 
@sorted_table_list) { - print $out_fh ",\n" if $i > 1; - print $out_fh "\t$aux_counts[$i]\t/* $name_prefix$aux_table_prefix$i */"; + print $out_fh ",\n" if $i > 1; + print $out_fh + "\t$aux_counts[$i]\t/* $name_prefix$aux_table_prefix$i */"; } print $out_fh "\n"; output_table_trailer(); @@ -1213,7 +1216,8 @@ sub output_table_common { $spacers[$i] = " " x (length($names_ref->[$i]) - $column_width); } - output_table_header($out_fh, $table_type, "${property}_table", undef, $size, $size); + output_table_header($out_fh, $table_type, "${property}_table", undef, + $size, $size); # Calculate the column heading line my $header_line = "/* " @@ -2397,9 +2401,29 @@ push @props, sort { prop_name_for_cmp($a) cmp prop_name_for_cmp($b) } qw( ); # NOTE that the convention is that extra enum values come # after the property name, separated by commas, with the enums - # that aren't ever defined by Unicode coming last, at least 4 - # all-uppercase characters. The others are enum names that - # are needed by perl, but aren't in all Unicode releases. + # that aren't ever defined by Unicode (with some exceptions) + # containing at least 4 all-uppercase characters. + + # Some of the enums are current official property values that + # are needed for the rules in constructing certain tables in + # this file, and perhaps in regexec.c as well. They are here + # so that things don't crash when compiled on earlier Unicode + # releases where they don't exist. Thus the rules that use + # them still get compiled, but no code point actually uses + # them, hence they won't get exercised on such Unicode + # versions, but the code will still compile and run, though + # may not give the precise results that those versions would + # expect, but reasonable results nonetheless. + # + # Other enums are due to the fact that Unicode has in more + # recent versions added criteria to the rules in these extra + # tables that are based on factors outside the property + # values. 
And those have to be accounted for, essentially by + # here splitting certain enum equivalence classes based on + # those extra rules. + # + # EDGE is supposed to be a boundary between some types of + # enums, but khw thinks that isn't valid any more. my @bin_props; my @perl_prop_synonyms; @@ -2556,7 +2580,7 @@ foreach my $property (sort } } -@bin_props = sort { exists $keep_together{lc $b} <=> exists $keep_together{lc $a} +@bin_props = sort { exists $keep_together{lc $b} <=> exists $keep_together{lc $a} or $a cmp $b } @bin_props; @perl_prop_synonyms = sort(uniques(@perl_prop_synonyms)); @@ -2583,7 +2607,8 @@ foreach my $prop (@props) { $extra_enums = $1 if $prop_name =~ s/, ( .* ) //x; my $lookup_prop = $prop_name; $prop_name = sanitize_name($prop_name); - $prop_name = $table_name_prefix . $prop_name if grep { lc $lookup_prop eq lc $_ } @bin_props; + $prop_name = $table_name_prefix . $prop_name + if grep { lc $lookup_prop eq lc $_ } @bin_props; my $l1_only = ($lookup_prop =~ s/^L1Posix/XPosix/ or $lookup_prop =~ s/^L1//); my $nonl1_only = 0; @@ -2738,7 +2763,8 @@ foreach my $prop (@props) { # This shouldn't actually happen, as prop_invmap() returns # an extra element at the end that is beyond $upper_limit - die "inversion map (for $prop_name) that extends to infinity is unimplemented" unless @invlist > 1; + die "inversion map (for $prop_name) that extends to" + . 
" infinity is unimplemented" unless @invlist > 1; my $bucket; @@ -2819,7 +2845,8 @@ foreach my $prop (@props) { @{$mapped_lists{$bucket}} = sort{ $a <=> $b} @{$mapped_lists{$bucket}}; @{$mapped_lists{$bucket}} - = mk_invlist_from_sorted_cp_list(\@{$mapped_lists{$bucket}}); + = mk_invlist_from_sorted_cp_list( + \@{$mapped_lists{$bucket}}); # Add each even-numbered range in the bucket to %xlated; # so that the keys of %xlated become the range start code @@ -2835,9 +2862,10 @@ foreach my $prop (@props) { # so that later the adjusting doesn't think the # subsequent items can go away because of the # adjusting. - my $range_end = ($to_adjust && $bucket != $map_default) - ? $mapped_lists{$bucket}->[1] - 1 - : $range_start; + my $range_end = ( $to_adjust + && $bucket != $map_default) + ? $mapped_lists{$bucket}->[1] - 1 + : $range_start; for my $i ($range_start .. $range_end) { $xlated{$i} = $bucket; } @@ -2880,8 +2908,8 @@ foreach my $prop (@props) { unshift @invmap, $xlated{$start}; } - # Finally prepend the inversion list we have just constructed to the - # one that contains anything we didn't process. + # Finally prepend the inversion list we have just constructed + # to the one that contains anything we didn't process. unshift @invlist, @new_invlist; } } @@ -2910,12 +2938,13 @@ foreach my $prop (@props) { # odd-numbered give ones that begin ranges that don't match. # If $i is odd, we are at the first code point above 255 that # doesn't match, which means the range it is ending does - # match, and crosses the 255/256 boundary. We want to include - # this ending point, so increment $i, so the splice below - # includes it. Conversely, if $i is even, it is the first - # code point above 255 that matches, which means there was no - # matching range that crossed the boundary, and we don't want - # to include this code point, so splice before it. + # match, and crosses the 255/256 boundary. 
We want to + # include this ending point, so increment $i, so the + # splice below includes it. Conversely, if $i is even, it + # is the first code point above 255 that matches, which + # means there was no matching range that crossed the + # boundary, and we don't want to include this code point, + # so splice before it. $i++ if $i % 2 != 0; # Remove everything past this. @@ -3006,7 +3035,8 @@ if (scalar keys %deprecated_tags) { } } -print $out_fh "\ntypedef enum {\n\tPERL_BIN_PLACEHOLDER = 0, /* So no real value is zero */\n\t"; +print $out_fh "\ntypedef enum {\n\tPERL_BIN_PLACEHOLDER = 0,", + " /* So no real value is zero */\n\t"; print $out_fh join ",\n\t", @enums; print $out_fh "\n"; print $out_fh "} binary_invlist_enum;\n"; @@ -3253,8 +3283,12 @@ print $keywords_fh <<"EOF"; EOF -my ($second_level, $seed1, $length_all_keys, $smart_blob, $rows) = MinimalPerfectHash::make_mph_from_hash(\%keywords); -print $keywords_fh MinimalPerfectHash::make_algo($second_level, $seed1, $length_all_keys, $smart_blob, $rows, undef, undef, undef, 'match_uniprop' ); +my ($second_level, $seed1, $length_all_keys, $smart_blob, $rows) + = MinimalPerfectHash::make_mph_from_hash(\%keywords); +print $keywords_fh MinimalPerfectHash::make_algo($second_level, $seed1, + $length_all_keys, $smart_blob, + $rows, undef, undef, undef, + 'match_uniprop' ); push @sources, 'regen/mph.pl'; read_only_bottom_close_and_rename($keywords_fh, \@sources); diff --git a/uni_keywords.h b/uni_keywords.h index 485a05f..34b9376 100644 --- a/uni_keywords.h +++ b/uni_keywords.h @@ -7287,6 +7287,6 @@ MPH_VALt match_uniprop( const unsigned char * const key, const U16 key_len ) { * a712c758275b460d18fa77a26ed3589689bb3f69dcc1ea99b913e32db92a5cd2 lib/unicore/version * 2680b9254eb236c5c090f11b149605043e8c8433661b96efc4a42fb4709342a5 regen/charset_translations.pl * 6bbad21de0848e0236b02f34f5fa0edd3cdae9ba8173cc9469a5513936b9e728 regen/mk_PL_charclass.pl - * eb6b8e366260282e03108163fbf907367474c469310d716a4a5c7b244d88ce45 
regen/mk_invlists.pl + * 1ff99adce69b8c7dee89b9251a429512650b2418cf54f0523d62d3d348849cf8 regen/mk_invlists.pl * cf1d68efb7d919d302c4005641eae8d36da6d7850816ad374b0c00b45e609f43 regen/mph.pl * ex: set ro: */