X-Git-Url: https://perl5.git.perl.org/perl5.git/blobdiff_plain/5a6319097f7a670153177fc870c130f13977a121..HEAD:/Porting/updateAUTHORS.pl diff --git a/Porting/updateAUTHORS.pl b/Porting/updateAUTHORS.pl index 981dbc1..bc31388 100755 --- a/Porting/updateAUTHORS.pl +++ b/Porting/updateAUTHORS.pl @@ -5,9 +5,11 @@ use warnings; use Getopt::Long qw(GetOptions); use Pod::Usage qw(pod2usage); use Data::Dumper; -use Encode qw(encode_utf8 decode_utf8 decode); +use Encode qw(encode_utf8 decode_utf8); use lib "./"; use Porting::updateAUTHORS; +use Test::More; +use Text::Wrap qw(wrap); # The style of this file is determined by: # @@ -20,15 +22,57 @@ my @OPTSPEC= qw( man authors_file=s mailmap_file=s + source_dir=s + validate|tap verbose+ + exclude_missing|exclude + exclude_contrib=s@ + exclude_me + dump_opts + + show_rank|rank + show_applied|thanks_applied|applied + show_stats|stats + show_who|who + show_files|files + show_file_changes|activity + show_file_chainsaw|chainsaw + + as_percentage|percentage + as_cumulative|cumulative + as_list|old_style + + in_reverse|reverse + with_rank_numbers|numbered|num + + from_commit|from=s + to_commit|to=s + + numstat + no_update + + change_name_for_name|change_name=s% + change_name_for_email=s% + change_email_for_name=s% + change_email_for_email|change_email=s% +); + +my %implies_numstat= ( + show_files => 1, + show_file_changes => 1, + show_file_chainsaw => 1, ); sub main { local $Data::Dumper::Sortkeys= 1; my %opts= ( - authors_file => "AUTHORS", - mailmap_file => ".mailmap", + authors_file => "AUTHORS", + mailmap_file => ".mailmap", + exclude_file => "Porting/exclude_contrib.txt", + from => "", + to => "", + exclude_contrib => [], ); ## Parse options and print usage if there is a syntax error, @@ -39,42 +83,372 @@ sub main { # support hyphens as well as underbars, # underbars must be first. Only handles two # part words right now. - s/\b([a-z]+)_([a-z]+)\b/${1}_${2}|${1}-${2}/gr - } @OPTSPEC + ref $_ ? 
$_ : s/\b([a-z]+)_([a-z]+)\b/${1}_${2}|${1}-${2}/gr + } @OPTSPEC, ) or pod2usage(2); + $opts{commit_range}= join " ", @ARGV; + if (!$opts{commit_range}) { + if ($opts{from_commit}) { + $opts{to_commit} ||= "HEAD"; + $opts{$_} =~ s/\.+\z// for qw(from_commit to_commit); + $opts{commit_range}= "$opts{from_commit}..$opts{to_commit}"; + } + } pod2usage(1) if $opts{help}; pod2usage(-verbose => 2) if $opts{man}; - my $self= Porting::updateAUTHORS->new(%opts); + foreach my $opt (keys %opts) { + $opts{numstat}++ if $implies_numstat{$opt}; + $opts{no_update}++ if $opt =~ /^show_/ or $opt eq "validate"; + } + + if (delete $opts{exclude_me}) { + my ($author_full)= + Porting::updateAUTHORS->current_author_name_email("full"); + my ($committer_full)= + Porting::updateAUTHORS->current_committer_name_email("full"); + + push @{ $opts{exclude_contrib} }, $author_full + if $author_full; + push @{ $opts{exclude_contrib} }, $committer_full + if $committer_full + and (!$author_full + or $committer_full ne $author_full); + } - $self->read_and_update(); + my $self= Porting::updateAUTHORS->new(%opts); - return 0; # 0 for no error - intended for exit(); + my $changed= $self->read_and_update(); + + if ($self->{validate}) { + for my $file_type (qw(authors_file mailmap_file exclude_file)) { + my $file= $self->{$file_type}; + my $changes= $self->changed_file($file); + ok(!$changes, "Is $file_type '$file' up to date?") + or diag $self->_diff_diag($file); + } + my $dupe_info= $self->dupe_info(); + ok(!$dupe_info, "No dupes in AUTHORS") + or diag $dupe_info; + + ok( + !$self->{missing_author}{$_}, + sprintf "%s is listed in AUTHORS", + _clean_name($_)) for sort keys %{ $self->{missing_author} || {} }; + + SKIP: { + # What is tested in this block: + # - check if there uncommitted changes in the git-tree + # - if so: is the (configured) author a known contributor? 
+ + skip "AUTOMATED_TESTING is set" if ($ENV{AUTOMATED_TESTING}); + + # Test::Smoke leaves some files in the build dir which causes + # this code to (correctly) conclude that there are uncommitted + # files which then proceeds to check the author name/email. + # + # On several smokers: + # - there is *no* git config; + # - a different name/address is configured then the one listed + # in AUTHORS; + # which causes the test to fail. + # + # Unfortunately Test::Smoke doesn't set the AUTOMATED_TESTING + # env-var.. Therefor check if mktest.out exist, it's one of the + # first files Test::Smoke creates in the build directory. + skip "Test::Smoke running" if (-e "./mktest.out"); + + my $uncommitted_files= $self->git_status_porcelain; + if ($uncommitted_files) { + my ($author_name, $author_email)= + $self->current_author_name_email(); + my ($committer_name, $committer_email)= + $self->current_committer_name_email(); + + ok($author_name && $author_email, + "git knows your author name and email."); + ok( + $committer_name && $committer_email, + "git knows your committer name and email." + ); + + my $author_known= + $self->known_contributor($author_name, $author_email); + my $committer_known= + $self->known_contributor($committer_name, $committer_email); + if ( + is( + $author_known && $committer_known, + 1, "Uncommitted changes are by a known contributor?" + )) + { + diag + "Testing uncommtted changes! Remember to commit before you push!" + if $ENV{TEST_VERBOSE}; + } + else { + diag error_advice_for_uncommitted_changes( + $author_name, $author_email, + $committer_name, $committer_email, + $uncommitted_files + ); + } + } + else { + # this will always pass... but it adds test output that is helpful + ok(!$uncommitted_files, + "git status --porcelain should be empty"); + } + } + + diag "\nFiles need updating! You probably just need to run\n\n", + " Porting/updateAUTHORS.pl\n\n", "and commit the results." 
+ if $self->changed_count; + done_testing(); + return 0; + } + elsif ($self->{show_rank}) { + $self->report_stats("who_stats", "author"); + return 0; + } + elsif ($self->{show_applied}) { + $self->report_stats("who_stats", "applied"); + return 0; + } + elsif ($self->{show_stats}) { + my @fields= ("author", "applied", "committer"); + push @fields, + ("num_files", "lines_added", "lines_removed", "lines_delta") + if $self->{numstat}; + $self->report_stats("who_stats", @fields); + return 0; + } + elsif ($self->{show_files}) { + $self->report_stats( + "file_stats", "commits", "lines_added", "lines_removed", + "lines_delta", "binary_change" + ); + return 0; + } + elsif ($self->{show_file_changes}) { + $self->report_stats( + "file_stats", "lines_delta", "lines_added", "lines_removed", + "commits" + ); + return 0; + } + elsif ($self->{show_file_chainsaw}) { + $self->{in_reverse}= !$self->{in_reverse}; + $self->report_stats( + "file_stats", "lines_delta", "lines_added", "lines_removed", + "commits" + ); + return 0; + } + elsif ($self->{show_who}) { + $self->print_who(); + return 0; + } + return $changed; # 0 means nothing changed } exit(main()) unless caller; +sub error_advice_for_uncommitted_changes { + my ( + $author_name, $author_email, $committer_name, + $committer_email, $uncommitted_files + )= @_; + $_ //= "" + for $author_name, $author_email, $committer_name, $committer_email; + my $extra= ""; + my @git_env_keys= + map { /^GIT_(AUTHOR|COMMITTER)_(NAME|EMAIL)\z/ ? "$_='$ENV{$_}'" : () } + sort keys %ENV; + if (@git_env_keys) { + $extra .= "\n" . wrap "", "", + "Its seems that your environment has " + . join(", ", @git_env_keys) + . " defined. This may cause this test to fail.\n\n"; + } + + my $quote= $^O =~ /Win/ ? '"' : "'"; + my @config= map decode_utf8($_), + `git config --get-regexp $quote^(user|author|committer).(name|email)$quote`; + if (@config) { + + $extra .= + "\nYou have configured the following relevant git config settings:\n\n" + . 
join("", + map { sprintf " %-16s = %s", split /\s+/, $_, 2 } @config) + . "\n"; + } + else { + $extra .= + "\nYou do not have any git user config set up, consider using\n\n" + . " git config user.name 'Your Name'\n" + . " git config user.email 'your\@email.com'\n\n"; + } + + my $props= ""; + if ( $author_name ne $committer_name + or $author_email ne $committer_email) + { + $props .= < - Automatically update F and F<.mailmap> -based on commit data. +and F based on commit data. =head1 SYNOPSIS -Porting/updateAUTHORS.pl +Porting/updateAUTHORS.pl [OPTIONS] [GIT_REF_RANGE] + +By default scans the commit history specified (or the entire history from the +current commit) and then updates F and F<.mailmap> so all contributors +are properly listed. Options: --help brief help message --man full documentation --verbose be verbose + Commit Range: + --from=GIT_REF Select commits to use + --to=GIT_REF Select commits to use, defaults to HEAD + File Locations: --authors-file=FILE override default of 'AUTHORS' --mailmap-file=FILE override default of '.mailmap' + Action Modifiers + --no-update Do not update. + --validate output TAP about status and change nothing + --exclude-missing Add new names to the exclude file so they never + appear in AUTHORS or .mailmap. + + Details Changes + Update canonical name or email in AUTHORS and .mailmap properly. 
+ --exclude-contrib NAME_AND_EMAIL + --exclude-me + --change-name OLD_NAME=NEW_NAME + --change-name-for-email OLD_ADDR=NEW_NAME + --change-email-for-name OLD_NAME=NEW_ADDR + --change-email OLD_ADDR=NEW_EMAIL + + Reports About People + --stats detailed report of authors and what they did + --who Sorted, wrapped list of who did what + --thanks-applied report who applied stuff for others + --rank report authors by number of commits created + + Reports About Files + --files detailed report files that were modified + --activity simple report of files that grew the most + --chainsaw simple report of files that shrank the most + + Report Modifiers + --percentage show percentages not counts + --cumulative show cumulative numbers not individual + --reverse show reports in reverse order + --numstat show additional file based data in some reports + (not needed for most reports) + --as-list show reports with names with common values + folded into a list like checkAUTHORS.pl used to + --numbered add rank numbers to reports where they are missing + =head1 OPTIONS =over 4 @@ -91,6 +465,17 @@ Prints the manual page and exits. Be verbose about what is happening. Can be repeated more than once. +=item C<--no-update> + +Do not update files on disk even if they need to be changed. + +=item C<--validate> + +=item C<--tap> + +Instead of modifying files, test to see which would be modified and +output TAP test output about the validation. + =item C<--authors-file=FILE> =item C<--authors_file=FILE> @@ -105,6 +490,153 @@ the F file in the current directory. Override the default location of the mailmap file, which is by default the F<.mailmap> file in the current directory. +=item C<--exclude-file=FILE> + +=item C<--exclude_file=FILE> + +Override the default location of the exclude file, which is by default +the F file reachable from the current +directory. 
+ +=item C<--exclude-contrib=NAME_AND_EMAIL> + +=item C<--exclude_contrib=NAME_AND_EMAIL> + +Exclude a specific name/email combination from our contributor datasets. +Can be repeated multiple times on the command line to remove multiple +items at once. If the contributor details correspond to a canonical +identity of a contributor (one that is in the AUTHORS file or on the +left in the .mailmap file) then ALL records, including those linked to +that identity in .mailmap will be marked for exclusion. This is similar +to C<--exclude-missing> but it only affects the specifically named +users. Note that the format for NAME_AND_EMAIL is similar to that of the +.mailmap file, email addresses and C< @github > style identifiers should +be wrapped in angle brackets like this: C<< <@github> >>, users with no +email in the AUTHORS file should use C<< >>. + +For example: + + Porting/updateAUTHORS.pl --exclude-contrib="Joe B " + +Would remove all references to "Joe B" from F<AUTHORS> and F<.mailmap> +and add the required entries to F<Porting/exclude_contrib.txt> such that +the contributor would never be automatically added back, and would be +automatically removed should someone re-add them manually. + +=item C<--exclude-missing> + +=item C<--exclude_missing> + +=item C<--exclude> + +Normally when the tool is run it *adds* missing data only. If this +option is set then the reverse will happen, any author data missing will +be marked as intentionally missing in such a way that future "normal" +runs of the script ignore the author(s) that were excluded. + +The exclude data is stored in F<Porting/exclude_contrib.txt> as a SHA256 +digest (in base 64) of the user name and email being excluded so that +the list itself doesn't contain the contributor details in plain text. + +The general idea is that if you want to remove someone from F<AUTHORS> +and F<.mailmap> you delete their details manually, and then run this +tool with the C<--exclude> option.
It is probably a good idea to run it +first without any arguments to make sure you don't exclude something or +someone you did not intend to. + +=item C<--stats> + +Show detailed stats about committers and the work they did in a tabular +form. If the C<--numstat> option is provided this report will provide +additional data about the files a developer worked on. May be slow the +first time it is used as git unpacks the relevant data. + +=item C<--who> + +Show a list of which committers and authors contributed to the project +in the selected range of commits. The list will contain the name only, +and will be sorted according to Unicode collation rules. This list is +suitable in release notes and similar contexts. + +=item C<--thanks-applied> + +Show a report of which committers applied work on behalf of +someone else, including counts. Modified by the C<--as-list> and +C<--numbered> options. + +=item C<--rank> + +Shows a report of which authors created the most commits. Modified by the +C<--as-list> and C<--numbered> options. + +=item C<--files> + +Show detailed stats about the files that have been modified in the +selected range of commits. Implies C<--numstat>. May be slow the first +time it is used as git unpacks the relevant data. + +=item C<--activity> + +Show simple stats about which files had the most additions. Implies +C<--numstat>. May be slow the first time it is used as git unpacks the +relevant data. + + +=item C<--chainsaw> + +Show simple stats about which files had the most removals. Implies +C<--numstat>. May be slow the first time it is used as git unpacks the +relevant data. + +=item C<--percentage> + +Show numeric data as percentages of the total, not counts. + +=item C<--cumulative> + +Show numeric data as cumulative counts in the reports. + +=item C<--reverse> + +Show the reports in reverse order to normal. + +=item C<--numstat> + +Gather additional data about the files that were changed, not just the +authors who did the changes.
This option currently is only necessary for +the C<--stats> option, which will display additional data when this +option is also provided. + +=item C<--as-list> + +Show the reports with name data rolled up together into a list like the +older checkAUTHORS.pl script would have. + +=item C<--numbered> + +Show an additional column with the rank number of a row in the report in +reports that do not normally show the rank number. + +=item C<--change-name OLD_NAME=NEW_NAME> + +=item C<--change-name-for-email OLD_EMAIL=NEW_NAME> + +=item C<--change-email OLD_EMAIL=NEW_EMAIL> + +=item C<--change-email-for-name OLD_NAME=NEW_EMAIL> + +Change email or name based on OLD_NAME or OLD_EMAIL. + +Eg, + + --change-name-for-email somebody@gmail.com="Bob Rob" + +would cause the preferred name for the person with the preferred email +C<somebody@gmail.com> to change to "Bob Rob" in our records. If that +person's name was "Daniel Dude" then we might have done this as well: + + --change-name "Bob Rob"="Daniel Dude" + +=back =head1 DESCRIPTION @@ -115,31 +647,106 @@ the files themselves. It uses no other sources of data. Expects to be run from the root directory of a git repo of perl. In simple, execute the script and it will either die with a helpful -message or it will update the files as necessary, possibly not at all if -there is no need to do so. Note it will actually rewrite the files at -least once, but it may not actually make any changes to their content. -Thus to use the script is currently required that the files are -modifiable. - -Review the changes it makes to make sure they are sane. If they are -commit. If they are not then update the AUTHORS or .mailmap files as is -appropriate and run the tool again. Typically you shouldn't need to do -either unless you are changing the default name or email for a user. For -instance if a person currently listed in the AUTHORS file whishes to -change their preferred name or email then change it in the AUTHORS file -and run the script again.
I am not sure when you might need to directly -modify .mailmap, usually modifying the AUTHORS file should suffice. +message or it will update the files as necessary, possibly not at all +if there is no need to do so. If the C<--validate> option is provided +the content will not be updated and instead the tool will act as a +test script validating that the F<AUTHORS> and F<.mailmap> files are +up to date. -=head1 TODO +By default the script operates on the *entire* history of Perl +development that is reachable from HEAD. This can be overridden by using +the C<--from> and C<--to> options, or providing a git commit range as an +argument after the options just like you might do with C<git log>. -More documentation and testing. +The script can also be used to produce various reports and other content +about the commits it has analyzed. + +=head2 ADDING A NEW CONTRIBUTOR + +Commit your changes. Run the tool with no arguments. It will add +anything that is missing. Check the changes and then commit them. + +=head2 CHANGING A CONTRIBUTORS CANONICAL NAME OR EMAIL + +Use the C<--change-name-for-name> and related options. This will do +things "properly" and update all the files. + +=head2 A CONTRIBUTOR WANTS TO BE FORGOTTEN -=head1 SEE ALSO +There are several ways to do this: -F +=over 2 + +=item Manual Exclusion + +Manually modify F<AUTHORS> and F<.mailmap> so the user details are +removed and then run this tool with the C<--exclude> option. This should +result in various SHA-256 digests (in base64) being added to +F<Porting/exclude_contrib.txt>. Commit the changes afterwards. + +=item Exclude Yourself + +Use the C<--exclude-me> option to the tool, review and commit the results. +This will use roughly the same rules that git would to figure out what your +name and email are. + +=item Exclude Someone Else + +Use the C<--exclude-contrib> option and specify their name and email. +For example + + --exclude-contrib="Their Name " + +Should exclude the person with this name from our files.
+ +=back + +Note that excluding a person by canonical details (that is the details +in the F<AUTHORS> file) will result in their .mailmap'ed names being +excluded as well. Excluding a person's secondary account details will +simply block that specific email from being listed, and is likely not +what you want to do most of the time. + +=head2 AFTER RUNNING THE TOOL + +Review the changes to make sure they are sane. If they are ok (and +they should be most of the time) commit. If they are not then update +the F<AUTHORS> or F<.mailmap> files as is appropriate and run the +tool again. + +Do not panic that your email details get added to F<.mailmap>, this is +by design so that your chosen name and email are displayed on GitHub and +in casual use of C<git log> and other C<git> tooling. + +=head1 RECIPES + + perl Porting/updateAUTHORS.pl --who --from=v5.31.6 --to=v5.31.7 + perl Porting/updateAUTHORS.pl --who v5.31.6..v5.31.7 + perl Porting/updateAUTHORS.pl --rank --percentage --from=v5.31.6 + perl Porting/updateAUTHORS.pl --thanks-applied --from=v5.31.6 + perl Porting/updateAUTHORS.pl --tap --from=v5.31.6 + perl Porting/updateAUTHORS.pl --files --from=v5.31.6 + perl Porting/updateAUTHORS.pl --activity --from=v5.31.6 + perl Porting/updateAUTHORS.pl --chainsaw v5.31.6..HEAD + perl Porting/updateAUTHORS.pl --change-name "Old Name"="New Name" + perl Porting/updateAUTHORS.pl --change-name-for-email "x@y.com"="Name" + perl Porting/updateAUTHORS.pl --change-email-for-name "Name"="p@q.com" + +=head1 RELATED FILES + +F<AUTHORS>, F<.mailmap>, F<Porting/exclude_contrib.txt> + +=head1 TODO + +More documentation and testing. =head1 AUTHOR Yves Orton +=head1 THANKS + +Loosely based on the older F<Porting/checkAUTHORS.pl> script which this tool +replaced. Thanks to the contributors of that tool. See the Perl change log. + =cut