2 package Porting::updateAUTHORS;
5 use Getopt::Long qw(GetOptions);
6 use Pod::Usage qw(pod2usage);
8 use Encode qw(encode_utf8 decode_utf8 decode);
10 # The style of this file is determined by:
12 # perltidy -w -ple -bbb -bbc -bbs -nolq -l=80 -noll -nola -nwls='=' \
13 # -isbc -nolc -otr -kis -ci=4 -se -sot -sct -nsbl -pt=2 -fs \
14 # -fsb='#start-no-tidy' -fse='#end-no-tidy'
16 # Info and config for passing to git log.
18 # %aN: author name (respecting .mailmap, see git-shortlog(1) or git-blame(1))
20 # %aE: author email (respecting .mailmap, see git-shortlog(1) or git-blame(1))
22 # %cN: committer name (respecting .mailmap, see git-shortlog(1) or git-blame(1))
23 # %ce: committer email
24 # %cE: committer email (respecting .mailmap, see git-shortlog(1) or git-blame(1))
26 # %h: abbreviated commit hash
28 # %x00: print a byte from a hex code
31 "an" => "author_name",
32 "aN" => "author_name_mm",
33 "ae" => "author_email",
34 "aE" => "author_email_mm",
35 "cn" => "committer_name",
36 "cN" => "committer_name_mm",
37 "ce" => "committer_email",
38 "cE" => "committer_email_mm",
41 "s" => "commit_subject",
44 my @field_codes= sort keys %field_spec;
45 my @field_names= map { $field_spec{$_} } @field_codes;
46 my $tformat= join "%x00", map { "%" . $_ } @field_codes;
48 sub _make_name_author_info {
49 my ($author_info, $commit_info, $name_key)= @_;
50 (my $email_key= $name_key) =~ s/name/email/;
51 my $email= $commit_info->{$email_key};
52 my $name= $commit_info->{$name_key};
54 my $line= $author_info->{"email2line"}{$email}
55 // $author_info->{"name2line"}{$name};
57 $line //= sprintf "%-31s<%s>",
58 $commit_info->{$name_key}, $commit_info->{$email_key};
62 sub _make_name_simple {
63 my ($commit_info, $key)= @_;
64 my $name_key= $key . "_name";
65 my $email_key= $key . "_email";
66 return sprintf "%s <%s>", $commit_info->{$name_key},
67 lc($commit_info->{$email_key});
71 my ($author_info, $mailmap_info)= @_;
73 open my $fh, qq(git log --pretty='tformat:$tformat' |);
75 while (defined(my $line= <$fh>)) {
77 $line= decode_utf8($line);
79 @{$commit_info}{@field_names}= split /\0/, $line, 0 + @field_names;
81 my $author_name_mm= _make_name_author_info($author_info, $commit_info,
84 my $committer_name_mm=
85 _make_name_author_info($author_info, $commit_info,
88 my $author_name_real= _make_name_simple($commit_info, "author");
90 my $committer_name_real= _make_name_simple($commit_info, "committer");
93 $mailmap_info, $author_name_mm, $author_name_real,
94 $commit_info, "author name"
96 _check_name_mailmap($mailmap_info, $committer_name_mm,
97 $committer_name_real, $commit_info, "committer name");
99 $author_info->{"lines"}{$author_name_mm}++;
100 $author_info->{"lines"}{$committer_name_mm}++;
106 my ($authors_file)= @_;
107 $authors_file ||= "AUTHORS";
109 my @authors_preamble;
110 open my $in_fh, "<", $authors_file
111 or die "Failed to open for read '$authors_file': $!";
112 while (defined(my $line= <$in_fh>)) {
114 push @authors_preamble, $line;
115 if ($line =~ /^--/) {
120 while (defined(my $line= <$in_fh>)) {
122 $line= decode_utf8($line);
126 if ($copy =~ s/<([^<>]*)>//) {
129 elsif ($copy =~ s/\s+(\@\w+)\z//) {
134 $email //= "unknown";
137 $author_info{"lines"}{$line}++;
138 $author_info{"email2line"}{$email}= $line
139 if $email and $email ne "unknown";
140 $author_info{"name2line"}{$name}= $line
141 if $name and $name ne "unknown";
142 $author_info{"email2name"}{ lc($email) }= $name
145 and $email ne "unknown";
146 $author_info{"name2email"}{$name}= $email
147 if $name and $name ne "unknown";
150 or die "Failed to close '$authors_file': $!";
151 return (\%author_info, \@authors_preamble);
155 my ($author_info, $authors_preamble, $authors_file)= @_;
156 $authors_file ||= "AUTHORS";
157 my $authors_file_new= $authors_file . ".new";
158 open my $out_fh, ">", $authors_file_new
159 or die "Failed to open for write '$authors_file_new': $!";
160 foreach my $line (@$authors_preamble) {
161 print $out_fh encode_utf8($line), "\n"
162 or die "Failed to print to '$authors_file_new': $!";
164 foreach my $author (_sorted_hash_keys($author_info->{"lines"})) {
165 next if $author =~ /^unknown/;
166 if ($author =~ s/\s*<unknown>\z//) {
167 next if $author =~ /^\w+$/;
169 print $out_fh encode_utf8($author), "\n"
170 or die "Failed to print to '$authors_file_new': $!";
173 or die "Failed to close '$authors_file_new': $!";
174 rename $authors_file_new, $authors_file
175 or die "Failed to rename '$authors_file_new' to '$authors_file':$!";
180 my ($mailmap_file)= @_;
181 $mailmap_file ||= ".mailmap";
183 open my $in, "<", $mailmap_file
184 or die "Failed to read '$mailmap_file': $!";
186 my @mailmap_preamble;
188 while (defined(my $line= <$in>)) {
190 next unless $line =~ /\S/;
192 $line= decode_utf8($line);
194 if (!keys %mailmap_hash) {
195 push @mailmap_preamble, $line;
198 die encode_utf8 "Not expecting comments after header ",
199 "finished at line $line_num!\nLine: $line\n";
203 $mailmap_hash{$line}= $line_num;
207 return \%mailmap_hash, \@mailmap_preamble;
210 # this can be used to extract data from the checkAUTHORS data
211 sub merge_mailmap_with_AUTHORS_and_checkAUTHORS_data {
212 my ($mailmap_hash, $author_info)= @_;
213 require 'Porting/checkAUTHORS.pl' or die "No authors?";
214 my ($map, $preferred_email_or_github)=
215 Porting::checkAUTHORS::generate_known_author_map();
217 foreach my $old (sort keys %$preferred_email_or_github) {
218 my $new= $preferred_email_or_github->{$old};
219 next if $old !~ /\@/ or $new !~ /\@/ or $new eq $old;
220 my $name= $author_info->{"email2name"}{$new};
222 my $line= "$name <$new> <$old>";
223 $mailmap_hash->{$line}++;
229 sub _sorted_hash_keys {
231 my @sorted= sort { lc($a) cmp lc($b) || $a cmp $b } keys %$hash;
236 my ($mailmap_hash, $mailmap_preamble, $mailmap_file)= @_;
237 $mailmap_file ||= ".mailmap";
239 my $mailmap_file_new= $mailmap_file . "_new";
240 open my $out, ">", $mailmap_file_new
241 or die "Failed to write '$mailmap_file_new':$!";
242 foreach my $line (@$mailmap_preamble, _sorted_hash_keys($mailmap_hash),) {
243 print $out encode_utf8($line), "\n"
244 or die "Failed to print to '$mailmap_file': $!";
247 rename $mailmap_file_new, $mailmap_file
248 or die "Failed to rename '$mailmap_file_new' to '$mailmap_file':$!";
252 sub parse_mailmap_hash {
253 my ($mailmap_hash)= @_;
255 foreach my $line (sort keys %$mailmap_hash) {
256 my $line_num= $mailmap_hash->{$line};
257 $line =~ /^ \s* (?: ( [^<>]*? ) \s+ )? <([^<>]*)>
258 (?: \s+ (?: ( [^<>]*? ) \s+ )? <([^<>]*)> )? \s* \z /x
259 or die encode_utf8 "Failed to parse line num $line_num: '$line'";
261 die encode_utf8 "Both preferred name and email are mandatory ",
262 "in line num $line_num: '$line'";
265 # [ preferred_name, preferred_email, other_name, other_email ]
266 push @recs, [ $1, $2, $3, $4, $line_num ];
272 my ($hash, $root_key, $key, $val, $pretty_name)= @_;
273 $hash->{$root_key}{$key} //= $val;
274 my $prev= $hash->{$root_key}{$key};
276 die encode_utf8 "Collision on mapping $root_key: "
277 . " '$key' maps to '$prev' and '$val'\n";
281 my $O2P= "other2preferred";
282 my $O2PN= "other2preferred_name";
283 my $O2PE= "other2preferred_email";
284 my $P2O= "preferred2other";
285 my $N2P= "name2preferred";
286 my $E2P= "email2preferred";
288 my $blurb= ""; # FIXME - replace with a nice message
290 sub _check_name_mailmap {
291 my ($mailmap_info, $auth_name, $raw_name, $commit_info, $descr)= @_;
292 my $name= $auth_name;
293 $name =~ s/<([^<>]+)>/<\L$1\E>/
294 or $name =~ s/(\s)(\@\w+)\z/$1<\L$2\E>/
295 or $name .= " <unknown>";
299 if (!$mailmap_info->{$P2O}{$name}) {
300 warn encode_utf8 sprintf "Unknown %s '%s' in commit %s '%s'\n%s",
302 $name, $commit_info->{"abbrev_hash"},
303 $commit_info->{"commit_subject"},
305 $mailmap_info->{add}{"$name $raw_name"}++;
308 elsif (!$mailmap_info->{$P2O}{$name}{$raw_name}) {
309 $mailmap_info->{add}{"$name $raw_name"}++;
314 sub check_fix_mailmap_hash {
315 my ($mailmap_hash, $authors_info)= @_;
316 my $parsed= parse_mailmap_hash($mailmap_hash);
321 # first pass through the data, do any conversions, eg, LC
322 # the email address, decode any MIME-Header style email addresses.
323 # We also correct any preferred name entries so they match what
324 # we already have in AUTHORS, and check that there aren't collisions
325 # or other issues in the data.
326 foreach my $rec (@$parsed) {
327 my ($pname, $pemail, $oname, $oemail, $line_num)= @$rec;
328 $pemail= lc($pemail);
329 $oemail= lc($oemail) if defined $oemail;
330 if ($pname =~ /=\?UTF-8\?/) {
331 $pname= decode("MIME-Header", $pname);
333 my $auth_email= $authors_info->{"name2email"}{$pname};
335 ## this name exists in authors, so use its email data for pemail
336 $pemail= $auth_email;
338 my $auth_name= $authors_info->{"email2name"}{$pemail};
340 ## this email exists in authors, so use its name data for pname
344 # neither name nor email exist in authors.
345 if ($pname ne "unknown") {
346 if (my $email= $seen_map{"name"}{$pname}) {
347 ## we have seen this pname before, check the pemail
349 if ($email ne $pemail) {
350 warn encode_utf8 "Inconsistent emails for name '$pname'"
351 . " at line num $line_num: keeping '$email',"
352 . " ignoring '$pemail'\n";
357 $seen_map{"name"}{$pname}= $pemail;
360 if ($pemail ne "unknown") {
361 if (my $name= $seen_map{"email"}{$pemail}) {
362 ## we have seen this preferred_email before, check the preferred_name
364 if ($name ne $pname) {
365 warn encode_utf8 "Inconsistent name for email '$pemail'"
366 . " at line num $line_num: keeping '$name', ignoring"
372 $seen_map{"email"}{$pemail}= $pname;
376 # Build an index of "preferred name/email" to other-email, other name
377 # we use this later to remove redundant entries missing a name.
378 $pref_groups{"$pname $pemail"}{$oemail}{ $oname || "" }=
379 [ $pname, $pemail, $oname, $oemail, $line_num ];
382 # this removes entries like
383 # Joe <blogs> <whatever>
384 # where there is a corresponding
385 # Joe <blogs> Joe X <blogs>
386 foreach my $pref (_sorted_hash_keys(\%pref_groups)) {
387 my $entries= $pref_groups{$pref};
388 foreach my $email (_sorted_hash_keys($entries)) {
389 my @names= _sorted_hash_keys($entries->{$email});
390 if ($names[0] eq "" and @names > 1) {
393 foreach my $name (@names) {
394 push @fixed, $entries->{$email}{$name};
399 # final pass through the dataset, build up a database
400 # we will use later for checks and updates, and reconstruct
401 # the canonical entries.
402 my $new_mailmap_hash= {};
403 my $mailmap_info= {};
404 foreach my $rec (@fixed) {
405 my ($pname, $pemail, $oname, $oemail, $line_num)= @$rec;
406 my $preferred= "$pname <$pemail>";
408 if (defined $oemail) {
409 $other= $oname ? "$oname <$oemail>" : "<$oemail>";
411 if ($other and $other ne "<unknown>") {
412 _safe_set_key($mailmap_info, $O2P, $other, $preferred);
413 _safe_set_key($mailmap_info, $O2PN, $other, $pname);
414 _safe_set_key($mailmap_info, $O2PE, $other, $pemail);
416 $mailmap_info->{$P2O}{$preferred}{$other}++;
417 if ($pname ne "unknown") {
418 _safe_set_key($mailmap_info, $N2P, $pname, $preferred);
420 if ($pemail ne "unknown") {
421 _safe_set_key($mailmap_info, $E2P, $pemail, $preferred);
423 my $line= $preferred;
424 $line .= " $other" if $other;
425 $new_mailmap_hash->{$line}= $line_num;
427 return ($new_mailmap_hash, $mailmap_info);
430 sub add_new_mailmap_entries {
431 my ($mailmap_hash, $mailmap_info, $mailmap_file)= @_;
433 my $mailmap_add= $mailmap_info->{add}
437 for my $new (sort keys %$mailmap_add) {
438 !$mailmap_hash->{$new}++ or next;
439 warn encode_utf8 "Updating '$mailmap_file' with: $new\n";
445 sub read_and_update {
446 my ($authors_file, $mailmap_file)= @_;
448 # read the authors file and extract the info it contains
449 my ($author_info, $authors_preamble)= read_authors($authors_file);
451 # read the mailmap file.
452 my ($orig_mailmap_hash, $mailmap_preamble)= read_mailmap($mailmap_file);
454 # check and possibly fix the mailmap data, and build a set of precomputed
455 # datasets to work with it.
456 my ($mailmap_hash, $mailmap_info)=
457 check_fix_mailmap_hash($orig_mailmap_hash, $author_info);
459 # update the mailmap based on any check or fixes we just did,
460 # we always write even if we did not do any changes.
461 update_mailmap($mailmap_hash, $mailmap_preamble, $mailmap_file);
463 # read the commits names using git log, and compares and checks
464 # them against the data we have in authors.
465 read_commit_log($author_info, $mailmap_info);
467 # update the authors file with any changes, we always write,
468 # but we may not change anything
469 update_authors($author_info, $authors_preamble, $authors_file);
471 # check if we discovered new email data from the commits that
472 # we need to write back to disk.
473 add_new_mailmap_entries($mailmap_hash, $mailmap_info, $mailmap_file)
474 and update_mailmap($mailmap_hash, $mailmap_preamble,
475 $mailmap_file, $mailmap_info);
481 local $Data::Dumper::Sortkeys= 1;
482 my $authors_file= "AUTHORS";
483 my $mailmap_file= ".mailmap";
487 ## Parse options and print usage if there is a syntax error,
488 ## or if usage was explicitly requested.
490 'help|?' => \$show_help,
492 'authors_file|authors-file=s' => \$authors_file,
493 'mailmap_file|mailmap-file=s' => \$mailmap_file,
495 pod2usage(1) if $show_help;
496 pod2usage(-verbose => 2) if $show_man;
498 read_and_update($authors_file, $mailmap_file);
499 return 0; # 0 for no error - intended for exit();
502 exit(main()) unless caller;
509 Porting/updateAUTHORS.pl - Automatically update AUTHORS and .mailmap
510 based on commit data.
514 Porting/updateAUTHORS.pl
517 --help brief help message
518 --man full documentation
519 --authors-file=FILE override default location of AUTHORS
520 --mailmap-file=FILE override default location of .mailmap
528 Print a brief help message and exits.
532 Prints the manual page and exits.
534 =item --authors-file=FILE
536 =item --authors_file=FILE
538 Override the default location of the authors file, which is "AUTHORS" in
539 the current directory.
541 =item --mailmap-file=FILE
543 =item --mailmap_file=FILE
545 Override the default location of the mailmap file, which is ".mailmap"
546 in the current directory.
552 This program will automatically manage updates to the AUTHORS file and
553 .mailmap file based on the data in our commits and the data in the files
554 themselves. It uses no other sources of data. Expects to be run from
555 the root a git repo of perl.
557 In simple, execute the script and it will either die with a helpful
558 message or it will update the files as necessary, possibly not at all if
559 there is no need to do so. Note it will actually rewrite the files at
560 least once, but it may not actually make any changes to their content.
561 Thus to use the script is currently required that the files are
564 Review the changes it makes to make sure they are sane. If they are
565 commit. If they are not then update the AUTHORS or .mailmap files as is
566 appropriate and run the tool again. Typically you shouldn't need to do
567 either unless you are changing the default name or email for a user. For
568 instance if a person currently listed in the AUTHORS file whishes to
569 change their preferred name or email then change it in the AUTHORS file
570 and run the script again. I am not sure when you might need to directly
571 modify .mailmap, usually modifying the AUTHORS file should suffice.
575 Note that the file can also be used as a package. If you require the
576 file then you can access the functions located within the package
577 C<Porting::updateAUTHORS>. These are as follows:
581 =item add_new_mailmap_entries($mailmap_hash, $mailmap_info, $mailmap_file)
583 If any additions were identified while reading the commits this will
584 inject them into the mailmap_hash so they can be written out. Returns a
585 count of additions found.
587 =item check_fix_mailmap_hash($mailmap_hash, $authors_info)
589 Analyzes the data contained the in the .mailmap file and applies any
590 automated fixes which are required and which it can automatically
591 perform. Returns a hash of adjusted entries and a hash with additional
592 metadata about the mailmap entries.
596 This implements the command line version of this module, handle command
599 =item merge_mailmap_with_AUTHORS_and_checkAUTHORS_data
601 This is a utility function that combines data from this tool with data
602 contained in F<Porting/checkAUTHORS.pl> it is not used directly, but was
603 used to cleanup and generate the current version of the .mailmap file.
605 =item parse_mailmap_hash($mailmap_hash)
607 Takes a mailmap_hash and parses it and returns it as an array of array
608 records with the contents:
610 [ $preferred_name, $preferred_email,
611 $other_name, $other_email,
614 =item read_and_update($authors_file, $mailmap_file)
616 Wraps the other functions in this library and implements the logic and
617 intent of this tool. Takes two arguments, the authors file name, and the
618 mailmap file name. Returns nothing but may modify the AUTHORS file
619 or the .mailmap file. Requires that both files are editable.
621 =item read_commit_log($authors_info, $mailmap_info)
623 Read the commit log and find any new names it contains.
625 =item read_authors($authors_file)
627 Read the AUTHORS file and return data about it.
629 =item read_mailmap($mailmap_file)
631 Read the .mailmap file and return data about it.
633 =item update_authors($authors_info, $authors_preamble, $authors_file)
635 Write out an updated AUTHORS file. This is done atomically
636 using a rename, we will not leave a half modified file in
639 =item update_mailmap($mm_hash, $mm_preamble, $mailmap_file, $mm_info)
641 Write out an updated .mailmap file. This is done atomically
642 using a rename, we will not leave a half modified file in
649 More documentation and testing.
653 F<Porting/checkAUTHORS.pl>
657 Yves Orton <demerphq@gmail.com>