2 package Porting::updateAUTHORS;
5 use Getopt::Long qw(GetOptions);
6 use Pod::Usage qw(pod2usage);
8 use Encode qw(encode_utf8 decode_utf8 decode);
10 # The style of this file is determined by:
12 # perltidy -w -ple -bbb -bbc -bbs -nolq -l=80 -noll -nola -nwls='=' \
13 # -isbc -nolc -otr -kis -ci=4 -se -sot -sct -nsbl -pt=2 -fs \
14 # -fsb='#start-no-tidy' -fse='#end-no-tidy'
16 # Info and config for passing to git log.
18 # %aN: author name (respecting .mailmap, see git-shortlog(1) or git-blame(1))
20 # %aE: author email (respecting .mailmap, see git-shortlog(1) or git-blame(1))
22 # %cN: committer name (respecting .mailmap, see git-shortlog(1) or git-blame(1))
23 # %ce: committer email
24 # %cE: committer email (respecting .mailmap, see git-shortlog(1) or git-blame(1))
26 # %h: abbreviated commit hash
28 # %x00: print a byte from a hex code
31 "an" => "author_name",
32 "aN" => "author_name_mm",
33 "ae" => "author_email",
34 "aE" => "author_email_mm",
35 "cn" => "committer_name",
36 "cN" => "committer_name_mm",
37 "ce" => "committer_email",
38 "cE" => "committer_email_mm",
41 "s" => "commit_subject",
44 my @field_codes= sort keys %field_spec;
45 my @field_names= map { $field_spec{$_} } @field_codes;
46 my $tformat= join "%x00", map { "%" . $_ } @field_codes;
48 sub _make_name_author_info {
49 my ($author_info, $commit_info, $name_key)= @_;
50 (my $email_key= $name_key) =~ s/name/email/;
51 my $email= $commit_info->{$email_key};
52 my $name= $commit_info->{$name_key};
54 my $line= $author_info->{"email2line"}{$email}
55 // $author_info->{"name2line"}{$name};
57 $line //= sprintf "%-31s<%s>",
58 $commit_info->{$name_key}, $commit_info->{$email_key};
62 sub _make_name_simple {
63 my ($commit_info, $key)= @_;
64 my $name_key= $key . "_name";
65 my $email_key= $key . "_email";
66 return sprintf "%s <%s>", $commit_info->{$name_key},
67 lc($commit_info->{$email_key});
71 my ($author_info, $mailmap_info)= @_;
73 open my $fh, qq(git log --pretty='tformat:$tformat' |);
75 while (defined(my $line= <$fh>)) {
77 $line= decode_utf8($line);
79 @{$commit_info}{@field_names}= split /\0/, $line, 0 + @field_names;
81 my $author_name_mm= _make_name_author_info($author_info, $commit_info,
84 my $committer_name_mm=
85 _make_name_author_info($author_info, $commit_info,
88 my $author_name_real= _make_name_simple($commit_info, "author");
90 my $committer_name_real= _make_name_simple($commit_info, "committer");
93 $mailmap_info, $author_name_mm, $author_name_real,
94 $commit_info, "author name"
96 _check_name_mailmap($mailmap_info, $committer_name_mm,
97 $committer_name_real, $commit_info, "committer name");
99 $author_info->{"lines"}{$author_name_mm}++;
100 $author_info->{"lines"}{$committer_name_mm}++;
106 my ($authors_file)= @_;
107 $authors_file ||= "AUTHORS";
109 my @authors_preamble;
110 open my $in_fh, "<", $authors_file
111 or die "Failed to open for read '$authors_file': $!";
112 while (defined(my $line= <$in_fh>)) {
114 push @authors_preamble, $line;
115 if ($line =~ /^--/) {
120 while (defined(my $line= <$in_fh>)) {
122 $line= decode_utf8($line);
126 if ($copy =~ s/<([^<>]*)>//) {
129 elsif ($copy =~ s/\s+(\@\w+)\z//) {
134 $email //= "unknown";
137 $author_info{"lines"}{$line}++;
138 $author_info{"email2line"}{$email}= $line
139 if $email and $email ne "unknown";
140 $author_info{"name2line"}{$name}= $line
141 if $name and $name ne "unknown";
142 $author_info{"email2name"}{ lc($email) }= $name
145 and $email ne "unknown";
146 $author_info{"name2email"}{$name}= $email
147 if $name and $name ne "unknown";
150 or die "Failed to close '$authors_file': $!";
151 return (\%author_info, \@authors_preamble);
155 my ($author_info, $authors_preamble, $authors_file)= @_;
156 $authors_file ||= "AUTHORS";
157 my $authors_file_new= $authors_file . ".new";
158 open my $out_fh, ">", $authors_file_new
159 or die "Failed to open for write '$authors_file_new': $!";
161 foreach my $line (@$authors_preamble) {
162 print $out_fh encode_utf8($line), "\n"
163 or die "Failed to print to '$authors_file_new': $!";
165 foreach my $author (_sorted_hash_keys($author_info->{"lines"})) {
166 next if $author =~ /^unknown/;
167 if ($author =~ s/\s*<unknown>\z//) {
168 next if $author =~ /^\w+$/;
170 print $out_fh encode_utf8($author), "\n"
171 or die "Failed to print to '$authors_file_new': $!";
174 or die "Failed to close '$authors_file_new': $!";
175 rename $authors_file_new, $authors_file
176 or die "Failed to rename '$authors_file_new' to '$authors_file':$!";
181 my ($mailmap_file)= @_;
182 $mailmap_file ||= ".mailmap";
184 open my $in, "<", $mailmap_file
185 or die "Failed to read '$mailmap_file': $!";
187 my @mailmap_preamble;
189 while (defined(my $line= <$in>)) {
191 next unless $line =~ /\S/;
193 $line= decode_utf8($line);
195 if (!keys %mailmap_hash) {
196 push @mailmap_preamble, $line;
199 die encode_utf8 "Not expecting comments after header ",
200 "finished at line $line_num!\nLine: $line\n";
204 $mailmap_hash{$line}= $line_num;
208 return \%mailmap_hash, \@mailmap_preamble;
211 # this can be used to extract data from the checkAUTHORS data
212 sub merge_mailmap_with_AUTHORS_and_checkAUTHORS_data {
213 my ($mailmap_hash, $author_info)= @_;
214 require 'Porting/checkAUTHORS.pl' or die "No authors?";
215 my ($map, $preferred_email_or_github)=
216 Porting::checkAUTHORS::generate_known_author_map();
218 foreach my $old (sort keys %$preferred_email_or_github) {
219 my $new= $preferred_email_or_github->{$old};
220 next if $old !~ /\@/ or $new !~ /\@/ or $new eq $old;
221 my $name= $author_info->{"email2name"}{$new};
223 my $line= "$name <$new> <$old>";
224 $mailmap_hash->{$line}++;
230 sub _sorted_hash_keys {
232 my @sorted= sort { lc($a) cmp lc($b) || $a cmp $b } keys %$hash;
237 my ($mailmap_hash, $mailmap_preamble, $mailmap_file)= @_;
238 $mailmap_file ||= ".mailmap";
240 my $mailmap_file_new= $mailmap_file . "_new";
241 open my $out, ">", $mailmap_file_new
242 or die "Failed to write '$mailmap_file_new':$!";
244 foreach my $line (@$mailmap_preamble, _sorted_hash_keys($mailmap_hash),) {
245 print $out encode_utf8($line), "\n"
246 or die "Failed to print to '$mailmap_file': $!";
249 rename $mailmap_file_new, $mailmap_file
250 or die "Failed to rename '$mailmap_file_new' to '$mailmap_file':$!";
254 sub parse_mailmap_hash {
255 my ($mailmap_hash)= @_;
257 foreach my $line (sort keys %$mailmap_hash) {
258 my $line_num= $mailmap_hash->{$line};
259 $line =~ /^ \s* (?: ( [^<>]*? ) \s+ )? <([^<>]*)>
260 (?: \s+ (?: ( [^<>]*? ) \s+ )? <([^<>]*)> )? \s* \z /x
261 or die encode_utf8 "Failed to parse line num $line_num: '$line'";
263 die encode_utf8 "Both preferred name and email are mandatory ",
264 "in line num $line_num: '$line'";
267 # [ preferred_name, preferred_email, other_name, other_email ]
268 push @recs, [ $1, $2, $3, $4, $line_num ];
274 my ($hash, $root_key, $key, $val, $pretty_name)= @_;
275 $hash->{$root_key}{$key} //= $val;
276 my $prev= $hash->{$root_key}{$key};
278 die encode_utf8 "Collision on mapping $root_key: "
279 . " '$key' maps to '$prev' and '$val'\n";
283 my $O2P= "other2preferred";
284 my $O2PN= "other2preferred_name";
285 my $O2PE= "other2preferred_email";
286 my $P2O= "preferred2other";
287 my $N2P= "name2preferred";
288 my $E2P= "email2preferred";
290 my $blurb= ""; # FIXME - replace with a nice message
292 sub _check_name_mailmap {
293 my ($mailmap_info, $auth_name, $raw_name, $commit_info, $descr)= @_;
294 my $name= $auth_name;
295 $name =~ s/<([^<>]+)>/<\L$1\E>/
296 or $name =~ s/(\s)(\@\w+)\z/$1<\L$2\E>/
297 or $name .= " <unknown>";
301 if (!$mailmap_info->{$P2O}{$name}) {
302 warn encode_utf8 sprintf "Unknown %s '%s' in commit %s '%s'\n%s",
304 $name, $commit_info->{"abbrev_hash"},
305 $commit_info->{"commit_subject"},
307 $mailmap_info->{add}{"$name $raw_name"}++;
310 elsif (!$mailmap_info->{$P2O}{$name}{$raw_name}) {
311 $mailmap_info->{add}{"$name $raw_name"}++;
316 sub check_fix_mailmap_hash {
317 my ($mailmap_hash, $authors_info)= @_;
318 my $parsed= parse_mailmap_hash($mailmap_hash);
323 # first pass through the data, do any conversions, eg, LC
324 # the email address, decode any MIME-Header style email addresses.
325 # We also correct any preferred name entries so they match what
326 # we already have in AUTHORS, and check that there aren't collisions
327 # or other issues in the data.
328 foreach my $rec (@$parsed) {
329 my ($pname, $pemail, $oname, $oemail, $line_num)= @$rec;
330 $pemail= lc($pemail);
331 $oemail= lc($oemail) if defined $oemail;
332 if ($pname =~ /=\?UTF-8\?/) {
333 $pname= decode("MIME-Header", $pname);
335 my $auth_email= $authors_info->{"name2email"}{$pname};
337 ## this name exists in authors, so use its email data for pemail
338 $pemail= $auth_email;
340 my $auth_name= $authors_info->{"email2name"}{$pemail};
342 ## this email exists in authors, so use its name data for pname
346 # neither name nor email exist in authors.
347 if ($pname ne "unknown") {
348 if (my $email= $seen_map{"name"}{$pname}) {
349 ## we have seen this pname before, check the pemail
351 if ($email ne $pemail) {
352 warn encode_utf8 "Inconsistent emails for name '$pname'"
353 . " at line num $line_num: keeping '$email',"
354 . " ignoring '$pemail'\n";
359 $seen_map{"name"}{$pname}= $pemail;
362 if ($pemail ne "unknown") {
363 if (my $name= $seen_map{"email"}{$pemail}) {
364 ## we have seen this preferred_email before, check the preferred_name
366 if ($name ne $pname) {
367 warn encode_utf8 "Inconsistent name for email '$pemail'"
368 . " at line num $line_num: keeping '$name', ignoring"
374 $seen_map{"email"}{$pemail}= $pname;
378 # Build an index of "preferred name/email" to other-email, other name
379 # we use this later to remove redundant entries missing a name.
380 $pref_groups{"$pname $pemail"}{$oemail}{ $oname || "" }=
381 [ $pname, $pemail, $oname, $oemail, $line_num ];
384 # this removes entries like
385 # Joe <blogs> <whatever>
386 # where there is a corresponding
387 # Joe <blogs> Joe X <blogs>
388 foreach my $pref (_sorted_hash_keys(\%pref_groups)) {
389 my $entries= $pref_groups{$pref};
390 foreach my $email (_sorted_hash_keys($entries)) {
391 my @names= _sorted_hash_keys($entries->{$email});
392 if ($names[0] eq "" and @names > 1) {
395 foreach my $name (@names) {
396 push @fixed, $entries->{$email}{$name};
401 # final pass through the dataset, build up a database
402 # we will use later for checks and updates, and reconstruct
403 # the canonical entries.
404 my $new_mailmap_hash= {};
405 my $mailmap_info= {};
406 foreach my $rec (@fixed) {
407 my ($pname, $pemail, $oname, $oemail, $line_num)= @$rec;
408 my $preferred= "$pname <$pemail>";
410 if (defined $oemail) {
411 $other= $oname ? "$oname <$oemail>" : "<$oemail>";
413 if ($other and $other ne "<unknown>") {
414 _safe_set_key($mailmap_info, $O2P, $other, $preferred);
415 _safe_set_key($mailmap_info, $O2PN, $other, $pname);
416 _safe_set_key($mailmap_info, $O2PE, $other, $pemail);
418 $mailmap_info->{$P2O}{$preferred}{$other}++;
419 if ($pname ne "unknown") {
420 _safe_set_key($mailmap_info, $N2P, $pname, $preferred);
422 if ($pemail ne "unknown") {
423 _safe_set_key($mailmap_info, $E2P, $pemail, $preferred);
425 my $line= $preferred;
426 $line .= " $other" if $other;
427 $new_mailmap_hash->{$line}= $line_num;
429 return ($new_mailmap_hash, $mailmap_info);
432 sub add_new_mailmap_entries {
433 my ($mailmap_hash, $mailmap_info, $mailmap_file)= @_;
435 my $mailmap_add= $mailmap_info->{add}
439 for my $new (sort keys %$mailmap_add) {
440 !$mailmap_hash->{$new}++ or next;
441 warn encode_utf8 "Updating '$mailmap_file' with: $new\n";
447 sub read_and_update {
448 my ($authors_file, $mailmap_file)= @_;
450 # read the authors file and extract the info it contains
451 my ($author_info, $authors_preamble)= read_authors($authors_file);
453 # read the mailmap file.
454 my ($orig_mailmap_hash, $mailmap_preamble)= read_mailmap($mailmap_file);
456 # check and possibly fix the mailmap data, and build a set of precomputed
457 # datasets to work with it.
458 my ($mailmap_hash, $mailmap_info)=
459 check_fix_mailmap_hash($orig_mailmap_hash, $author_info);
461 # update the mailmap based on any check or fixes we just did,
462 # we always write even if we did not do any changes.
463 update_mailmap($mailmap_hash, $mailmap_preamble, $mailmap_file);
465 # read the commits names using git log, and compares and checks
466 # them against the data we have in authors.
467 read_commit_log($author_info, $mailmap_info);
469 # update the authors file with any changes, we always write,
470 # but we may not change anything
471 update_authors($author_info, $authors_preamble, $authors_file);
473 # check if we discovered new email data from the commits that
474 # we need to write back to disk.
475 add_new_mailmap_entries($mailmap_hash, $mailmap_info, $mailmap_file)
476 and update_mailmap($mailmap_hash, $mailmap_preamble,
477 $mailmap_file, $mailmap_info);
483 local $Data::Dumper::Sortkeys= 1;
484 my $authors_file= "AUTHORS";
485 my $mailmap_file= ".mailmap";
489 ## Parse options and print usage if there is a syntax error,
490 ## or if usage was explicitly requested.
492 'help|?' => \$show_help,
494 'authors_file|authors-file=s' => \$authors_file,
495 'mailmap_file|mailmap-file=s' => \$mailmap_file,
497 pod2usage(1) if $show_help;
498 pod2usage(-verbose => 2) if $show_man;
500 read_and_update($authors_file, $mailmap_file);
501 return 0; # 0 for no error - intended for exit();
504 exit(main()) unless caller;
511 Porting/updateAUTHORS.pl - Automatically update AUTHORS and .mailmap
512 based on commit data.
516 Porting/updateAUTHORS.pl
519 --help brief help message
520 --man full documentation
521 --authors-file=FILE override default location of AUTHORS
522 --mailmap-file=FILE override default location of .mailmap
530 Print a brief help message and exits.
534 Prints the manual page and exits.
536 =item --authors-file=FILE
538 =item --authors_file=FILE
540 Override the default location of the authors file, which is "AUTHORS" in
541 the current directory.
543 =item --mailmap-file=FILE
545 =item --mailmap_file=FILE
547 Override the default location of the mailmap file, which is ".mailmap"
548 in the current directory.
554 This program will automatically manage updates to the AUTHORS file and
555 .mailmap file based on the data in our commits and the data in the files
556 themselves. It uses no other sources of data. Expects to be run from
557 the root a git repo of perl.
559 In simple, execute the script and it will either die with a helpful
560 message or it will update the files as necessary, possibly not at all if
561 there is no need to do so. Note it will actually rewrite the files at
562 least once, but it may not actually make any changes to their content.
563 Thus to use the script is currently required that the files are
566 Review the changes it makes to make sure they are sane. If they are
567 commit. If they are not then update the AUTHORS or .mailmap files as is
568 appropriate and run the tool again. Typically you shouldn't need to do
569 either unless you are changing the default name or email for a user. For
570 instance if a person currently listed in the AUTHORS file whishes to
571 change their preferred name or email then change it in the AUTHORS file
572 and run the script again. I am not sure when you might need to directly
573 modify .mailmap, usually modifying the AUTHORS file should suffice.
577 Note that the file can also be used as a package. If you require the
578 file then you can access the functions located within the package
579 C<Porting::updateAUTHORS>. These are as follows:
583 =item add_new_mailmap_entries($mailmap_hash, $mailmap_info, $mailmap_file)
585 If any additions were identified while reading the commits this will
586 inject them into the mailmap_hash so they can be written out. Returns a
587 count of additions found.
589 =item check_fix_mailmap_hash($mailmap_hash, $authors_info)
591 Analyzes the data contained the in the .mailmap file and applies any
592 automated fixes which are required and which it can automatically
593 perform. Returns a hash of adjusted entries and a hash with additional
594 metadata about the mailmap entries.
598 This implements the command line version of this module, handle command
601 =item merge_mailmap_with_AUTHORS_and_checkAUTHORS_data
603 This is a utility function that combines data from this tool with data
604 contained in F<Porting/checkAUTHORS.pl> it is not used directly, but was
605 used to cleanup and generate the current version of the .mailmap file.
607 =item parse_mailmap_hash($mailmap_hash)
609 Takes a mailmap_hash and parses it and returns it as an array of array
610 records with the contents:
612 [ $preferred_name, $preferred_email,
613 $other_name, $other_email,
616 =item read_and_update($authors_file, $mailmap_file)
618 Wraps the other functions in this library and implements the logic and
619 intent of this tool. Takes two arguments, the authors file name, and the
620 mailmap file name. Returns nothing but may modify the AUTHORS file
621 or the .mailmap file. Requires that both files are editable.
623 =item read_commit_log($authors_info, $mailmap_info)
625 Read the commit log and find any new names it contains.
627 =item read_authors($authors_file)
629 Read the AUTHORS file and return data about it.
631 =item read_mailmap($mailmap_file)
633 Read the .mailmap file and return data about it.
635 =item update_authors($authors_info, $authors_preamble, $authors_file)
637 Write out an updated AUTHORS file. This is done atomically
638 using a rename, we will not leave a half modified file in
641 =item update_mailmap($mm_hash, $mm_preamble, $mailmap_file, $mm_info)
643 Write out an updated .mailmap file. This is done atomically
644 using a rename, we will not leave a half modified file in
651 More documentation and testing.
655 F<Porting/checkAUTHORS.pl>
659 Yves Orton <demerphq@gmail.com>