This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Bump version for 5.35.11
[perl5.git] / Porting / updateAUTHORS.pl
CommitLineData
a5931453
YO
1#!/usr/bin/env perl
2package Porting::updateAUTHORS;
3use strict;
4use warnings;
5use Getopt::Long qw(GetOptions);
6use Pod::Usage qw(pod2usage);
7use Data::Dumper;
8use Encode qw(encode_utf8 decode_utf8 decode);
9
10# The style of this file is determined by:
11#
12# perltidy -w -ple -bbb -bbc -bbs -nolq -l=80 -noll -nola -nwls='=' \
13# -isbc -nolc -otr -kis -ci=4 -se -sot -sct -nsbl -pt=2 -fs \
14# -fsb='#start-no-tidy' -fse='#end-no-tidy'
15
16# Info and config for passing to git log.
17# %an: author name
18# %aN: author name (respecting .mailmap, see git-shortlog(1) or git-blame(1))
19# %ae: author email
20# %aE: author email (respecting .mailmap, see git-shortlog(1) or git-blame(1))
21# %cn: committer name
22# %cN: committer name (respecting .mailmap, see git-shortlog(1) or git-blame(1))
23# %ce: committer email
24# %cE: committer email (respecting .mailmap, see git-shortlog(1) or git-blame(1))
25# %H: commit hash
26# %h: abbreviated commit hash
27# %s: subject
28# %x00: print a byte from a hex code
29
30my %field_spec= (
31 "an" => "author_name",
32 "aN" => "author_name_mm",
33 "ae" => "author_email",
34 "aE" => "author_email_mm",
35 "cn" => "committer_name",
36 "cN" => "committer_name_mm",
37 "ce" => "committer_email",
38 "cE" => "committer_email_mm",
39 "H" => "commit_hash",
40 "h" => "abbrev_hash",
41 "s" => "commit_subject",
42);
43
44my @field_codes= sort keys %field_spec;
45my @field_names= map { $field_spec{$_} } @field_codes;
46my $tformat= join "%x00", map { "%" . $_ } @field_codes;
47
48sub _make_name_author_info {
49 my ($author_info, $commit_info, $name_key)= @_;
50 (my $email_key= $name_key) =~ s/name/email/;
51 my $email= $commit_info->{$email_key};
52 my $name= $commit_info->{$name_key};
53
54 my $line= $author_info->{"email2line"}{$email}
55 // $author_info->{"name2line"}{$name};
56
57 $line //= sprintf "%-31s<%s>",
58 $commit_info->{$name_key}, $commit_info->{$email_key};
59 return $line;
60}
61
62sub _make_name_simple {
63 my ($commit_info, $key)= @_;
64 my $name_key= $key . "_name";
65 my $email_key= $key . "_email";
66 return sprintf "%s <%s>", $commit_info->{$name_key},
67 lc($commit_info->{$email_key});
68}
69
70sub read_commit_log {
71 my ($author_info, $mailmap_info)= @_;
72 $author_info ||= {};
73 open my $fh, qq(git log --pretty='tformat:$tformat' |);
74
75 while (defined(my $line= <$fh>)) {
76 chomp $line;
77 $line= decode_utf8($line);
78 my $commit_info= {};
79 @{$commit_info}{@field_names}= split /\0/, $line, 0 + @field_names;
80
81 my $author_name_mm= _make_name_author_info($author_info, $commit_info,
82 "author_name_mm");
83
84 my $committer_name_mm=
85 _make_name_author_info($author_info, $commit_info,
86 "committer_name_mm");
87
88 my $author_name_real= _make_name_simple($commit_info, "author");
89
90 my $committer_name_real= _make_name_simple($commit_info, "committer");
91
92 _check_name_mailmap(
93 $mailmap_info, $author_name_mm, $author_name_real,
94 $commit_info, "author name"
95 );
96 _check_name_mailmap($mailmap_info, $committer_name_mm,
97 $committer_name_real, $commit_info, "committer name");
98
99 $author_info->{"lines"}{$author_name_mm}++;
100 $author_info->{"lines"}{$committer_name_mm}++;
101 }
102 return $author_info;
103}
104
105sub read_authors {
106 my ($authors_file)= @_;
107 $authors_file ||= "AUTHORS";
108
109 my @authors_preamble;
110 open my $in_fh, "<", $authors_file
111 or die "Failed to open for read '$authors_file': $!";
112 while (defined(my $line= <$in_fh>)) {
113 chomp $line;
114 push @authors_preamble, $line;
115 if ($line =~ /^--/) {
116 last;
117 }
118 }
119 my %author_info;
120 while (defined(my $line= <$in_fh>)) {
121 chomp $line;
122 $line= decode_utf8($line);
123 my ($name, $email);
124 my $copy= $line;
125 $copy =~ s/\s+\z//;
126 if ($copy =~ s/<([^<>]*)>//) {
127 $email= $1;
128 }
129 elsif ($copy =~ s/\s+(\@\w+)\z//) {
130 $email= $1;
131 }
132 $copy =~ s/\s+\z//;
133 $name= $copy;
134 $email //= "unknown";
135 $email= lc($email);
136
137 $author_info{"lines"}{$line}++;
138 $author_info{"email2line"}{$email}= $line
139 if $email and $email ne "unknown";
140 $author_info{"name2line"}{$name}= $line
141 if $name and $name ne "unknown";
142 $author_info{"email2name"}{ lc($email) }= $name
143 if $email
144 and $name
145 and $email ne "unknown";
146 $author_info{"name2email"}{$name}= $email
147 if $name and $name ne "unknown";
148 }
149 close $in_fh
150 or die "Failed to close '$authors_file': $!";
151 return (\%author_info, \@authors_preamble);
152}
153
154sub update_authors {
155 my ($author_info, $authors_preamble, $authors_file)= @_;
156 $authors_file ||= "AUTHORS";
157 my $authors_file_new= $authors_file . ".new";
158 open my $out_fh, ">", $authors_file_new
159 or die "Failed to open for write '$authors_file_new': $!";
160 foreach my $line (@$authors_preamble) {
161 print $out_fh encode_utf8($line), "\n"
162 or die "Failed to print to '$authors_file_new': $!";
163 }
164 foreach my $author (_sorted_hash_keys($author_info->{"lines"})) {
165 next if $author =~ /^unknown/;
166 if ($author =~ s/\s*<unknown>\z//) {
167 next if $author =~ /^\w+$/;
168 }
169 print $out_fh encode_utf8($author), "\n"
170 or die "Failed to print to '$authors_file_new': $!";
171 }
172 close $out_fh
173 or die "Failed to close '$authors_file_new': $!";
174 rename $authors_file_new, $authors_file
175 or die "Failed to rename '$authors_file_new' to '$authors_file':$!";
176 return 1; # ok
177}
178
179sub read_mailmap {
180 my ($mailmap_file)= @_;
181 $mailmap_file ||= ".mailmap";
182
183 open my $in, "<", $mailmap_file
184 or die "Failed to read '$mailmap_file': $!";
185 my %mailmap_hash;
186 my @mailmap_preamble;
187 my $line_num= 0;
188 while (defined(my $line= <$in>)) {
189 ++$line_num;
190 next unless $line =~ /\S/;
191 chomp($line);
192 $line= decode_utf8($line);
193 if ($line =~ /^#/) {
194 if (!keys %mailmap_hash) {
195 push @mailmap_preamble, $line;
196 }
197 else {
198 die encode_utf8 "Not expecting comments after header ",
199 "finished at line $line_num!\nLine: $line\n";
200 }
201 }
202 else {
203 $mailmap_hash{$line}= $line_num;
204 }
205 }
206 close $in;
207 return \%mailmap_hash, \@mailmap_preamble;
208}
209
210# this can be used to extract data from the checkAUTHORS data
211sub merge_mailmap_with_AUTHORS_and_checkAUTHORS_data {
212 my ($mailmap_hash, $author_info)= @_;
213 require 'Porting/checkAUTHORS.pl' or die "No authors?";
214 my ($map, $preferred_email_or_github)=
215 Porting::checkAUTHORS::generate_known_author_map();
216
217 foreach my $old (sort keys %$preferred_email_or_github) {
218 my $new= $preferred_email_or_github->{$old};
219 next if $old !~ /\@/ or $new !~ /\@/ or $new eq $old;
220 my $name= $author_info->{"email2name"}{$new};
221 if ($name) {
222 my $line= "$name <$new> <$old>";
223 $mailmap_hash->{$line}++;
224 }
225 }
226 return 1; # ok
227}
228
229sub _sorted_hash_keys {
230 my ($hash)= @_;
231 my @sorted= sort { lc($a) cmp lc($b) || $a cmp $b } keys %$hash;
232 return @sorted;
233}
234
235sub update_mailmap {
236 my ($mailmap_hash, $mailmap_preamble, $mailmap_file)= @_;
237 $mailmap_file ||= ".mailmap";
238
239 my $mailmap_file_new= $mailmap_file . "_new";
240 open my $out, ">", $mailmap_file_new
241 or die "Failed to write '$mailmap_file_new':$!";
242 foreach my $line (@$mailmap_preamble, _sorted_hash_keys($mailmap_hash),) {
243 print $out encode_utf8($line), "\n"
244 or die "Failed to print to '$mailmap_file': $!";
245 }
246 close $out;
247 rename $mailmap_file_new, $mailmap_file
248 or die "Failed to rename '$mailmap_file_new' to '$mailmap_file':$!";
249 return 1; # ok
250}
251
252sub parse_mailmap_hash {
253 my ($mailmap_hash)= @_;
254 my @recs;
255 foreach my $line (sort keys %$mailmap_hash) {
256 my $line_num= $mailmap_hash->{$line};
257 $line =~ /^ \s* (?: ( [^<>]*? ) \s+ )? <([^<>]*)>
258 (?: \s+ (?: ( [^<>]*? ) \s+ )? <([^<>]*)> )? \s* \z /x
259 or die encode_utf8 "Failed to parse line num $line_num: '$line'";
260 if (!$1 or !$2) {
261 die encode_utf8 "Both preferred name and email are mandatory ",
262 "in line num $line_num: '$line'";
263 }
264
265 # [ preferred_name, preferred_email, other_name, other_email ]
266 push @recs, [ $1, $2, $3, $4, $line_num ];
267 }
268 return \@recs;
269}
270
271sub _safe_set_key {
272 my ($hash, $root_key, $key, $val, $pretty_name)= @_;
273 $hash->{$root_key}{$key} //= $val;
274 my $prev= $hash->{$root_key}{$key};
275 if ($prev ne $val) {
276 die encode_utf8 "Collision on mapping $root_key: "
277 . " '$key' maps to '$prev' and '$val'\n";
278 }
279}
280
281my $O2P= "other2preferred";
282my $O2PN= "other2preferred_name";
283my $O2PE= "other2preferred_email";
284my $P2O= "preferred2other";
285my $N2P= "name2preferred";
286my $E2P= "email2preferred";
287
288my $blurb= ""; # FIXME - replace with a nice message
289
290sub _check_name_mailmap {
291 my ($mailmap_info, $auth_name, $raw_name, $commit_info, $descr)= @_;
292 my $name= $auth_name;
293 $name =~ s/<([^<>]+)>/<\L$1\E>/
294 or $name =~ s/(\s)(\@\w+)\z/$1<\L$2\E>/
295 or $name .= " <unknown>";
296
297 $name =~ s/\s+/ /g;
298
299 if (!$mailmap_info->{$P2O}{$name}) {
300 warn encode_utf8 sprintf "Unknown %s '%s' in commit %s '%s'\n%s",
301 $descr,
302 $name, $commit_info->{"abbrev_hash"},
303 $commit_info->{"commit_subject"},
304 $blurb;
305 $mailmap_info->{add}{"$name $raw_name"}++;
306 return 0;
307 }
308 elsif (!$mailmap_info->{$P2O}{$name}{$raw_name}) {
309 $mailmap_info->{add}{"$name $raw_name"}++;
310 }
311 return 1;
312}
313
314sub check_fix_mailmap_hash {
315 my ($mailmap_hash, $authors_info)= @_;
316 my $parsed= parse_mailmap_hash($mailmap_hash);
317 my @fixed;
318 my %seen_map;
319 my %pref_groups;
320
321 # first pass through the data, do any conversions, eg, LC
322 # the email address, decode any MIME-Header style email addresses.
323 # We also correct any preferred name entries so they match what
324 # we already have in AUTHORS, and check that there aren't collisions
325 # or other issues in the data.
326 foreach my $rec (@$parsed) {
327 my ($pname, $pemail, $oname, $oemail, $line_num)= @$rec;
328 $pemail= lc($pemail);
329 $oemail= lc($oemail) if defined $oemail;
330 if ($pname =~ /=\?UTF-8\?/) {
331 $pname= decode("MIME-Header", $pname);
332 }
333 my $auth_email= $authors_info->{"name2email"}{$pname};
334 if ($auth_email) {
335 ## this name exists in authors, so use its email data for pemail
336 $pemail= $auth_email;
337 }
338 my $auth_name= $authors_info->{"email2name"}{$pemail};
339 if ($auth_name) {
340 ## this email exists in authors, so use its name data for pname
341 $pname= $auth_name;
342 }
343
344 # neither name nor email exist in authors.
345 if ($pname ne "unknown") {
346 if (my $email= $seen_map{"name"}{$pname}) {
347 ## we have seen this pname before, check the pemail
348 ## is consistent
349 if ($email ne $pemail) {
350 warn encode_utf8 "Inconsistent emails for name '$pname'"
351 . " at line num $line_num: keeping '$email',"
352 . " ignoring '$pemail'\n";
353 $pemail= $email;
354 }
355 }
356 else {
357 $seen_map{"name"}{$pname}= $pemail;
358 }
359 }
360 if ($pemail ne "unknown") {
361 if (my $name= $seen_map{"email"}{$pemail}) {
362 ## we have seen this preferred_email before, check the preferred_name
363 ## is consistent
364 if ($name ne $pname) {
365 warn encode_utf8 "Inconsistent name for email '$pemail'"
366 . " at line num $line_num: keeping '$name', ignoring"
367 . " '$pname'\n";
368 $pname= $name;
369 }
370 }
371 else {
372 $seen_map{"email"}{$pemail}= $pname;
373 }
374 }
375
376 # Build an index of "preferred name/email" to other-email, other name
377 # we use this later to remove redundant entries missing a name.
378 $pref_groups{"$pname $pemail"}{$oemail}{ $oname || "" }=
379 [ $pname, $pemail, $oname, $oemail, $line_num ];
380 }
381
382 # this removes entries like
383 # Joe <blogs> <whatever>
384 # where there is a corresponding
385 # Joe <blogs> Joe X <blogs>
386 foreach my $pref (_sorted_hash_keys(\%pref_groups)) {
387 my $entries= $pref_groups{$pref};
388 foreach my $email (_sorted_hash_keys($entries)) {
389 my @names= _sorted_hash_keys($entries->{$email});
390 if ($names[0] eq "" and @names > 1) {
391 shift @names;
392 }
393 foreach my $name (@names) {
394 push @fixed, $entries->{$email}{$name};
395 }
396 }
397 }
398
399 # final pass through the dataset, build up a database
400 # we will use later for checks and updates, and reconstruct
401 # the canonical entries.
402 my $new_mailmap_hash= {};
403 my $mailmap_info= {};
404 foreach my $rec (@fixed) {
405 my ($pname, $pemail, $oname, $oemail, $line_num)= @$rec;
406 my $preferred= "$pname <$pemail>";
407 my $other;
408 if (defined $oemail) {
409 $other= $oname ? "$oname <$oemail>" : "<$oemail>";
410 }
411 if ($other and $other ne "<unknown>") {
412 _safe_set_key($mailmap_info, $O2P, $other, $preferred);
413 _safe_set_key($mailmap_info, $O2PN, $other, $pname);
414 _safe_set_key($mailmap_info, $O2PE, $other, $pemail);
415 }
416 $mailmap_info->{$P2O}{$preferred}{$other}++;
417 if ($pname ne "unknown") {
418 _safe_set_key($mailmap_info, $N2P, $pname, $preferred);
419 }
420 if ($pemail ne "unknown") {
421 _safe_set_key($mailmap_info, $E2P, $pemail, $preferred);
422 }
423 my $line= $preferred;
424 $line .= " $other" if $other;
425 $new_mailmap_hash->{$line}= $line_num;
426 }
427 return ($new_mailmap_hash, $mailmap_info);
428}
429
430sub add_new_mailmap_entries {
431 my ($mailmap_hash, $mailmap_info, $mailmap_file)= @_;
432
433 my $mailmap_add= $mailmap_info->{add}
434 or return 0;
435
436 my $num= 0;
437 for my $new (sort keys %$mailmap_add) {
438 !$mailmap_hash->{$new}++ or next;
439 warn encode_utf8 "Updating '$mailmap_file' with: $new\n";
440 $num++;
441 }
442 return $num;
443}
444
445sub read_and_update {
446 my ($authors_file, $mailmap_file)= @_;
447
448 # read the authors file and extract the info it contains
449 my ($author_info, $authors_preamble)= read_authors($authors_file);
450
451 # read the mailmap file.
452 my ($orig_mailmap_hash, $mailmap_preamble)= read_mailmap($mailmap_file);
453
454 # check and possibly fix the mailmap data, and build a set of precomputed
455 # datasets to work with it.
456 my ($mailmap_hash, $mailmap_info)=
457 check_fix_mailmap_hash($orig_mailmap_hash, $author_info);
458
459 # update the mailmap based on any check or fixes we just did,
460 # we always write even if we did not do any changes.
461 update_mailmap($mailmap_hash, $mailmap_preamble, $mailmap_file);
462
463 # read the commits names using git log, and compares and checks
464 # them against the data we have in authors.
465 read_commit_log($author_info, $mailmap_info);
466
467 # update the authors file with any changes, we always write,
468 # but we may not change anything
469 update_authors($author_info, $authors_preamble, $authors_file);
470
471 # check if we discovered new email data from the commits that
472 # we need to write back to disk.
473 add_new_mailmap_entries($mailmap_hash, $mailmap_info, $mailmap_file)
474 and update_mailmap($mailmap_hash, $mailmap_preamble,
475 $mailmap_file, $mailmap_info);
476
477 return undef;
478}
479
480sub main {
481 local $Data::Dumper::Sortkeys= 1;
482 my $authors_file= "AUTHORS";
483 my $mailmap_file= ".mailmap";
484 my $show_man= 0;
485 my $show_help= 0;
486
487 ## Parse options and print usage if there is a syntax error,
488 ## or if usage was explicitly requested.
489 GetOptions(
490 'help|?' => \$show_help,
491 'man' => \$show_man,
492 'authors_file|authors-file=s' => \$authors_file,
493 'mailmap_file|mailmap-file=s' => \$mailmap_file,
494 ) or pod2usage(2);
495 pod2usage(1) if $show_help;
496 pod2usage(-verbose => 2) if $show_man;
497
498 read_and_update($authors_file, $mailmap_file);
499 return 0; # 0 for no error - intended for exit();
500}
501
502exit(main()) unless caller;
503
5041;
505__END__
506
507=head1 NAME
508
509Porting/updateAUTHORS.pl - Automatically update AUTHORS and .mailmap
510based on commit data.
511
512=head1 SYNOPSIS
513
514Porting/updateAUTHORS.pl
515
516 Options:
517 --help brief help message
518 --man full documentation
519 --authors-file=FILE override default location of AUTHORS
520 --mailmap-file=FILE override default location of .mailmap
521
522=head1 OPTIONS
523
524=over 4
525
526=item --help
527
528Print a brief help message and exits.
529
530=item --man
531
532Prints the manual page and exits.
533
534=item --authors-file=FILE
535
536=item --authors_file=FILE
537
538Override the default location of the authors file, which is "AUTHORS" in
539the current directory.
540
541=item --mailmap-file=FILE
542
543=item --mailmap_file=FILE
544
545Override the default location of the mailmap file, which is ".mailmap"
546in the current directory.
547
548=back
549
550=head1 DESCRIPTION
551
552This program will automatically manage updates to the AUTHORS file and
553.mailmap file based on the data in our commits and the data in the files
554themselves. It uses no other sources of data. Expects to be run from
555the root a git repo of perl.
556
557In simple, execute the script and it will either die with a helpful
558message or it will update the files as necessary, possibly not at all if
559there is no need to do so. Note it will actually rewrite the files at
560least once, but it may not actually make any changes to their content.
561Thus to use the script is currently required that the files are
562modifiable.
563
564Review the changes it makes to make sure they are sane. If they are
565commit. If they are not then update the AUTHORS or .mailmap files as is
566appropriate and run the tool again. Typically you shouldn't need to do
567either unless you are changing the default name or email for a user. For
568instance if a person currently listed in the AUTHORS file whishes to
569change their preferred name or email then change it in the AUTHORS file
570and run the script again. I am not sure when you might need to directly
571modify .mailmap, usually modifying the AUTHORS file should suffice.
572
573=head1 FUNCTIONS
574
575Note that the file can also be used as a package. If you require the
576file then you can access the functions located within the package
577C<Porting::updateAUTHORS>. These are as follows:
578
579=over 4
580
581=item add_new_mailmap_entries($mailmap_hash, $mailmap_info, $mailmap_file)
582
583If any additions were identified while reading the commits this will
584inject them into the mailmap_hash so they can be written out. Returns a
585count of additions found.
586
587=item check_fix_mailmap_hash($mailmap_hash, $authors_info)
588
589Analyzes the data contained the in the .mailmap file and applies any
590automated fixes which are required and which it can automatically
591perform. Returns a hash of adjusted entries and a hash with additional
592metadata about the mailmap entries.
593
594=item main()
595
596This implements the command line version of this module, handle command
597line options, etc.
598
599=item merge_mailmap_with_AUTHORS_and_checkAUTHORS_data
600
601This is a utility function that combines data from this tool with data
602contained in F<Porting/checkAUTHORS.pl> it is not used directly, but was
603used to cleanup and generate the current version of the .mailmap file.
604
605=item parse_mailmap_hash($mailmap_hash)
606
607Takes a mailmap_hash and parses it and returns it as an array of array
608records with the contents:
609
610 [ $preferred_name, $preferred_email,
611 $other_name, $other_email,
612 $line_num ]
613
614=item read_and_update($authors_file, $mailmap_file)
615
616Wraps the other functions in this library and implements the logic and
617intent of this tool. Takes two arguments, the authors file name, and the
618mailmap file name. Returns nothing but may modify the AUTHORS file
619or the .mailmap file. Requires that both files are editable.
620
621=item read_commit_log($authors_info, $mailmap_info)
622
623Read the commit log and find any new names it contains.
624
625=item read_authors($authors_file)
626
627Read the AUTHORS file and return data about it.
628
629=item read_mailmap($mailmap_file)
630
631Read the .mailmap file and return data about it.
632
633=item update_authors($authors_info, $authors_preamble, $authors_file)
634
635Write out an updated AUTHORS file. This is done atomically
636using a rename, we will not leave a half modified file in
637the repo.
638
639=item update_mailmap($mm_hash, $mm_preamble, $mailmap_file, $mm_info)
640
641Write out an updated .mailmap file. This is done atomically
642using a rename, we will not leave a half modified file in
643the repo.
644
645=back
646
647=head1 TODO
648
649More documentation and testing.
650
651=head1 SEE ALSO
652
653F<Porting/checkAUTHORS.pl>
654
655=head1 AUTHOR
656
657Yves Orton <demerphq@gmail.com>
658
659=cut