This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
perldelta: Regex sets are no longer experimental
[perl5.git] / Porting / updateAUTHORS.pl
CommitLineData
a5931453
YO
1#!/usr/bin/env perl
2package Porting::updateAUTHORS;
3use strict;
4use warnings;
5use Getopt::Long qw(GetOptions);
6use Pod::Usage qw(pod2usage);
7use Data::Dumper;
8use Encode qw(encode_utf8 decode_utf8 decode);
9
10# The style of this file is determined by:
11#
12# perltidy -w -ple -bbb -bbc -bbs -nolq -l=80 -noll -nola -nwls='=' \
13# -isbc -nolc -otr -kis -ci=4 -se -sot -sct -nsbl -pt=2 -fs \
14# -fsb='#start-no-tidy' -fse='#end-no-tidy'
15
16# Info and config for passing to git log.
17# %an: author name
18# %aN: author name (respecting .mailmap, see git-shortlog(1) or git-blame(1))
19# %ae: author email
20# %aE: author email (respecting .mailmap, see git-shortlog(1) or git-blame(1))
21# %cn: committer name
22# %cN: committer name (respecting .mailmap, see git-shortlog(1) or git-blame(1))
23# %ce: committer email
24# %cE: committer email (respecting .mailmap, see git-shortlog(1) or git-blame(1))
25# %H: commit hash
26# %h: abbreviated commit hash
27# %s: subject
28# %x00: print a byte from a hex code
29
30my %field_spec= (
31 "an" => "author_name",
32 "aN" => "author_name_mm",
33 "ae" => "author_email",
34 "aE" => "author_email_mm",
35 "cn" => "committer_name",
36 "cN" => "committer_name_mm",
37 "ce" => "committer_email",
38 "cE" => "committer_email_mm",
39 "H" => "commit_hash",
40 "h" => "abbrev_hash",
41 "s" => "commit_subject",
42);
43
44my @field_codes= sort keys %field_spec;
45my @field_names= map { $field_spec{$_} } @field_codes;
46my $tformat= join "%x00", map { "%" . $_ } @field_codes;
47
48sub _make_name_author_info {
49 my ($author_info, $commit_info, $name_key)= @_;
50 (my $email_key= $name_key) =~ s/name/email/;
51 my $email= $commit_info->{$email_key};
52 my $name= $commit_info->{$name_key};
53
54 my $line= $author_info->{"email2line"}{$email}
55 // $author_info->{"name2line"}{$name};
56
57 $line //= sprintf "%-31s<%s>",
58 $commit_info->{$name_key}, $commit_info->{$email_key};
59 return $line;
60}
61
62sub _make_name_simple {
63 my ($commit_info, $key)= @_;
64 my $name_key= $key . "_name";
65 my $email_key= $key . "_email";
66 return sprintf "%s <%s>", $commit_info->{$name_key},
67 lc($commit_info->{$email_key});
68}
69
70sub read_commit_log {
71 my ($author_info, $mailmap_info)= @_;
72 $author_info ||= {};
73 open my $fh, qq(git log --pretty='tformat:$tformat' |);
74
75 while (defined(my $line= <$fh>)) {
76 chomp $line;
77 $line= decode_utf8($line);
78 my $commit_info= {};
79 @{$commit_info}{@field_names}= split /\0/, $line, 0 + @field_names;
80
81 my $author_name_mm= _make_name_author_info($author_info, $commit_info,
82 "author_name_mm");
83
84 my $committer_name_mm=
85 _make_name_author_info($author_info, $commit_info,
86 "committer_name_mm");
87
88 my $author_name_real= _make_name_simple($commit_info, "author");
89
90 my $committer_name_real= _make_name_simple($commit_info, "committer");
91
92 _check_name_mailmap(
93 $mailmap_info, $author_name_mm, $author_name_real,
94 $commit_info, "author name"
95 );
96 _check_name_mailmap($mailmap_info, $committer_name_mm,
97 $committer_name_real, $commit_info, "committer name");
98
99 $author_info->{"lines"}{$author_name_mm}++;
100 $author_info->{"lines"}{$committer_name_mm}++;
101 }
102 return $author_info;
103}
104
105sub read_authors {
106 my ($authors_file)= @_;
107 $authors_file ||= "AUTHORS";
108
109 my @authors_preamble;
110 open my $in_fh, "<", $authors_file
111 or die "Failed to open for read '$authors_file': $!";
112 while (defined(my $line= <$in_fh>)) {
113 chomp $line;
114 push @authors_preamble, $line;
115 if ($line =~ /^--/) {
116 last;
117 }
118 }
119 my %author_info;
120 while (defined(my $line= <$in_fh>)) {
121 chomp $line;
122 $line= decode_utf8($line);
123 my ($name, $email);
124 my $copy= $line;
125 $copy =~ s/\s+\z//;
126 if ($copy =~ s/<([^<>]*)>//) {
127 $email= $1;
128 }
129 elsif ($copy =~ s/\s+(\@\w+)\z//) {
130 $email= $1;
131 }
132 $copy =~ s/\s+\z//;
133 $name= $copy;
134 $email //= "unknown";
135 $email= lc($email);
136
137 $author_info{"lines"}{$line}++;
138 $author_info{"email2line"}{$email}= $line
139 if $email and $email ne "unknown";
140 $author_info{"name2line"}{$name}= $line
141 if $name and $name ne "unknown";
142 $author_info{"email2name"}{ lc($email) }= $name
143 if $email
144 and $name
145 and $email ne "unknown";
146 $author_info{"name2email"}{$name}= $email
147 if $name and $name ne "unknown";
148 }
149 close $in_fh
150 or die "Failed to close '$authors_file': $!";
151 return (\%author_info, \@authors_preamble);
152}
153
154sub update_authors {
155 my ($author_info, $authors_preamble, $authors_file)= @_;
156 $authors_file ||= "AUTHORS";
157 my $authors_file_new= $authors_file . ".new";
158 open my $out_fh, ">", $authors_file_new
159 or die "Failed to open for write '$authors_file_new': $!";
707d8393 160 binmode $out_fh;
a5931453
YO
161 foreach my $line (@$authors_preamble) {
162 print $out_fh encode_utf8($line), "\n"
163 or die "Failed to print to '$authors_file_new': $!";
164 }
165 foreach my $author (_sorted_hash_keys($author_info->{"lines"})) {
166 next if $author =~ /^unknown/;
167 if ($author =~ s/\s*<unknown>\z//) {
168 next if $author =~ /^\w+$/;
169 }
170 print $out_fh encode_utf8($author), "\n"
171 or die "Failed to print to '$authors_file_new': $!";
172 }
173 close $out_fh
174 or die "Failed to close '$authors_file_new': $!";
175 rename $authors_file_new, $authors_file
176 or die "Failed to rename '$authors_file_new' to '$authors_file':$!";
177 return 1; # ok
178}
179
180sub read_mailmap {
181 my ($mailmap_file)= @_;
182 $mailmap_file ||= ".mailmap";
183
184 open my $in, "<", $mailmap_file
185 or die "Failed to read '$mailmap_file': $!";
186 my %mailmap_hash;
187 my @mailmap_preamble;
188 my $line_num= 0;
189 while (defined(my $line= <$in>)) {
190 ++$line_num;
191 next unless $line =~ /\S/;
192 chomp($line);
193 $line= decode_utf8($line);
194 if ($line =~ /^#/) {
195 if (!keys %mailmap_hash) {
196 push @mailmap_preamble, $line;
197 }
198 else {
199 die encode_utf8 "Not expecting comments after header ",
200 "finished at line $line_num!\nLine: $line\n";
201 }
202 }
203 else {
204 $mailmap_hash{$line}= $line_num;
205 }
206 }
207 close $in;
208 return \%mailmap_hash, \@mailmap_preamble;
209}
210
211# this can be used to extract data from the checkAUTHORS data
212sub merge_mailmap_with_AUTHORS_and_checkAUTHORS_data {
213 my ($mailmap_hash, $author_info)= @_;
214 require 'Porting/checkAUTHORS.pl' or die "No authors?";
215 my ($map, $preferred_email_or_github)=
216 Porting::checkAUTHORS::generate_known_author_map();
217
218 foreach my $old (sort keys %$preferred_email_or_github) {
219 my $new= $preferred_email_or_github->{$old};
220 next if $old !~ /\@/ or $new !~ /\@/ or $new eq $old;
221 my $name= $author_info->{"email2name"}{$new};
222 if ($name) {
223 my $line= "$name <$new> <$old>";
224 $mailmap_hash->{$line}++;
225 }
226 }
227 return 1; # ok
228}
229
230sub _sorted_hash_keys {
231 my ($hash)= @_;
232 my @sorted= sort { lc($a) cmp lc($b) || $a cmp $b } keys %$hash;
233 return @sorted;
234}
235
236sub update_mailmap {
237 my ($mailmap_hash, $mailmap_preamble, $mailmap_file)= @_;
238 $mailmap_file ||= ".mailmap";
239
240 my $mailmap_file_new= $mailmap_file . "_new";
241 open my $out, ">", $mailmap_file_new
242 or die "Failed to write '$mailmap_file_new':$!";
707d8393 243 binmode $out;
a5931453
YO
244 foreach my $line (@$mailmap_preamble, _sorted_hash_keys($mailmap_hash),) {
245 print $out encode_utf8($line), "\n"
246 or die "Failed to print to '$mailmap_file': $!";
247 }
248 close $out;
249 rename $mailmap_file_new, $mailmap_file
250 or die "Failed to rename '$mailmap_file_new' to '$mailmap_file':$!";
251 return 1; # ok
252}
253
254sub parse_mailmap_hash {
255 my ($mailmap_hash)= @_;
256 my @recs;
257 foreach my $line (sort keys %$mailmap_hash) {
258 my $line_num= $mailmap_hash->{$line};
259 $line =~ /^ \s* (?: ( [^<>]*? ) \s+ )? <([^<>]*)>
260 (?: \s+ (?: ( [^<>]*? ) \s+ )? <([^<>]*)> )? \s* \z /x
261 or die encode_utf8 "Failed to parse line num $line_num: '$line'";
262 if (!$1 or !$2) {
263 die encode_utf8 "Both preferred name and email are mandatory ",
264 "in line num $line_num: '$line'";
265 }
266
267 # [ preferred_name, preferred_email, other_name, other_email ]
268 push @recs, [ $1, $2, $3, $4, $line_num ];
269 }
270 return \@recs;
271}
272
273sub _safe_set_key {
274 my ($hash, $root_key, $key, $val, $pretty_name)= @_;
275 $hash->{$root_key}{$key} //= $val;
276 my $prev= $hash->{$root_key}{$key};
277 if ($prev ne $val) {
278 die encode_utf8 "Collision on mapping $root_key: "
279 . " '$key' maps to '$prev' and '$val'\n";
280 }
281}
282
283my $O2P= "other2preferred";
284my $O2PN= "other2preferred_name";
285my $O2PE= "other2preferred_email";
286my $P2O= "preferred2other";
287my $N2P= "name2preferred";
288my $E2P= "email2preferred";
289
290my $blurb= ""; # FIXME - replace with a nice message
291
292sub _check_name_mailmap {
293 my ($mailmap_info, $auth_name, $raw_name, $commit_info, $descr)= @_;
294 my $name= $auth_name;
295 $name =~ s/<([^<>]+)>/<\L$1\E>/
296 or $name =~ s/(\s)(\@\w+)\z/$1<\L$2\E>/
297 or $name .= " <unknown>";
298
299 $name =~ s/\s+/ /g;
300
301 if (!$mailmap_info->{$P2O}{$name}) {
302 warn encode_utf8 sprintf "Unknown %s '%s' in commit %s '%s'\n%s",
303 $descr,
304 $name, $commit_info->{"abbrev_hash"},
305 $commit_info->{"commit_subject"},
306 $blurb;
307 $mailmap_info->{add}{"$name $raw_name"}++;
308 return 0;
309 }
310 elsif (!$mailmap_info->{$P2O}{$name}{$raw_name}) {
311 $mailmap_info->{add}{"$name $raw_name"}++;
312 }
313 return 1;
314}
315
316sub check_fix_mailmap_hash {
317 my ($mailmap_hash, $authors_info)= @_;
318 my $parsed= parse_mailmap_hash($mailmap_hash);
319 my @fixed;
320 my %seen_map;
321 my %pref_groups;
322
323 # first pass through the data, do any conversions, eg, LC
324 # the email address, decode any MIME-Header style email addresses.
325 # We also correct any preferred name entries so they match what
326 # we already have in AUTHORS, and check that there aren't collisions
327 # or other issues in the data.
328 foreach my $rec (@$parsed) {
329 my ($pname, $pemail, $oname, $oemail, $line_num)= @$rec;
330 $pemail= lc($pemail);
331 $oemail= lc($oemail) if defined $oemail;
332 if ($pname =~ /=\?UTF-8\?/) {
333 $pname= decode("MIME-Header", $pname);
334 }
335 my $auth_email= $authors_info->{"name2email"}{$pname};
336 if ($auth_email) {
337 ## this name exists in authors, so use its email data for pemail
338 $pemail= $auth_email;
339 }
340 my $auth_name= $authors_info->{"email2name"}{$pemail};
341 if ($auth_name) {
342 ## this email exists in authors, so use its name data for pname
343 $pname= $auth_name;
344 }
345
346 # neither name nor email exist in authors.
347 if ($pname ne "unknown") {
348 if (my $email= $seen_map{"name"}{$pname}) {
349 ## we have seen this pname before, check the pemail
350 ## is consistent
351 if ($email ne $pemail) {
352 warn encode_utf8 "Inconsistent emails for name '$pname'"
353 . " at line num $line_num: keeping '$email',"
354 . " ignoring '$pemail'\n";
355 $pemail= $email;
356 }
357 }
358 else {
359 $seen_map{"name"}{$pname}= $pemail;
360 }
361 }
362 if ($pemail ne "unknown") {
363 if (my $name= $seen_map{"email"}{$pemail}) {
364 ## we have seen this preferred_email before, check the preferred_name
365 ## is consistent
366 if ($name ne $pname) {
367 warn encode_utf8 "Inconsistent name for email '$pemail'"
368 . " at line num $line_num: keeping '$name', ignoring"
369 . " '$pname'\n";
370 $pname= $name;
371 }
372 }
373 else {
374 $seen_map{"email"}{$pemail}= $pname;
375 }
376 }
377
378 # Build an index of "preferred name/email" to other-email, other name
379 # we use this later to remove redundant entries missing a name.
380 $pref_groups{"$pname $pemail"}{$oemail}{ $oname || "" }=
381 [ $pname, $pemail, $oname, $oemail, $line_num ];
382 }
383
384 # this removes entries like
385 # Joe <blogs> <whatever>
386 # where there is a corresponding
387 # Joe <blogs> Joe X <blogs>
388 foreach my $pref (_sorted_hash_keys(\%pref_groups)) {
389 my $entries= $pref_groups{$pref};
390 foreach my $email (_sorted_hash_keys($entries)) {
391 my @names= _sorted_hash_keys($entries->{$email});
392 if ($names[0] eq "" and @names > 1) {
393 shift @names;
394 }
395 foreach my $name (@names) {
396 push @fixed, $entries->{$email}{$name};
397 }
398 }
399 }
400
401 # final pass through the dataset, build up a database
402 # we will use later for checks and updates, and reconstruct
403 # the canonical entries.
404 my $new_mailmap_hash= {};
405 my $mailmap_info= {};
406 foreach my $rec (@fixed) {
407 my ($pname, $pemail, $oname, $oemail, $line_num)= @$rec;
408 my $preferred= "$pname <$pemail>";
409 my $other;
410 if (defined $oemail) {
411 $other= $oname ? "$oname <$oemail>" : "<$oemail>";
412 }
413 if ($other and $other ne "<unknown>") {
414 _safe_set_key($mailmap_info, $O2P, $other, $preferred);
415 _safe_set_key($mailmap_info, $O2PN, $other, $pname);
416 _safe_set_key($mailmap_info, $O2PE, $other, $pemail);
417 }
418 $mailmap_info->{$P2O}{$preferred}{$other}++;
419 if ($pname ne "unknown") {
420 _safe_set_key($mailmap_info, $N2P, $pname, $preferred);
421 }
422 if ($pemail ne "unknown") {
423 _safe_set_key($mailmap_info, $E2P, $pemail, $preferred);
424 }
425 my $line= $preferred;
426 $line .= " $other" if $other;
427 $new_mailmap_hash->{$line}= $line_num;
428 }
429 return ($new_mailmap_hash, $mailmap_info);
430}
431
432sub add_new_mailmap_entries {
433 my ($mailmap_hash, $mailmap_info, $mailmap_file)= @_;
434
435 my $mailmap_add= $mailmap_info->{add}
436 or return 0;
437
438 my $num= 0;
439 for my $new (sort keys %$mailmap_add) {
440 !$mailmap_hash->{$new}++ or next;
441 warn encode_utf8 "Updating '$mailmap_file' with: $new\n";
442 $num++;
443 }
444 return $num;
445}
446
447sub read_and_update {
448 my ($authors_file, $mailmap_file)= @_;
449
450 # read the authors file and extract the info it contains
451 my ($author_info, $authors_preamble)= read_authors($authors_file);
452
453 # read the mailmap file.
454 my ($orig_mailmap_hash, $mailmap_preamble)= read_mailmap($mailmap_file);
455
456 # check and possibly fix the mailmap data, and build a set of precomputed
457 # datasets to work with it.
458 my ($mailmap_hash, $mailmap_info)=
459 check_fix_mailmap_hash($orig_mailmap_hash, $author_info);
460
461 # update the mailmap based on any check or fixes we just did,
462 # we always write even if we did not do any changes.
463 update_mailmap($mailmap_hash, $mailmap_preamble, $mailmap_file);
464
465 # read the commits names using git log, and compares and checks
466 # them against the data we have in authors.
467 read_commit_log($author_info, $mailmap_info);
468
469 # update the authors file with any changes, we always write,
470 # but we may not change anything
471 update_authors($author_info, $authors_preamble, $authors_file);
472
473 # check if we discovered new email data from the commits that
474 # we need to write back to disk.
475 add_new_mailmap_entries($mailmap_hash, $mailmap_info, $mailmap_file)
476 and update_mailmap($mailmap_hash, $mailmap_preamble,
477 $mailmap_file, $mailmap_info);
478
479 return undef;
480}
481
482sub main {
483 local $Data::Dumper::Sortkeys= 1;
484 my $authors_file= "AUTHORS";
485 my $mailmap_file= ".mailmap";
486 my $show_man= 0;
487 my $show_help= 0;
488
489 ## Parse options and print usage if there is a syntax error,
490 ## or if usage was explicitly requested.
491 GetOptions(
492 'help|?' => \$show_help,
493 'man' => \$show_man,
494 'authors_file|authors-file=s' => \$authors_file,
495 'mailmap_file|mailmap-file=s' => \$mailmap_file,
496 ) or pod2usage(2);
497 pod2usage(1) if $show_help;
498 pod2usage(-verbose => 2) if $show_man;
499
500 read_and_update($authors_file, $mailmap_file);
501 return 0; # 0 for no error - intended for exit();
502}
503
504exit(main()) unless caller;
505
5061;
507__END__
508
509=head1 NAME
510
511Porting/updateAUTHORS.pl - Automatically update AUTHORS and .mailmap
512based on commit data.
513
514=head1 SYNOPSIS
515
516Porting/updateAUTHORS.pl
517
518 Options:
519 --help brief help message
520 --man full documentation
521 --authors-file=FILE override default location of AUTHORS
522 --mailmap-file=FILE override default location of .mailmap
523
524=head1 OPTIONS
525
526=over 4
527
528=item --help
529
530Print a brief help message and exits.
531
532=item --man
533
534Prints the manual page and exits.
535
536=item --authors-file=FILE
537
538=item --authors_file=FILE
539
540Override the default location of the authors file, which is "AUTHORS" in
541the current directory.
542
543=item --mailmap-file=FILE
544
545=item --mailmap_file=FILE
546
547Override the default location of the mailmap file, which is ".mailmap"
548in the current directory.
549
550=back
551
552=head1 DESCRIPTION
553
554This program will automatically manage updates to the AUTHORS file and
555.mailmap file based on the data in our commits and the data in the files
556themselves. It uses no other sources of data. Expects to be run from
557the root a git repo of perl.
558
559In simple, execute the script and it will either die with a helpful
560message or it will update the files as necessary, possibly not at all if
561there is no need to do so. Note it will actually rewrite the files at
562least once, but it may not actually make any changes to their content.
563Thus to use the script is currently required that the files are
564modifiable.
565
566Review the changes it makes to make sure they are sane. If they are
567commit. If they are not then update the AUTHORS or .mailmap files as is
568appropriate and run the tool again. Typically you shouldn't need to do
569either unless you are changing the default name or email for a user. For
570instance if a person currently listed in the AUTHORS file whishes to
571change their preferred name or email then change it in the AUTHORS file
572and run the script again. I am not sure when you might need to directly
573modify .mailmap, usually modifying the AUTHORS file should suffice.
574
575=head1 FUNCTIONS
576
577Note that the file can also be used as a package. If you require the
578file then you can access the functions located within the package
579C<Porting::updateAUTHORS>. These are as follows:
580
581=over 4
582
583=item add_new_mailmap_entries($mailmap_hash, $mailmap_info, $mailmap_file)
584
585If any additions were identified while reading the commits this will
586inject them into the mailmap_hash so they can be written out. Returns a
587count of additions found.
588
589=item check_fix_mailmap_hash($mailmap_hash, $authors_info)
590
591Analyzes the data contained the in the .mailmap file and applies any
592automated fixes which are required and which it can automatically
593perform. Returns a hash of adjusted entries and a hash with additional
594metadata about the mailmap entries.
595
596=item main()
597
598This implements the command line version of this module, handle command
599line options, etc.
600
601=item merge_mailmap_with_AUTHORS_and_checkAUTHORS_data
602
603This is a utility function that combines data from this tool with data
604contained in F<Porting/checkAUTHORS.pl> it is not used directly, but was
605used to cleanup and generate the current version of the .mailmap file.
606
607=item parse_mailmap_hash($mailmap_hash)
608
609Takes a mailmap_hash and parses it and returns it as an array of array
610records with the contents:
611
612 [ $preferred_name, $preferred_email,
613 $other_name, $other_email,
614 $line_num ]
615
616=item read_and_update($authors_file, $mailmap_file)
617
618Wraps the other functions in this library and implements the logic and
619intent of this tool. Takes two arguments, the authors file name, and the
620mailmap file name. Returns nothing but may modify the AUTHORS file
621or the .mailmap file. Requires that both files are editable.
622
623=item read_commit_log($authors_info, $mailmap_info)
624
625Read the commit log and find any new names it contains.
626
627=item read_authors($authors_file)
628
629Read the AUTHORS file and return data about it.
630
631=item read_mailmap($mailmap_file)
632
633Read the .mailmap file and return data about it.
634
635=item update_authors($authors_info, $authors_preamble, $authors_file)
636
637Write out an updated AUTHORS file. This is done atomically
638using a rename, we will not leave a half modified file in
639the repo.
640
641=item update_mailmap($mm_hash, $mm_preamble, $mailmap_file, $mm_info)
642
643Write out an updated .mailmap file. This is done atomically
644using a rename, we will not leave a half modified file in
645the repo.
646
647=back
648
649=head1 TODO
650
651More documentation and testing.
652
653=head1 SEE ALSO
654
655F<Porting/checkAUTHORS.pl>
656
657=head1 AUTHOR
658
659Yves Orton <demerphq@gmail.com>
660
661=cut