This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Add comment explaining where terrible code comes from
[perl5.git] / Porting / checkAUTHORS.pl
old mode 100644 (file)
new mode 100755 (executable)
index 5c2a73c..44f82b5
@@ -5,56 +5,88 @@ use Getopt::Long;
 use Text::Wrap;
 $Text::Wrap::columns = 80;
 
-my ($rank, $percentage, $cumulative, $reverse, $ta, @authors, %authors,
-    %untraced, %patchers, %committers, %real_names, $as_test_output);
-my $result = GetOptions ("rank" => \$rank,            # rank authors
-             "thanks-applied" => \$ta,        # ranks committers
-             "acknowledged=s"   => \@authors ,  # authors files
+my ($rank, $ta, $ack, $who, $tap) = (0) x 5;
+my ($author_file, $percentage, $cumulative, $reverse);
+my (%authors, %untraced, %patchers, %committers, %real_names);
+
+my $result = GetOptions (
+             # modes
+             "who" => \$who,
+             "rank" => \$rank,
+             "thanks-applied" => \$ta,
+             "missing"   => \$ack ,
+             "tap" => \$tap,
+             # modifiers
+             "authors" => \$author_file,
              "percentage" => \$percentage,      # show as %age
              "cumulative" => \$cumulative,
              "reverse" => \$reverse,
-             "tap" => \$as_test_output,
             );
 
-if (!$result or (($rank||0) + ($ta||0) + (@authors ? 1 : 0) != 1) or !@ARGV) {
+if (!$result or ( $rank + $ta + $who + $ack + $tap != 1 ) or !@ARGV) {
     usage();
 }
 
+$author_file ||= './AUTHORS';
+die "Can't locate '$author_file'. Specify it with '--author <path>'."
+  unless -f $author_file;
+
 my $map = generate_known_author_map();
 
-read_authors_files(@authors);
+read_authors_files($author_file);
 
-parse_commits_from_stdin();
 
 if ($rank) {
+  parse_commits_from_stdin();
   display_ordered(\%patchers);
 } elsif ($ta) {
+  parse_commits_from_stdin();
   display_ordered(\%committers);
-} elsif ($as_test_output) {
+} elsif ($tap) {
+  parse_commits_from_stdin_authors();
   display_test_output(\%patchers, \%authors, \%real_names);
-} elsif (%authors) {
+} elsif ($ack) {
+  parse_commits_from_stdin();
   display_missing_authors(\%patchers, \%authors, \%real_names);
+} elsif ($who) {
+  parse_commits_from_stdin();
+  list_authors(\%patchers, \%authors);
 }
 
-
-
 exit(0);
 
 sub usage {
 
   die <<"EOS";
-$0 --rank changes                           # rank authors by patches
-$0 --acknowledged <authors file> changes    # Display unacknowledged authors
-$0 --thanks-applied changes                 # ranks committers of others' patches
-$0 --percentage ...                         # show rankings as percentages
-$0 --cumulative ...                         # show rankings cumulatively
-$0 --reverse ...                            # show rankings in reverse
-Specify stdin as - if needs be. Remember that option names can be abbreviated.
-Generate changes with git log --pretty=fuller rev1..rev2
+Usage: $0 [modes] [modifiers] <git-log-output-file>
+
+Modes (use only one):
+   --who                          # show list of unique authors by full name
+   --rank                         # rank authors by patches
+   --thanks-applied               # ranks committers of others' patches
+   --missing                      # display authors not in AUTHORS
+   --tap                          # show authors present/missing as TAP
+
+Modifiers:
+   --authors <authors-file>       # path to authors file (default: ./AUTHORS)
+   --percentage                   # show rankings as percentages
+   --cumulative                   # show rankings cumulatively
+   --reverse                      # show rankings in reverse
+
+Generate git-log-output-file with git log --pretty=fuller rev1..rev2
+(or pipe by specifying '-' for stdin).  For example:
+  \$ git log --pretty=fuller v5.12.0..v5.12.1 > gitlog
+  \$ perl Porting/checkAUTHORS.pl --rank --percentage gitlog
 EOS
 }
 
-
+sub list_authors {
+    my ($patchers, $authors) = @_;
+    binmode(STDOUT, ":utf8");
+    print wrap '', '', join(', ', sort { lc $a cmp lc $b }
+                      map { $authors->{$_} }
+                      keys %$patchers) . ".\n";
+}
 
 sub parse_commits_from_stdin {
     my @lines = split( /^commit\s*/sm, join( '', <> ) );
@@ -77,6 +109,17 @@ sub parse_commits_from_stdin {
 
 }
 
+# just grab authors. Quicker than parse_commits_from_stdin
+
+sub parse_commits_from_stdin_authors {
+    while (<>) {
+        next unless /^Author:\s*(.*)$/;
+       my $author = $1;
+       $author = _raw_address($author);
+       $patchers{$author}++;
+    }
+}
+
 
 sub generate_known_author_map {
     my %map;
@@ -166,16 +209,19 @@ sub generate_known_author_map {
 sub read_authors_files {
     my @authors = (@_);
     return unless (@authors);
-    my %raw;
+    my (%count, %raw);
     foreach my $filename (@authors) {
         open FH, "<$filename" or die "Can't open $filename: $!";
+        binmode FH, ':encoding(ISO-8859-1)';
         while (<FH>) {
             next if /^\#/;
             next if /^-- /;
-            if (/<([^>]+)>/) {
-
+            if (/^([^<]+)<([^>]+)>/) {
                 # Easy line.
-                $raw{$1}++;
+                my ($name, $email) = ($1, $2);
+                $name =~ s/\s*\z//;
+                $raw{$email} = $name;
+                $count{$email}++;
             } elsif (/^([-A-Za-z0-9 .\'À-ÖØöø-ÿ]+)[\t\n]/) {
 
                 # Name only
@@ -189,12 +235,11 @@ sub read_authors_files {
         }
     }
     foreach ( keys %raw ) {
-        print "E-mail $_ occurs $raw{$_} times\n" if $raw{$_} > 1;
-        $_ = lc $_;
-        $authors{ $map->{$_} || $_ }++;
+        print "E-mail $_ occurs $count{$_} times\n" if $count{$_} > 1;
+        my $lc = lc $_;
+        $authors{ $map->{$lc} || $lc } = $raw{$_};
     }
-    ++$authors{'!'};
-    ++$authors{'?'};
+    $authors{$_} = $_ for qw(? !);
 }
 
 sub display_test_output {
@@ -202,9 +247,9 @@ sub display_test_output {
     my $authors    = shift;
     my $real_names = shift;
     my $count = 0;
+    printf "1..%d\n", scalar keys %$patchers;
     foreach ( sort keys %$patchers ) {
-           $count++;
-
+        $count++;
         if ($authors->{$_}) {
             print "ok $count - ".$real_names->{$_} ." $_\n";
         } else {
@@ -212,7 +257,6 @@ sub display_test_output {
         }
 
     }
-    print "1..$count\n";
 }
 
 sub display_missing_authors {
@@ -305,7 +349,7 @@ __DATA__
 # List of mappings. First entry the "correct" email address, as appears
 # in the AUTHORS file. Second is any "alias" mapped to it.
 #
-# If the "correct" email address is a '+', the entry above is reused; 
+# If the "correct" email address is a '+', the entry above is reused;
 # this for addresses with more than one alias.
 #
 # Note that all entries are in lowercase. Further, no '@' signs should
@@ -413,7 +457,8 @@ stevep                                  steve\100fisharerojo.org
 +                                       root\100dixie.cscaper.com
 timb                                    Tim.Bunce\100pobox.com
 +                                       tim.bunce\100ig.co.uk
-
+tonyc                                   tony\100develop-help.com
++                                       tony\100openbsd32.tony.develop-help.com
 
 #
 # Mere mortals.
@@ -464,8 +509,10 @@ andreas.koenig\100anima.de              andreas.koenig.gmwojprw\100franz.ak.mind
 +                                       root\100dubravka.in-berlin.de
 anno4000\100lublin.zrz.tu-berlin.de     anno4000\100mailbox.tu-berlin.de
 +                                       siegel\100zrz.tu-berlin.de
+apocal@cpan.org                         perl\1000ne.us
 arnold\100gnu.ai.mit.edu                arnold\100emoryu2.arpa
 +                                       gatech!skeeve!arnold
+arodland\100cpan.org                    andrew\100hbslabs.com
 arussell\100cs.uml.edu                  adam\100adam-pc.(none)
 ash\100cpan.org                         ash_cpan\100firemirror.com
 avarab\100gmail.com                     avar\100cpan.org
@@ -484,10 +531,12 @@ ben\100morrow.me.uk                     mauzo\100csv.warwick.ac.uk
 bepi\100perl.it                         enrico.sorcinelli\100gmail.com
 bert\100alum.mit.edu                    bert\100genscan.com
 bigbang7\100gmail.com                   ddascalescu+github\100gmail.com
+blgl\100stacken.kth.se                  blgl\100hagernas.com
 brian.d.foy\100gmail.com                bdfoy\100cpan.org
 BQW10602\100nifty.com                   sadahiro\100cpan.org
 
 chromatic\100wgz.org                    chromatic\100rmci.net
+claes\100surfar.nu                      claes\100versed.se
 clintp\100geeksalad.org                 cpierce1\100ford.com
 clkao\100clkao.org                      clkao\100bestpractical.com
 corion\100corion.net                    corion\100cpan.org
@@ -512,6 +561,7 @@ david.dyck\100fluke.com                 dcd\100tc.fluke.com
 david\100kineticode.com                 david\100wheeler.com
 +                                       david\100wheeler.net
 dennis\100booking.com                   dennis\100camel.ams6.corp.booking.com
++                                       dennis\100kaarsemaker.net
 dev-perl\100pimb.org                    knew-p5p\100pimb.org
 +                                       lists-p5p\100pimb.org
 djberg86\100attbi.com                   djberg96\100attbi.com
@@ -550,6 +600,7 @@ hansmu\100xs4all.nl                     hansm\100icgroup.nl
 hio\100ymir.co.jp                       hio\100hio.jp
 hops\100sco.com                         hops\100scoot.pdev.sco.com
 
+ian.goodacre\100xtra.co.nz              ian\100debian.lan
 ingo_weinhold\100gmx.de                 bonefish\100cs.tu-berlin.de
 
 james\100mastros.biz                    theorb\100desert-island.me.uk
@@ -564,7 +615,6 @@ jasons\100cs.unm.edu                    jasons\100sandy-home.arc.unm.edu
 jbuehler\100hekimian.com                jhpb\100hekimian.com
 jcromie\100100divsol.com                jcromie\100cpan.org
 +                                       jim.cromie\100gmail.com
-jidanni\100jidanni.org                  jidanni\100hoffa.dreamhost.com
 jdhedden\100cpan.org                    jerry\100hedden.us
 +                                       jdhedden\1001979.usna.com
 +                                       jdhedden\100gmail.com
@@ -574,7 +624,9 @@ jeremy\100zawodny.com                   jzawodn\100wcnet.org
 jesse\100sig.bsh.com                    jesse\100ginger
 jfriedl\100yahoo.com                    jfriedl\100yahoo-inc.com
 jfs\100fluent.com                       jfs\100jfs.fluent.com
-jhannah\100omnihotels.com               jay\100jays.net
+jhannah\100mutationgrid.com             jay\100jays.net
++                                       jhannah\100omnihotels.com
+jidanni\100jidanni.org                  jidanni\100hoffa.dreamhost.com
 jjore\100cpan.org                       twists\100gmail.com
 jns\100integration-house.com            jns\100gellyfish.com
 +                                       gellyfish\100gellyfish.com
@@ -587,6 +639,7 @@ jpeacock\100rowman.com                  john.peacock\100havurah-software.org
 +                                       jpeacock\100havurah-software.org
 +                                       jpeacock\100dsl092-147-156.wdc1.dsl.speakeasy.net
 +                                       jpeacock\100jpeacock-hp.doesntexist.org
++                                       jpeacock\100cpan.org
 jql\100accessone.com                    jql\100jql.accessone.com
 jsm28\100hermes.cam.ac.uk               jsm28\100cam.ac.uk
 
@@ -598,6 +651,8 @@ kane\100dwim.org                        kane\100xs4all.net
 ken\100mathforum.org                    kenahoo\100gmail.com
 +                                       ken.williams\100thomsonreuters.com
 kroepke\100dolphin-services.de          kay\100dolphin-services.de
+kst\100mib.org                          kst\100cts.com
++                                       kst\100SDSC.EDU
 kstar\100wolfetech.com                  kstar\100cpan.org
 +                                       kurt_starsinic\100ml.com
 +                                       kstar\100www.chapin.edu
@@ -609,7 +664,7 @@ larry\100wall.org                       lwall\100jpl-devvax.jpl.nasa.gov
 +                                       lwall\100scalpel.netlabs.com
 laszlo.molnar\100eth.ericsson.se        molnarl\100cdata.tvnet.hu
 +                                       ml1050\100freemail.hu
-lewart\100uiuc.edu                      lewart\100vadds.cvm.uiuc.edu    
+lewart\100uiuc.edu                      lewart\100vadds.cvm.uiuc.edu
 +                                       d-lewart\100uiuc.edu
 lkundrak\100v3.sk                      lubo.rintel\100gooddata.com
 lstein\100cshl.org                      lstein\100formaggio.cshl.org
@@ -620,6 +675,7 @@ mab\100wdl.loral.com                    markb\100rdcf.sm.unisys.com
 marcel\100codewerk.com                  gr\100univie.ac.at
 mark.p.lutz\100boeing.com               tecmpl1\100triton.ca.boeing.com
 marnix\100gmail.com                     pttesac!marnix!vanam
+marty+p5p\100kasei.com                  marty\100martian.org
 mats\100sm6sxl.net                      mats\100sm5sxl.net
 mbarbon\100dsi.unive.it                 mattia.barbon\100libero.it
 mcmahon\100ibiblio.org                  mcmahon\100metalab.unc.edu
@@ -627,10 +683,12 @@ me\100davidglasser.net                  glasser\100tang-eleven-seventy-nine.mit.
 merijnb\100iloquent.nl                  merijnb\100ms.com
 +                                       merijnb\100iloquent.com
 merlyn\100stonehenge.com                merlyn\100gadget.cscaper.com
+mestre.smash\100gmail.com               smash\100cpan.org
 mgjv\100comdyn.com.au                   mgjv\100tradingpost.com.au
 mlh\100swl.msd.ray.com                  webtools\100uewrhp03.msd.ray.com
 michael.schroeder\100informatik.uni-erlangen.de mls\100suse.de
 mike\100stok.co.uk                      mike\100exegenix.com
+miyagawa\100bulknews.net                    miyagawa\100edge.co.jp
 mjtg\100cam.ac.uk                       mjtg\100cus.cam.ac.uk
 mikedlr\100tardis.ed.ac.uk              mikedlr\100it.com.pl
 moritz\100casella.verplant.org          moritz\100faui2k3.org
@@ -656,6 +714,8 @@ ilya\100math.berkeley.edu               ilya\100math.ohio-state.edu
 +                                       [9]ilya\100math.ohio-state.edu
 ilya\100martynov.org                    ilya\100juil.nonet
 
+joshua.pritikin\100db.com               joshua\100paloalto.com
+
 okamoto\100corp.hp.com                  okamoto\100hpcc123.corp.hp.com
 orwant\100oreilly.com                   orwant\100media.mit.edu
 
@@ -680,11 +740,12 @@ perl\100greerga.m-l.org                 greerga\100m-l.org
 perl\100profvince.com                   vince\100profvince.com
 perl-rt\100wizbit.be                    p5p\100perl.wizbit.be
 # Maybe we should special case this to get real names out?
-Peter.Dintelmann\100Dresdner-Bank.com   peter.dintelmann\100dresdner-bank.com 
+Peter.Dintelmann\100Dresdner-Bank.com   peter.dintelmann\100dresdner-bank.com
 # NOTE: There is an intentional trailing space in the line above
 pfeifer\100wait.de                      pfeifer\100charly.informatik.uni-dortmund.de
 +                                       upf\100de.uu.net
 rabbit\100rabbit.us                     rabbit+bugs\100rabbit.us
+perl\100aaroncrane.co.uk               arc\100cpan.org
 phil\100perkpartners.com                phil\100finchcomputer.com
 pimlott\100idiomtech.com                andrew\100pimlott.net
 +                                       pimlott\100abel.math.harvard.edu
@@ -698,7 +759,9 @@ public\100khwilliamson.com              khw\100karl.(none)
 
 radu\100netsoft.ro                      rgreab\100fx.ro
 raphael.manfredi\100pobox.com           raphael_manfredi\100grenoble.hp.com
-renee.baecker\100smart-websolutions.de  reneeb\100reneeb-desktop.(none)
+module@renee-baecker.de                 renee.baecker\100smart-websolutions.de
++                                       reneeb\100reneeb-desktop.(none)
++                                       otrs\100ubuntu.(none)
 richard.foley\100rfi.net                richard.foley\100t-online.de
 +                                       richard.foley\100ubs.com
 +                                       richard.foley\100ubsw.com
@@ -717,6 +780,8 @@ rmbarker\100cpan.org                    rmb1\100cise.npl.co.uk
 +                                       robin.barker\100npl.co.uk
 +                                       rmb\100cise.npl.co.uk
 +                                       robin\100spade-ubuntu.(none)
++                                       r.m.barker\100btinternet.com
++                                       rmbarker.cpan\100btinternet.com
 robertmay\100cpan.org                   rob\100themayfamily.me.uk
 roberto\100keltia.freenix.fr            roberto\100eurocontrol.fr
 robin\100cpan.org                       robin\100kitsite.com
@@ -726,9 +791,12 @@ rootbeer\100teleport.com                rootbeer\100redcat.com
 +                                       tomphoenix\100unknown
 rurban\100x-ray.at                      rurban\100cpan.org
 
+sartak\100bestpractical.com             sartak\100gmail.com
+sadinoff\100olf.com                     danny-cpan\100sadinoff.com
 schubiger\100cpan.org                   steven\100accognoscere.org
 +                                       sts\100accognoscere.org
 +                                       schubiger\100gmail.com
++                                       stsc\100refcnt.org
 schwern\100pobox.com                    schwern\100gmail.com
 +                                       schwern\100athens.arena-i.com
 +                                       schwern\100blackrider.aocn.com
@@ -739,7 +807,9 @@ schwab\100suse.de                       schwab\100issan.informatik.uni-dortmund.
 +                                       schwab\100ls5.informatik.uni-dortmund.de
 sebastien\100aperghis.net               maddingue\100free.fr
 +                                       saper\100cpan.org
+shigeya\100wide.ad.jp                   shigeya\100foretune.co.jp
 shlomif\100vipe.technion.ac.il          shlomif\100iglu.org.il
++                                       shlomif+processed-by-perl\100gmail.com
 simon\100simon-cozens.org               simon\100pembro4.pmb.ox.ac.uk
 +                                       simon\100brecon.co.uk
 +                                       simon\100othersideofthe.earth.li
@@ -770,6 +840,7 @@ stef\100mongueurs.net                   stef\100payrard.net
 +                                       properler\100freesurf.fr
 +                                       stef\100francenet.fr
 sthoenna\100efn.org                     ysth\100raven.shiftboard.com
+sisyphus1\100optusnet.com.au            sisyphus\100cpan.org
 
 tassilo.parseval\100post.rwth-aachen.de tassilo.von.parseval\100rwth-aachen.de
 tchrist\100perl.com                     tchrist\100mox.perl.com
@@ -778,6 +849,7 @@ thomas.dorner\100start.de               tdorner\100amadeus.net
 tjenness\100cpan.org                    t.jenness\100jach.hawaii.edu
 +                                       timj\100jach.hawaii.edu
 tobez\100tobez.org                      tobez\100plab.ku.dk
+toddr\100cpanel.net                     toddr\100cpan.org
 tom\100compton.nu                       thh\100cyberscience.com
 tom.horsley\100mail.ccur.com            tom.horsley\100ccur.com
 +                                       tom\100amber.ssd.hcsc.com
@@ -787,9 +859,12 @@ vkonovalov\100lucent.com                vkonovalov\100peterstar.ru
 +                                       vadim\100vkonovalov.ru
 +                                       vkonovalov\100spb.lucent.com
 +                                       vkonovalov\100alcatel-lucent.com
++                                       vadim.konovalov\100alcatel-lucent.com
 
 whatever\100davidnicol.com              davidnicol\100gmail.com
 wolfgang.laun\100alcatel.at             wolfgang.laun\100chello.at
 +                                       wolfgang.laun\100thalesgroup.com
 +                                       wolfgang.laun\100gmail.com
 yath\100yath.de                         yath-perlbug\100yath.de
+
+jkeen@verizon.net                       jkeenan@cpan.org