#!/usr/bin/env perl # core-cpan-diff: Compare CPAN modules with their equivalent in core # Originally based on App::DualLivedDiff by Steffen Mueller. use strict; use warnings; use 5.010; use Getopt::Long; use File::Temp (); use File::Path (); use File::Spec; use Archive::Extract; use IO::Uncompress::Gunzip (); use File::Compare (); use ExtUtils::Manifest; BEGIN { die "Must be run from root of perl source tree\n" unless -d 'Porting' } use lib 'Porting'; use Maintainers (); # if running from blead, we may be doing -Ilib, which means when we # 'chdir /tmp/foo', Archive::Extract may not find Archive::Tar etc. # So preload the things we need, and tell it to check %INC first: use Archive::Tar; use IPC::Open3; use IO::Select; $Module::Load::Conditional::CHECK_INC_HASH = 1; # stop Archive::Extract whinging about lack of Archive::Zip $Archive::Extract::WARN = 0; # Files, which if they exist in CPAN but not in perl, will not generate # an 'Only in CPAN' listing # our %IGNORABLE = map { ($_ => 1) } qw(.cvsignore .dualLivedDiffConfig .gitignore ANNOUNCE Announce Artistic AUTHORS BENCHMARK BUGS Build.PL CHANGELOG ChangeLog CHANGES Changes COPYING Copying CREDITS GOALS HISTORY INSTALL INSTALL.SKIP LICENSE Makefile.PL MANIFEST MANIFEST.SKIP META.yml NEW NOTES ppport.h README SIGNATURE THANKS TODO Todo VERSION WHATSNEW); # where, under the cache dir, to untar stuff to use constant UNTAR_DIR => 'untarred'; use constant DIFF_CMD => 'diff'; use constant WGET_CMD => 'wget'; sub usage { print STDERR "\n@_\n\n" if @_; print STDERR < \$scan_all, 'c|cachedir=s' => \$cache_dir, 'd|diff' => \$use_diff, 'diffopts:s' => \$diff_opts, 'f|force' => \$force, 'h|help' => \&usage, 'o|output=s' => \$output_file, 'r|reverse' => \$reverse, 'u|upstream=s@'=> \@wanted_upstreams, 'v|verbose' => \$verbose, 'x|crosscheck' => \$do_crosscheck, ) or usage; my @modules; usage("Cannot mix -a with module list") if $scan_all && @ARGV; if ($do_crosscheck) { usage("can't use -r, -d, --diffopts, -v with --crosscheck") if ($reverse || $use_diff || $diff_opts || $verbose); } else { $diff_opts = '-u' unless defined $diff_opts; usage("can't use -f without --crosscheck") if $force; } @modules = $scan_all ? grep $Maintainers::Modules{$_}{CPAN}, (sort {lc $a cmp lc $b } keys %Maintainers::Modules) : @ARGV; usage("No modules specified") unless @modules; my $outfh; if (defined $output_file) { open $outfh, '>', $output_file or die "ERROR: could not open file '$output_file' for writing: $!\n"; } else { open $outfh, ">&STDOUT" or die "ERROR: can't dup STDOUT: $!\n"; } if (defined $cache_dir) { die "ERROR: no such directory: '$cache_dir'\n" unless -d $cache_dir; } if ($do_crosscheck) { do_crosscheck($outfh, $cache_dir, $force, \@modules); } else { do_compare(\@modules, $outfh, $output_file, $cache_dir, $verbose, $use_diff, $reverse, $diff_opts, \@wanted_upstreams); } } # compare a list of modules against their CPAN equivalents sub do_compare { my ($modules, $outfh, $output_file, $cache_dir, $verbose, $use_diff, $reverse, $diff_opts, $wanted_upstreams) = @_; # first, make sure we have a directory where they can all be untarred, # and if its a permanent directory, clear any previous content my $untar_dir; if ($cache_dir) { $untar_dir = File::Spec->catdir($cache_dir, UNTAR_DIR); if (-d $untar_dir) { File::Path::rmtree($untar_dir) or die "failed to remove $untar_dir\n"; } mkdir $untar_dir or die "mkdir $untar_dir: $!\n"; } else { $untar_dir = File::Temp::tempdir( CLEANUP => 1 ); } my %ignorable = map { ($_ => 1) } @Maintainers::IGNORABLE; my %seen_dist; for my $module (@$modules) { warn "Processing $module ...\n" if defined $output_file; my $m = $Maintainers::Modules{$module} or die "ERROR: No such module in Maintainers.pl: '$module'\n"; unless ($m->{CPAN}) { print $outfh "WARNING: $module is not dual-life; skipping\n"; next; } my $dist = $m->{DISTRIBUTION}; die "ERROR: $module has no DISTRIBUTION entry\n" unless defined $dist; if ($seen_dist{$dist}) { warn "WARNING: duplicate entry for $dist in $module\n" } my $upstream = $m->{UPSTREAM} || 'UNKNOWN'; next if @$wanted_upstreams and ! ($upstream ~~ $wanted_upstreams); print $outfh "\n$module - ".$Maintainers::Modules{$module}->{DISTRIBUTION}."\n" unless $use_diff; print $outfh " upstream is: ".($m->{UPSTREAM} || 'UNKNOWN!')."\n"; $seen_dist{$dist}++; my $cpan_dir; eval { $cpan_dir = get_distribution($cache_dir, $untar_dir, $module, $dist) }; if ($@) { print $outfh " ", $@; print $outfh " (skipping)\n"; next; } my @perl_files = Maintainers::get_module_files($module); my $manifest = File::Spec->catfile($cpan_dir, 'MANIFEST'); die "ERROR: no such file: $manifest\n" unless -f $manifest; my $cpan_files = ExtUtils::Manifest::maniread($manifest); my @cpan_files = sort keys %$cpan_files; my ($excluded, $map) = get_map($m, $module, \@perl_files); my %perl_unseen; @perl_unseen{@perl_files} = (); my %perl_files = %perl_unseen; foreach my $cpan_file (@cpan_files) { my $mapped_file = cpan_to_perl($excluded, $map, $cpan_file); unless (defined $mapped_file) { print $outfh " Excluded: $cpan_file\n" if $verbose; next; } if (exists $perl_files{$mapped_file}) { delete $perl_unseen{$mapped_file}; } else { # some CPAN files foo are stored in core as foo.packed, # which are then unpacked by 'make test_prep' my $packed_file = "$mapped_file.packed"; if (exists $perl_files{$packed_file} ) { if (! -f $mapped_file and -f $packed_file) { print $outfh <catfile($cpan_dir, $cpan_file); # should never happen die "ERROR: can't find file $abs_cpan_file\n" unless -f $abs_cpan_file; # might happen if the FILES entry in Maintainers.pl is wrong unless (-f $mapped_file) { print $outfh "WARNING: perl file not found: $mapped_file\n"; next; } my $relative_mapped_file = $mapped_file; $relative_mapped_file =~ s/^(cpan|dist|ext)\/.*?\///; if (File::Compare::compare($abs_cpan_file, $mapped_file)) { if ($use_diff) { file_diff($outfh, $abs_cpan_file, $mapped_file, $reverse, $diff_opts); } else { if ($cpan_file eq $relative_mapped_file) { print $outfh " Modified: $relative_mapped_file\n"; } else { print $outfh " Modified: $cpan_file $relative_mapped_file\n"; } } } elsif ($verbose) { if ($cpan_file eq $relative_mapped_file) { print $outfh " Unchanged: $cpan_file\n"; } else { print $outfh " Unchanged: $cpan_file $relative_mapped_file\n"; } } } for (sort keys %perl_unseen) { print $outfh " Perl only: $_\n" unless $use_diff; } } } # given FooBar-1.23_45.tar.gz, return FooBar sub distro_base { my $d = shift; $d =~ s/\.tar\.gz$//; $d =~ s/\.gip$//; $d =~ s/[\d\-_\.]+$//; return $d; } # process --crosscheck action: # ie list all distributions whose CPAN versions differ from that listed in # Maintainers.pl sub do_crosscheck { my ($outfh, $cache_dir, $force, $modules) = @_; my $file = '02packages.details.txt'; my $download_dir = $cache_dir || File::Temp::tempdir( CLEANUP => 1 ); my $path = File::Spec->catfile($download_dir, $file); my $gzfile = "$path.gz"; # grab 02packages.details.txt my $url = 'http://www.cpan.org/modules/02packages.details.txt.gz'; if (! -f $gzfile or $force) { unlink $gzfile; my_getstore($url, $gzfile); } unlink $path; IO::Uncompress::Gunzip::gunzip($gzfile, $path) or die "ERROR: failed to ungzip $gzfile: $IO::Uncompress::Gunzip::GunzipError\n"; # suck in the data from it open my $fh, '<', $path or die "ERROR: open: $file: $!\n"; my %distros; my %modules; while (<$fh>) { next if 1../^$/; chomp; my @f = split ' ', $_; if (@f != 3) { warn "WARNING: $file:$.: line doesn't have three fields (skipping)\n"; next; } my $distro = $f[2]; $distro =~ s{^[A-Z]/[A-Z]{2}/}{}; # strip leading A/AB/ $modules{$f[0]} = $distro; (my $short_distro = $distro) =~ s{^.*/}{}; $distros{distro_base($short_distro)}{$distro} = 1; } for my $module (@$modules) { my $m = $Maintainers::Modules{$module} or die "ERROR: No such module in Maintainers.pl: '$module'\n"; unless ($m->{CPAN}) { print $outfh "\nWARNING: $module is not dual-life; skipping\n"; next; } # given an entry like # Foo::Bar 1.23 foo-bar-1.23.tar.gz, # first compare the module name against Foo::Bar, and failing that, # against foo-bar my $pdist = $m->{DISTRIBUTION}; die "ERROR: $module has no DISTRIBUTION entry\n" unless defined $pdist; my $cdist = $modules{$module}; (my $short_pdist = $pdist) =~ s{^.*/}{}; unless (defined $cdist) { my $d = $distros{distro_base($short_pdist)}; unless (defined $d) { print $outfh "\n$module: Can't determine current CPAN entry\n"; next; } if (keys %$d > 1) { print $outfh "\n$module: (found more than one CPAN candidate):\n"; print $outfh " perl: $pdist\n"; print $outfh " CPAN: $_\n" for sort keys %$d; next; } $cdist = (keys %$d)[0]; } if ($cdist ne $pdist) { print $outfh "\n$module:\n Perl: $pdist\n CPAN: $cdist\n"; } } } # get the EXCLUDED and MAP entries for this module, or # make up defauts if they don't exist sub get_map { my ($m, $module_name, $perl_files) = @_; my ($excluded, $map) = @$m{qw(EXCLUDED MAP)}; $excluded ||= []; return $excluded, $map if $map; # all files under ext/foo-bar (plus maybe some under t/lib)??? my $ext; for (@$perl_files) { if (m{^((?:ext|dist|cpan)/[^/]+/)}) { if (defined $ext and $ext ne $1) { # more than one ext/$ext/ undef $ext; last; } $ext = $1; } elsif (m{^t/lib/}) { next; } else { undef $ext; last; } } if (defined $ext) { $map = { '' => $ext }, } else { (my $base = $module_name) =~ s{::}{/}g; $base ="lib/$base"; $map = { 'lib/' => 'lib/', '' => "$base/", }; } return $excluded, $map; } # Given an exclude list and a mapping hash, convert a CPAN filename # (eg 't/bar.t') to the equivalent perl filename (eg 'lib/Foo/Bar/t/bar.t'). # Returns an empty list for an excluded file sub cpan_to_perl { my ($excluded, $map, $cpan_file) = @_; for my $exclude (@$excluded) { # may be a simple string to match exactly, or a pattern if (ref $exclude) { return if $cpan_file =~ $exclude; } else { return if $cpan_file eq $exclude; } } my $perl_file = $cpan_file; # try longest prefix first, then alphabetically on tie-break for my $prefix (sort { length($b) <=> length($a) || $a cmp $b } keys %$map) { last if $perl_file =~ s/^\Q$prefix/$map->{$prefix}/; } return $perl_file; } # do LWP::Simple::getstore, possibly without LWP::Simple being available my $lwp_simple_available; sub my_getstore { my ($url, $file) = @_; unless (defined $lwp_simple_available) { eval { require LWP::Simple }; $lwp_simple_available = $@ eq ''; } if ($lwp_simple_available) { return LWP::Simple::is_success(LWP::Simple::getstore($url, $file)); } else { return system(WGET_CMD, "-O", $file, $url) == 0; } } # download and unpack a distribution # Returns the full pathname of the extracted directory # (eg '/tmp/XYZ/Foo_bar-1.23') # cache_dir: where to dowenload the .tar.gz file to # untar_dir: where to untar or unzup the file # module: name of module # dist: name of the distribution sub get_distribution { my ($cache_dir, $untar_dir, $module, $dist) = @_; $dist =~ m{.+/([^/]+)$} or die "ERROR: $module: invalid DISTRIBUTION name (no AUTHOR/ prefix): $dist\n"; my $filename = $1; my $download_dir = $cache_dir || File::Temp::tempdir( CLEANUP => 1 ); my $download_file = File::Spec->catfile($download_dir, $filename); # download distribution if (-f $download_file and ! -s $download_file ) { # wget can leave a zero-length file on failed download unlink $download_file; } unless (-f $download_file) { # not cached $dist =~ /^([A-Z])([A-Z])/ or die "ERROR: $module: invalid DISTRIBUTION name (not /^[A-Z]{2}/): $dist\n"; my $url = "http://www.cpan.org/modules/by-authors/id/$1/$1$2/$dist"; my_getstore($url, $download_file) or die "ERROR: Could not fetch '$url'\n"; } # extract distribution my $ae = Archive::Extract->new( archive => $download_file); $ae->extract( to => $untar_dir ) or die "ERROR: failed to extract distribution '$download_file to temp. dir: " . $ae->error() . "\n"; # get the name of the extracted distribution dir my $path = File::Spec->catfile($untar_dir, $filename); $path =~ s/\.tar\.gz$// or $path =~ s/\.zip$// or die "ERROR: downloaded file does not have a recognised suffix: $path\n"; die "ERROR: Extracted tarball does not appear as $path\n" unless -d $path; return $path; } # produce the diff of a single file sub file_diff { my $outfh = shift; my $cpan_file = shift; my $perl_file = shift; my $reverse = shift; my $diff_opts = shift; my @cmd = (DIFF_CMD, split ' ', $diff_opts); if ($reverse) { push @cmd, $perl_file, $cpan_file; } else { push @cmd, $cpan_file, $perl_file; } my $result = `@cmd`; $result =~ s{^(--- |\+\+\+ )/tmp/[^/]+/}{$1}gm; print $outfh $result; } run();