3 # core-cpan-diff: Compare CPAN modules with their equivalent in core
5 # Originally based on App::DualLivedDiff by Steffen Mueller.
13 use File::Basename ();
18 use File::Spec::Functions;
19 use IO::Uncompress::Gunzip ();
21 use ExtUtils::Manifest;
22 use ExtUtils::MakeMaker ();
25 BEGIN { die "Must be run from root of perl source tree\n" unless -d 'Porting' }
30 use Cwd qw[cwd chdir];
33 local $Archive::Tar::WARN=0;
35 # where, under the cache dir, to download tarballs to
36 use constant SRC_DIR => 'tarballs';
38 # where, under the cache dir, to untar stuff to
39 use constant UNTAR_DIR => 'untarred';
41 use constant DIFF_CMD => 'diff';
44 print STDERR "\n@_\n\n" if @_;
46 Usage: $0 [opts] [ -d | -v | -x ] [ -a | module ... ]
48 -a/--all Scan all dual-life modules.
50 -c/--cachedir Where to save downloaded CPAN tarball files
51 (defaults to /tmp/something/ with deletion after each run).
53 -d/--diff Display file differences using diff(1), rather than just
54 listing which files have changed.
56 --diffopts Options to pass to the diff command. Defaults to '-u --binary'
57 (except on *BSD, where it's just '-u').
59 -f|force Force download from CPAN of new 02packages.details.txt file
60 (with --crosscheck only).
62 -m|mirror Preferred CPAN mirror URI (http:// or file:///)
63 (Local mirror must be a complete mirror, not minicpan)
65 -o/--output File name to write output to (defaults to STDOUT).
67 -r/--reverse Reverses the diff (perl to CPAN).
69 -u/--upstream only print modules with the given upstream (defaults to all)
71 -v/--verbose List the fate of *all* files in the tarball, not just those
72 that differ or are missing.
74 -x|crosscheck List the distributions whose current CPAN version differs from
75 that in blead (i.e. the DISTRIBUTION field in Maintainers.pl).
77 By default (i.e. without the --crosscheck option), for each listed module
78 (or with -a, all CPAN modules listed in Maintainers.pl), grab the tarball
79 from CPAN associated with that module, and compare the files in it with
80 those in the perl source tree.
82 Must be run from the root of the perl source tree.
83 Module names must match the keys of %Modules in Maintainers.pl.
85 The diff(1) command is assumed to be in your PATH and is used to diff files
86 regardless of whether the --diff option has been chosen to display any file
98 my $mirror_url = "http://www.cpan.org/";
106 'a|all' => \$scan_all,
107 'c|cachedir=s' => \$cache_dir,
108 'd|diff' => \$use_diff,
109 'diffopts:s' => \$diff_opts,
110 'f|force' => \$force,
112 'm|mirror=s' => \$mirror_url,
113 'o|output=s' => \$output_file,
114 'r|reverse' => \$reverse,
115 'u|upstream=s@' => \@wanted_upstreams,
116 'v|verbose:1' => \$verbose,
117 'x|crosscheck' => \$do_crosscheck,
122 usage("Cannot mix -a with module list") if $scan_all && @ARGV;
124 if ($do_crosscheck) {
125 usage("can't use -r, -d, --diffopts with --crosscheck")
126 if ( $reverse || $use_diff || $diff_opts );
129 #$diff_opts = '-u --binary' unless defined $diff_opts;
130 if (! defined $diff_opts) {
131 $diff_opts = ($^O =~ m/bsd$/i) ? '-u' : '-u --binary';
133 usage("can't use -f without --crosscheck") if $force;
138 ? grep $Maintainers::Modules{$_}{CPAN},
139 ( sort { lc $a cmp lc $b } keys %Maintainers::Modules )
141 usage("No modules specified") unless @modules;
144 if ( defined $output_file ) {
145 open $outfh, '>', $output_file
146 or die "ERROR: could not open file '$output_file' for writing: $!\n";
149 open $outfh, ">&STDOUT"
150 or die "ERROR: can't dup STDOUT: $!\n";
153 if ( defined $cache_dir ) {
154 die "ERROR: not a directory: '$cache_dir'\n"
155 if !-d $cache_dir && -e $cache_dir;
156 File::Path::mkpath($cache_dir);
159 $cache_dir = File::Temp::tempdir( CLEANUP => 1 );
162 $mirror_url .= "/" unless substr( $mirror_url, -1 ) eq "/";
163 my $test_file = "modules/03modlist.data.gz";
165 cpan_url( $mirror_url, $test_file ),
166 catfile( $cache_dir, $test_file )
167 ) or die "ERROR: not a CPAN mirror '$mirror_url'\n";
169 if ($do_crosscheck) {
171 $outfh, $cache_dir, $mirror_url, $verbose,
172 $force, \@modules, \@wanted_upstreams
176 $verbose > 2 and $use_diff++;
178 \@modules, $outfh, $output_file,
179 $cache_dir, $mirror_url, $verbose,
180 $use_diff, $reverse, $diff_opts,
186 # construct a CPAN url
189 my ( $mirror_url, @path ) = @_;
190 return $mirror_url unless @path;
191 my $cpan_path = join( "/", map { split "/", $_ } @path );
192 $cpan_path =~ s{\A/}{}; # remove leading slash since url has one trailing
193 return $mirror_url . $cpan_path;
196 # construct a CPAN URL for a author/distribution string like:
197 # BINGOS/Archive-Extract-0.52.tar.gz
199 sub cpan_url_distribution {
200 my ( $mirror_url, $distribution ) = @_;
201 $distribution =~ /^([A-Z])([A-Z])/
202 or die "ERROR: invalid DISTRIBUTION name (not /^[A-Z]{2}/): $distribution\n";
203 my $path = "authors/id/$1/$1$2/$distribution";
204 return cpan_url( $mirror_url, $path );
207 # compare a list of modules against their CPAN equivalents
211 $modules, $outfh, $output_file, $cache_dir,
212 $mirror_url, $verbose, $use_diff, $reverse,
213 $diff_opts, $wanted_upstreams
216 # first, make sure we have a directory where they can all be untarred,
217 # and if its a permanent directory, clear any previous content
218 my $untar_dir = catdir( $cache_dir, UNTAR_DIR );
219 my $src_dir = catdir( $cache_dir, SRC_DIR );
220 for my $d ( $src_dir, $untar_dir ) {
222 mkdir $d or die "mkdir $d: $!\n";
225 my %ignorable = map { ( $_ => 1 ) } @Maintainers::IGNORABLE;
226 my %wanted_upstream = map { ( $_ => 1 ) } @$wanted_upstreams;
229 for my $module (@$modules) {
230 warn "Processing $module ...\n" if defined $output_file;
232 my $m = $Maintainers::Modules{$module}
233 or die "ERROR: No such module in Maintainers.pl: '$module'\n";
235 unless ( $m->{CPAN} ) {
236 print $outfh "WARNING: $module is not dual-life; skipping\n";
240 my $dist = $m->{DISTRIBUTION};
241 die "ERROR: $module has no DISTRIBUTION entry\n" unless defined $dist;
243 if ( $seen_dist{$dist}++ ) {
244 warn "WARNING: duplicate entry for $dist in $module\n";
247 my $upstream = $m->{UPSTREAM} // 'undef';
248 next if @$wanted_upstreams and !$wanted_upstream{$upstream};
250 print $outfh "\n$module - "
251 . $Maintainers::Modules{$module}->{DISTRIBUTION} . "\n";
252 print $outfh " upstream is: "
253 . ( $m->{UPSTREAM} // 'UNKNOWN!' ) . "\n";
258 get_distribution( $src_dir, $mirror_url, $untar_dir, $module,
262 print $outfh " ", $@;
263 print $outfh " (skipping)\n";
267 my @perl_files = Maintainers::get_module_files($module);
269 my $manifest = catfile( $cpan_dir, 'MANIFEST' );
270 die "ERROR: no such file: $manifest\n" unless -f $manifest;
272 my $cpan_files = ExtUtils::Manifest::maniread($manifest);
273 my @cpan_files = sort keys %$cpan_files;
275 ( my $main_pm = $module ) =~ s{::}{/}g;
278 my ( $excluded, $map, $customized ) =
279 get_map( $m, $module, \@perl_files );
282 @perl_unseen{@perl_files} = ();
283 my %perl_files = %perl_unseen;
285 foreach my $cpan_file (@cpan_files) {
287 cpan_to_perl( $excluded, $map, $customized, $cpan_file );
288 unless ( defined $mapped_file ) {
289 print $outfh " Excluded: $cpan_file\n" if $verbose;
293 if ( exists $perl_files{$mapped_file} ) {
294 delete $perl_unseen{$mapped_file};
298 # some CPAN files foo are stored in core as foo.packed,
299 # which are then unpacked by 'make test_prep'
300 my $packed_file = "$mapped_file.packed";
301 if ( exists $perl_files{$packed_file} ) {
302 if ( !-f $mapped_file and -f $packed_file ) {
304 WARNING: $mapped_file not found, but .packed variant exists.
305 Perhaps you need to run 'make test_prep'?
309 delete $perl_unseen{$packed_file};
312 if ( $ignorable{$cpan_file} ) {
313 print $outfh " Ignored: $cpan_file\n" if $verbose;
318 print $outfh " CPAN only: $cpan_file",
319 ( $cpan_file eq $mapped_file )
321 : " (missing $mapped_file)\n";
327 my $abs_cpan_file = catfile( $cpan_dir, $cpan_file );
329 # should never happen
330 die "ERROR: can't find file $abs_cpan_file\n"
331 unless -f $abs_cpan_file;
333 # might happen if the FILES entry in Maintainers.pl is wrong
334 unless ( -f $mapped_file ) {
335 print $outfh "WARNING: perl file not found: $mapped_file\n";
339 my $relative_mapped_file = relatively_mapped($mapped_file);
342 file_diff( $outfh, $abs_cpan_file, $mapped_file, $reverse,
344 if ( $different && customized( $m, $relative_mapped_file ) ) {
345 print $outfh " Customized for blead: $relative_mapped_file\n";
346 if ( $use_diff && $verbose ) {
347 $different =~ s{^(--- |\+\+\+ )/tmp/[^/]+/}{$1}gm;
348 print $outfh $different;
353 $different =~ s{^(--- |\+\+\+ )/tmp/[^/]+/}{$1}gm;
354 print $outfh $different;
357 if ( $cpan_file eq $relative_mapped_file ) {
358 print $outfh " Modified: $relative_mapped_file\n";
362 " Modified: $cpan_file $relative_mapped_file\n";
365 if ( $cpan_file =~ m{\.pm\z} ) {
366 my $pv = MM->parse_version($mapped_file) || 'unknown';
367 my $cv = MM->parse_version($abs_cpan_file) || 'unknown';
370 " Version mismatch in '$cpan_file':\n $cv (cpan) vs $pv (perl)\n";
376 elsif ( customized( $m, $relative_mapped_file ) ) {
377 # Maintainers.pl says we customized it, but it looks the
378 # same as CPAN so maybe we lost the customization, which
380 if ( $cpan_file eq $relative_mapped_file ) {
381 print $outfh " Blead customization missing: $cpan_file\n";
385 " Blead customization missing: $cpan_file $relative_mapped_file\n";
389 if ( $cpan_file eq $relative_mapped_file ) {
390 print $outfh " Unchanged: $cpan_file\n";
394 " Unchanged: $cpan_file $relative_mapped_file\n";
398 for ( sort keys %perl_unseen ) {
399 my $relative_mapped_file = relatively_mapped($_);
400 if ( customized( $m, $relative_mapped_file ) ) {
401 print $outfh " Customized for blead: $_\n";
404 print $outfh " Perl only: $_\n" unless $use_diff;
408 foreach my $exclude (@$excluded) {
410 foreach my $cpan_file (@cpan_files) {
411 # may be a simple string to match exactly, or a pattern
412 if ( ref $exclude ) {
413 $seen = 1 if $cpan_file =~ $exclude;
416 $seen = 1 if $cpan_file eq $exclude;
421 print $outfh " Unnecessary exclusion: $exclude\n";
428 sub relatively_mapped {
429 my $relative = shift;
430 $relative =~ s/^(cpan|dist|ext)\/.*?\///;
434 # given FooBar-1.23_45.tar.gz, return FooBar
438 $d =~ s/\.tar\.gz$//;
440 $d =~ s/[\d\-_\.]+$//;
444 # process --crosscheck action:
445 # ie list all distributions whose CPAN versions differ from that listed in
450 $outfh, $cache_dir, $mirror_url, $verbose,
451 $force, $modules, $wanted_upstreams,
454 my $file = '02packages.details.txt';
455 my $download_dir = $cache_dir || File::Temp::tempdir( CLEANUP => 1 );
456 my $path = catfile( $download_dir, $file );
457 my $gzfile = "$path.gz";
459 # grab 02packages.details.txt
461 my $url = cpan_url( $mirror_url, "modules/02packages.details.txt.gz" );
463 if ( !-f $gzfile or $force ) {
465 my_getstore( $url, $gzfile );
468 IO::Uncompress::Gunzip::gunzip( $gzfile, $path )
470 "ERROR: failed to ungzip $gzfile: $IO::Uncompress::Gunzip::GunzipError\n";
472 # suck in the data from it
474 open my $fh, '<', $path
475 or die "ERROR: open: $file: $!\n";
483 my @f = split ' ', $_;
486 "WARNING: $file:$.: line doesn't have three fields (skipping)\n";
490 $distro =~ s{^[A-Z]/[A-Z]{2}/}{}; # strip leading A/AB/
491 $modules{ $f[0] } = $distro;
493 ( my $short_distro = $distro ) =~ s{^.*/}{};
495 $distros{ distro_base($short_distro) }{$distro} = 1;
498 my %wanted_upstream = map { ( $_ => 1 ) } @$wanted_upstreams;
499 for my $module (@$modules) {
500 my $m = $Maintainers::Modules{$module}
501 or die "ERROR: No such module in Maintainers.pl: '$module'\n";
503 $verbose and warn "Checking $module\n";
505 unless ( $m->{CPAN} ) {
506 print $outfh "\nWARNING: $module is not dual-life; skipping\n";
510 # given an entry like
511 # Foo::Bar 1.23 foo-bar-1.23.tar.gz,
512 # first compare the module name against Foo::Bar, and failing that,
515 my $pdist = $m->{DISTRIBUTION};
516 die "ERROR: $module has no DISTRIBUTION entry\n" unless defined $pdist;
518 my $upstream = $m->{UPSTREAM} // 'undef';
519 next if @$wanted_upstreams and !$wanted_upstream{$upstream};
521 my $cdist = $modules{$module};
522 ( my $short_pdist = $pdist ) =~ s{^.*/}{};
524 unless ( defined $cdist ) {
525 my $d = $distros{ distro_base($short_pdist) };
526 unless ( defined $d ) {
527 print $outfh "\n$module: Can't determine current CPAN entry\n";
530 if ( keys %$d > 1 ) {
532 "\n$module: (found more than one CPAN candidate):\n";
533 print $outfh " Perl: $pdist\n";
534 print $outfh " CPAN: $_\n" for sort keys %$d;
537 $cdist = ( keys %$d )[0];
540 if ( $cdist ne $pdist ) {
541 print $outfh "\n$module:\n Perl: $pdist\n CPAN: $cdist\n";
546 # get the EXCLUDED and MAP entries for this module, or
547 # make up defaults if they don't exist
550 my ( $m, $module_name, $perl_files ) = @_;
552 my ( $excluded, $map, $customized ) = @$m{qw(EXCLUDED MAP CUSTOMIZED)};
557 return $excluded, $map, $customized if $map;
559 # all files under ext/foo-bar (plus maybe some under t/lib)???
563 if (m{^((?:ext|dist|cpan)/[^/]+/)}) {
564 if ( defined $ext and $ext ne $1 ) {
566 # more than one ext/$ext/
581 if ( defined $ext ) {
582 $map = { '' => $ext },;
585 ( my $base = $module_name ) =~ s{::}{/}g;
592 return $excluded, $map, $customized;
595 # Given an exclude list and a mapping hash, convert a CPAN filename
596 # (eg 't/bar.t') to the equivalent perl filename (eg 'lib/Foo/Bar/t/bar.t').
597 # Returns an empty list for an excluded file
600 my ( $excluded, $map, $customized, $cpan_file ) = @_;
602 my %customized = map { ( $_ => 1 ) } @$customized;
603 for my $exclude (@$excluded) {
604 next if $customized{$exclude};
606 # may be a simple string to match exactly, or a pattern
607 if ( ref $exclude ) {
608 return if $cpan_file =~ $exclude;
611 return if $cpan_file eq $exclude;
615 my $perl_file = $cpan_file;
617 # try longest prefix first, then alphabetically on tie-break
619 my $prefix ( sort { length($b) <=> length($a) || $a cmp $b } keys %$map )
621 last if $perl_file =~ s/^\Q$prefix/$map->{$prefix}/;
626 # fetch a file from a URL and store it in a file given by a filename
629 my ( $url, $file ) = @_;
630 File::Path::mkpath( File::Basename::dirname($file) );
631 if ( $url =~ qr{\Afile://(?:localhost)?/} ) {
632 ( my $local_path = $url ) =~ s{\Afile://(?:localhost)?}{};
633 File::Copy::copy( $local_path, $file );
635 my $http = HTTP::Tiny->new;
636 my $response = $http->mirror($url, $file);
637 return $response->{success};
641 # download and unpack a distribution
642 # Returns the full pathname of the extracted directory
643 # (eg '/tmp/XYZ/Foo_bar-1.23')
645 # cache_dir: where to download the .tar.gz file to
646 # mirror_url: CPAN mirror to download from
647 # untar_dir: where to untar or unzup the file
648 # module: name of module
649 # dist: name of the distribution
651 sub get_distribution {
652 my ( $src_dir, $mirror_url, $untar_dir, $module, $dist ) = @_;
654 $dist =~ m{.+/([^/]+)$}
656 "ERROR: $module: invalid DISTRIBUTION name (no AUTHOR/ prefix): $dist\n";
659 my $download_file = catfile( $src_dir, $filename );
661 # download distribution
663 if ( -f $download_file and !-s $download_file ) {
665 # failed download might leave a zero-length file
666 unlink $download_file;
669 unless ( -f $download_file ) {
672 my $url = cpan_url_distribution( $mirror_url, $dist );
673 my_getstore( $url, $download_file )
674 or die "ERROR: Could not fetch '$url'\n";
677 # get the expected name of the extracted distribution dir
679 my $path = catfile( $untar_dir, $filename );
681 $path =~ s/\.tar\.gz$//
682 or $path =~ s/\.tgz$//
683 or $path =~ s/\.zip$//
685 "ERROR: downloaded file does not have a recognised suffix: $path\n";
687 # extract it unless we already have it cached or tarball is newer
688 if ( !-d $path || ( -M $download_file < -M $path ) ) {
689 $path = extract( $download_file, $untar_dir )
691 "ERROR: failed to extract distribution '$download_file to temp. dir: "
695 die "ERROR: Extracted tarball does not appear as $path\n" unless -d $path;
700 # produce the diff of a single file
703 my $cpan_file = shift;
704 my $perl_file = shift;
706 my $diff_opts = shift;
708 my @cmd = ( DIFF_CMD, split ' ', $diff_opts );
710 push @cmd, $perl_file, $cpan_file;
713 push @cmd, $cpan_file, $perl_file;
720 my ( $module_data, $file ) = @_;
721 return grep { $file eq $_ } @{ $module_data->{CUSTOMIZED} };
725 my ($archive,$to) = @_;
727 chdir $to or die "$!\n";
730 local $Archive::Tar::CHOWN = 0;
732 unless ( $next = Archive::Tar->iter( $archive, 1 ) ) {
733 $! = $Archive::Tar::error;
736 while ( my $file = $next->() ) {
737 push @files, $file->full_path;
738 unless ( $file->extract ) {
739 $! = $Archive::Tar::error;
744 my $path = __get_extract_dir( \@files );
745 chdir $cwd or die "$!\n";
749 sub __get_extract_dir {
750 my $files = shift || [];
752 return unless scalar @$files;
755 for my $aref ( [ \$dir1, 0 ], [ \$dir2, -1 ] ) {
756 my($dir,$pos) = @$aref;
758 ### add a catdir(), so that any trailing slashes get
759 ### take care of (removed)
760 ### also, a catdir() normalises './dir/foo' to 'dir/foo';
761 ### which was the problem in bug #23999
762 my $res = -d $files->[$pos]
763 ? File::Spec->catdir( $files->[$pos], '' )
764 : File::Spec->catdir( File::Basename::dirname( $files->[$pos] ) );
769 ### if the first and last dir don't match, make sure the
770 ### dirname is not set wrongly
773 ### dirs are the same, so we know for sure what the extract dir is
774 if( $dir1 eq $dir2 ) {
777 ### dirs are different.. do they share the base dir?
778 ### if so, use that, if not, fall back to '.'
780 my $base1 = [ File::Spec->splitdir( $dir1 ) ]->[0];
781 my $base2 = [ File::Spec->splitdir( $dir2 ) ]->[0];
783 $dir = File::Spec->rel2abs( $base1 eq $base2 ? $base1 : '.' );
786 return File::Spec->rel2abs( $dir );