3 # core-cpan-diff: Compare CPAN modules with their equivalent in core
5 # Originally based on App::DualLivedDiff by Steffen Mueller.
13 use File::Basename ();
18 use File::Spec::Functions;
19 use IO::Uncompress::Gunzip ();
21 use ExtUtils::Manifest;
22 use ExtUtils::MakeMaker ();
25 BEGIN { die "Must be run from root of perl source tree\n" unless -d 'Porting' }
30 use Cwd qw[cwd chdir];
33 local $Archive::Tar::WARN=0;
35 # where, under the cache dir, to download tarballs to
36 use constant SRC_DIR => 'tarballs';
38 # where, under the cache dir, to untar stuff to
39 use constant UNTAR_DIR => 'untarred';
41 use constant DIFF_CMD => 'diff';
44 print STDERR "\n@_\n\n" if @_;
46 Usage: $0 [opts] [ -d | -v | -x ] [ -a | module ... ]
48 -a/--all Scan all dual-life modules.
50 -c/--cachedir Where to save downloaded CPAN tarball files
51 (defaults to /tmp/something/ with deletion after each run).
53 -d/--diff Display file differences using diff(1), rather than just
54 listing which files have changed.
56 --diffopts Options to pass to the diff command. Defaults to '-u'.
58 -f|force Force download from CPAN of new 02packages.details.txt file
59 (with --crosscheck only).
61 -m|mirror Preferred CPAN mirror URI (http:// or file:///)
62 (Local mirror must be a complete mirror, not minicpan)
64 -o/--output File name to write output to (defaults to STDOUT).
66 -r/--reverse Reverses the diff (perl to CPAN).
68 -u/--upstream only print modules with the given upstream (defaults to all)
70 -v/--verbose List the fate of *all* files in the tarball, not just those
71 that differ or are missing.
73 -x|crosscheck List the distributions whose current CPAN version differs from
74 that in blead (i.e. the DISTRIBUTION field in Maintainers.pl).
76 By default (i.e. without the --crosscheck option), for each listed module
77 (or with -a, all CPAN modules listed in Maintainers.pl), grab the tarball
78 from CPAN associated with that module, and compare the files in it with
79 those in the perl source tree.
81 Must be run from the root of the perl source tree.
82 Module names must match the keys of %Modules in Maintainers.pl.
84 The diff(1) command is assumed to be in your PATH and is used to diff files
85 regardless of whether the --diff option has been chosen to display any file
97 my $mirror_url = "http://www.cpan.org/";
105 'a|all' => \$scan_all,
106 'c|cachedir=s' => \$cache_dir,
107 'd|diff' => \$use_diff,
108 'diffopts:s' => \$diff_opts,
109 'f|force' => \$force,
111 'm|mirror=s' => \$mirror_url,
112 'o|output=s' => \$output_file,
113 'r|reverse' => \$reverse,
114 'u|upstream=s@' => \@wanted_upstreams,
115 'v|verbose' => \$verbose,
116 'x|crosscheck' => \$do_crosscheck,
121 usage("Cannot mix -a with module list") if $scan_all && @ARGV;
123 if ($do_crosscheck) {
124 usage("can't use -r, -d, --diffopts, -v with --crosscheck")
125 if ( $reverse || $use_diff || $diff_opts || $verbose );
128 $diff_opts = '-u -b' unless defined $diff_opts;
129 usage("can't use -f without --crosscheck") if $force;
134 ? grep $Maintainers::Modules{$_}{CPAN},
135 ( sort { lc $a cmp lc $b } keys %Maintainers::Modules )
137 usage("No modules specified") unless @modules;
140 if ( defined $output_file ) {
141 open $outfh, '>', $output_file
142 or die "ERROR: could not open file '$output_file' for writing: $!\n";
145 open $outfh, ">&STDOUT"
146 or die "ERROR: can't dup STDOUT: $!\n";
149 if ( defined $cache_dir ) {
150 die "ERROR: not a directory: '$cache_dir'\n"
151 if !-d $cache_dir && -e $cache_dir;
152 File::Path::mkpath($cache_dir);
155 $cache_dir = File::Temp::tempdir( CLEANUP => 1 );
158 $mirror_url .= "/" unless substr( $mirror_url, -1 ) eq "/";
159 my $test_file = "modules/03modlist.data.gz";
161 cpan_url( $mirror_url, $test_file ),
162 catfile( $cache_dir, $test_file )
163 ) or die "ERROR: not a CPAN mirror '$mirror_url'\n";
165 if ($do_crosscheck) {
167 $outfh, $cache_dir, $mirror_url,
168 $force, \@modules, \@wanted_upstreams
173 \@modules, $outfh, $output_file,
174 $cache_dir, $mirror_url, $verbose,
175 $use_diff, $reverse, $diff_opts,
181 # construct a CPAN url
184 my ( $mirror_url, @path ) = @_;
185 return $mirror_url unless @path;
186 my $cpan_path = join( "/", map { split "/", $_ } @path );
187 $cpan_path =~ s{\A/}{}; # remove leading slash since url has one trailing
188 return $mirror_url . $cpan_path;
191 # construct a CPAN URL for a author/distribution string like:
192 # BINGOS/Archive-Extract-0.52.tar.gz
194 sub cpan_url_distribution {
195 my ( $mirror_url, $distribution ) = @_;
196 $distribution =~ /^([A-Z])([A-Z])/
197 or die "ERROR: invalid DISTRIBUTION name (not /^[A-Z]{2}/): $distribution\n";
198 my $path = "authors/id/$1/$1$2/$distribution";
199 return cpan_url( $mirror_url, $path );
202 # compare a list of modules against their CPAN equivalents
206 $modules, $outfh, $output_file, $cache_dir,
207 $mirror_url, $verbose, $use_diff, $reverse,
208 $diff_opts, $wanted_upstreams
211 # first, make sure we have a directory where they can all be untarred,
212 # and if its a permanent directory, clear any previous content
213 my $untar_dir = catdir( $cache_dir, UNTAR_DIR );
214 my $src_dir = catdir( $cache_dir, SRC_DIR );
215 for my $d ( $src_dir, $untar_dir ) {
217 mkdir $d or die "mkdir $d: $!\n";
220 my %ignorable = map { ( $_ => 1 ) } @Maintainers::IGNORABLE;
221 my %wanted_upstream = map { ( $_ => 1 ) } @$wanted_upstreams;
224 for my $module (@$modules) {
225 warn "Processing $module ...\n" if defined $output_file;
227 my $m = $Maintainers::Modules{$module}
228 or die "ERROR: No such module in Maintainers.pl: '$module'\n";
230 unless ( $m->{CPAN} ) {
231 print $outfh "WARNING: $module is not dual-life; skipping\n";
235 my $dist = $m->{DISTRIBUTION};
236 die "ERROR: $module has no DISTRIBUTION entry\n" unless defined $dist;
238 if ( $seen_dist{$dist}++ ) {
239 warn "WARNING: duplicate entry for $dist in $module\n";
242 my $upstream = $m->{UPSTREAM} // 'undef';
243 next if @$wanted_upstreams and !$wanted_upstream{$upstream};
245 print $outfh "\n$module - "
246 . $Maintainers::Modules{$module}->{DISTRIBUTION} . "\n";
247 print $outfh " upstream is: "
248 . ( $m->{UPSTREAM} // 'UNKNOWN!' ) . "\n";
253 get_distribution( $src_dir, $mirror_url, $untar_dir, $module,
257 print $outfh " ", $@;
258 print $outfh " (skipping)\n";
262 my @perl_files = Maintainers::get_module_files($module);
264 my $manifest = catfile( $cpan_dir, 'MANIFEST' );
265 die "ERROR: no such file: $manifest\n" unless -f $manifest;
267 my $cpan_files = ExtUtils::Manifest::maniread($manifest);
268 my @cpan_files = sort keys %$cpan_files;
270 ( my $main_pm = $module ) =~ s{::}{/}g;
273 my ( $excluded, $map, $customized ) =
274 get_map( $m, $module, \@perl_files );
277 @perl_unseen{@perl_files} = ();
278 my %perl_files = %perl_unseen;
280 foreach my $cpan_file (@cpan_files) {
282 cpan_to_perl( $excluded, $map, $customized, $cpan_file );
283 unless ( defined $mapped_file ) {
284 print $outfh " Excluded: $cpan_file\n" if $verbose;
288 if ( exists $perl_files{$mapped_file} ) {
289 delete $perl_unseen{$mapped_file};
293 # some CPAN files foo are stored in core as foo.packed,
294 # which are then unpacked by 'make test_prep'
295 my $packed_file = "$mapped_file.packed";
296 if ( exists $perl_files{$packed_file} ) {
297 if ( !-f $mapped_file and -f $packed_file ) {
299 WARNING: $mapped_file not found, but .packed variant exists.
300 Perhaps you need to run 'make test_prep'?
304 delete $perl_unseen{$packed_file};
307 if ( $ignorable{$cpan_file} ) {
308 print $outfh " Ignored: $cpan_file\n" if $verbose;
313 print $outfh " CPAN only: $cpan_file",
314 ( $cpan_file eq $mapped_file )
316 : " (missing $mapped_file)\n";
322 my $abs_cpan_file = catfile( $cpan_dir, $cpan_file );
324 # should never happen
325 die "ERROR: can't find file $abs_cpan_file\n"
326 unless -f $abs_cpan_file;
328 # might happen if the FILES entry in Maintainers.pl is wrong
329 unless ( -f $mapped_file ) {
330 print $outfh "WARNING: perl file not found: $mapped_file\n";
334 my $relative_mapped_file = relatively_mapped($mapped_file);
337 file_diff( $outfh, $abs_cpan_file, $mapped_file, $reverse,
339 if ( $different && customized( $m, $relative_mapped_file ) ) {
341 print $outfh " Customized for blead: $relative_mapped_file\n";
346 $different =~ s{^(--- |\+\+\+ )/tmp/[^/]+/}{$1}gm;
347 print $outfh $different;
350 if ( $cpan_file eq $relative_mapped_file ) {
351 print $outfh " Modified: $relative_mapped_file\n";
355 " Modified: $cpan_file $relative_mapped_file\n";
358 if ( $cpan_file =~ m{\.pm\z} ) {
359 my $pv = MM->parse_version($mapped_file) || 'unknown';
360 my $cv = MM->parse_version($abs_cpan_file) || 'unknown';
363 " Version mismatch in '$cpan_file':\n $cv (cpan) vs $pv (perl)\n";
369 elsif ( customized( $m, $relative_mapped_file ) ) {
370 # Maintainers.pl says we customized it, but it looks the
371 # same as CPAN so maybe we lost the customization, which
373 if ( $cpan_file eq $relative_mapped_file ) {
374 print $outfh " Blead customization missing: $cpan_file\n";
378 " Blead customization missing: $cpan_file $relative_mapped_file\n";
382 if ( $cpan_file eq $relative_mapped_file ) {
383 print $outfh " Unchanged: $cpan_file\n";
387 " Unchanged: $cpan_file $relative_mapped_file\n";
391 for ( sort keys %perl_unseen ) {
392 my $relative_mapped_file = relatively_mapped($_);
393 if ( customized( $m, $relative_mapped_file ) ) {
394 print $outfh " Customized for blead: $_\n";
397 print $outfh " Perl only: $_\n" unless $use_diff;
401 foreach my $exclude (@$excluded) {
403 foreach my $cpan_file (@cpan_files) {
404 # may be a simple string to match exactly, or a pattern
405 if ( ref $exclude ) {
406 $seen = 1 if $cpan_file =~ $exclude;
409 $seen = 1 if $cpan_file eq $exclude;
414 print $outfh " Unnecessary exclusion: $exclude\n";
421 sub relatively_mapped {
422 my $relative = shift;
423 $relative =~ s/^(cpan|dist|ext)\/.*?\///;
427 # given FooBar-1.23_45.tar.gz, return FooBar
431 $d =~ s/\.tar\.gz$//;
433 $d =~ s/[\d\-_\.]+$//;
437 # process --crosscheck action:
438 # ie list all distributions whose CPAN versions differ from that listed in
443 $outfh, $cache_dir, $mirror_url,
444 $force, $modules, $wanted_upstreams,
447 my $file = '02packages.details.txt';
448 my $download_dir = $cache_dir || File::Temp::tempdir( CLEANUP => 1 );
449 my $path = catfile( $download_dir, $file );
450 my $gzfile = "$path.gz";
452 # grab 02packages.details.txt
454 my $url = cpan_url( $mirror_url, "modules/02packages.details.txt.gz" );
456 if ( !-f $gzfile or $force ) {
458 my_getstore( $url, $gzfile );
461 IO::Uncompress::Gunzip::gunzip( $gzfile, $path )
463 "ERROR: failed to ungzip $gzfile: $IO::Uncompress::Gunzip::GunzipError\n";
465 # suck in the data from it
467 open my $fh, '<', $path
468 or die "ERROR: open: $file: $!\n";
476 my @f = split ' ', $_;
479 "WARNING: $file:$.: line doesn't have three fields (skipping)\n";
483 $distro =~ s{^[A-Z]/[A-Z]{2}/}{}; # strip leading A/AB/
484 $modules{ $f[0] } = $distro;
486 ( my $short_distro = $distro ) =~ s{^.*/}{};
488 $distros{ distro_base($short_distro) }{$distro} = 1;
491 my %wanted_upstream = map { ( $_ => 1 ) } @$wanted_upstreams;
492 for my $module (@$modules) {
493 my $m = $Maintainers::Modules{$module}
494 or die "ERROR: No such module in Maintainers.pl: '$module'\n";
496 unless ( $m->{CPAN} ) {
497 print $outfh "\nWARNING: $module is not dual-life; skipping\n";
501 # given an entry like
502 # Foo::Bar 1.23 foo-bar-1.23.tar.gz,
503 # first compare the module name against Foo::Bar, and failing that,
506 my $pdist = $m->{DISTRIBUTION};
507 die "ERROR: $module has no DISTRIBUTION entry\n" unless defined $pdist;
509 my $upstream = $m->{UPSTREAM} // 'undef';
510 next if @$wanted_upstreams and !$wanted_upstream{$upstream};
512 my $cdist = $modules{$module};
513 ( my $short_pdist = $pdist ) =~ s{^.*/}{};
515 unless ( defined $cdist ) {
516 my $d = $distros{ distro_base($short_pdist) };
517 unless ( defined $d ) {
518 print $outfh "\n$module: Can't determine current CPAN entry\n";
521 if ( keys %$d > 1 ) {
523 "\n$module: (found more than one CPAN candidate):\n";
524 print $outfh " perl: $pdist\n";
525 print $outfh " CPAN: $_\n" for sort keys %$d;
528 $cdist = ( keys %$d )[0];
531 if ( $cdist ne $pdist ) {
532 print $outfh "\n$module:\n Perl: $pdist\n CPAN: $cdist\n";
537 # get the EXCLUDED and MAP entries for this module, or
538 # make up defaults if they don't exist
541 my ( $m, $module_name, $perl_files ) = @_;
543 my ( $excluded, $map, $customized ) = @$m{qw(EXCLUDED MAP CUSTOMIZED)};
548 return $excluded, $map, $customized if $map;
550 # all files under ext/foo-bar (plus maybe some under t/lib)???
554 if (m{^((?:ext|dist|cpan)/[^/]+/)}) {
555 if ( defined $ext and $ext ne $1 ) {
557 # more than one ext/$ext/
572 if ( defined $ext ) {
573 $map = { '' => $ext },;
576 ( my $base = $module_name ) =~ s{::}{/}g;
583 return $excluded, $map, $customized;
586 # Given an exclude list and a mapping hash, convert a CPAN filename
587 # (eg 't/bar.t') to the equivalent perl filename (eg 'lib/Foo/Bar/t/bar.t').
588 # Returns an empty list for an excluded file
591 my ( $excluded, $map, $customized, $cpan_file ) = @_;
593 my %customized = map { ( $_ => 1 ) } @$customized;
594 for my $exclude (@$excluded) {
595 next if $customized{$exclude};
597 # may be a simple string to match exactly, or a pattern
598 if ( ref $exclude ) {
599 return if $cpan_file =~ $exclude;
602 return if $cpan_file eq $exclude;
606 my $perl_file = $cpan_file;
608 # try longest prefix first, then alphabetically on tie-break
610 my $prefix ( sort { length($b) <=> length($a) || $a cmp $b } keys %$map )
612 last if $perl_file =~ s/^\Q$prefix/$map->{$prefix}/;
617 # fetch a file from a URL and store it in a file given by a filename
620 my ( $url, $file ) = @_;
621 File::Path::mkpath( File::Basename::dirname($file) );
622 if ( $url =~ qr{\Afile://(?:localhost)?/} ) {
623 ( my $local_path = $url ) =~ s{\Afile://(?:localhost)?}{};
624 File::Copy::copy( $local_path, $file );
626 my $http = HTTP::Tiny->new;
627 my $response = $http->mirror($url, $file);
628 return $response->{success};
632 # download and unpack a distribution
633 # Returns the full pathname of the extracted directory
634 # (eg '/tmp/XYZ/Foo_bar-1.23')
636 # cache_dir: where to download the .tar.gz file to
637 # mirror_url: CPAN mirror to download from
638 # untar_dir: where to untar or unzup the file
639 # module: name of module
640 # dist: name of the distribution
642 sub get_distribution {
643 my ( $src_dir, $mirror_url, $untar_dir, $module, $dist ) = @_;
645 $dist =~ m{.+/([^/]+)$}
647 "ERROR: $module: invalid DISTRIBUTION name (no AUTHOR/ prefix): $dist\n";
650 my $download_file = catfile( $src_dir, $filename );
652 # download distribution
654 if ( -f $download_file and !-s $download_file ) {
656 # failed download might leave a zero-length file
657 unlink $download_file;
660 unless ( -f $download_file ) {
663 my $url = cpan_url_distribution( $mirror_url, $dist );
664 my_getstore( $url, $download_file )
665 or die "ERROR: Could not fetch '$url'\n";
668 # get the expected name of the extracted distribution dir
670 my $path = catfile( $untar_dir, $filename );
672 $path =~ s/\.tar\.gz$//
673 or $path =~ s/\.tgz$//
674 or $path =~ s/\.zip$//
676 "ERROR: downloaded file does not have a recognised suffix: $path\n";
678 # extract it unless we already have it cached or tarball is newer
679 if ( !-d $path || ( -M $download_file < -M $path ) ) {
680 $path = extract( $download_file, $untar_dir )
682 "ERROR: failed to extract distribution '$download_file to temp. dir: "
686 die "ERROR: Extracted tarball does not appear as $path\n" unless -d $path;
691 # produce the diff of a single file
694 my $cpan_file = shift;
695 my $perl_file = shift;
697 my $diff_opts = shift;
699 my @cmd = ( DIFF_CMD, split ' ', $diff_opts );
701 push @cmd, $perl_file, $cpan_file;
704 push @cmd, $cpan_file, $perl_file;
711 my ( $module_data, $file ) = @_;
712 return grep { $file eq $_ } @{ $module_data->{CUSTOMIZED} };
716 my ($archive,$to) = @_;
718 chdir $to or die "$!\n";
721 local $Archive::Tar::CHOWN = 0;
723 unless ( $next = Archive::Tar->iter( $archive, 1 ) ) {
724 $! = $Archive::Tar::error;
727 while ( my $file = $next->() ) {
728 push @files, $file->full_path;
729 unless ( $file->extract ) {
730 $! = $Archive::Tar::error;
735 my $path = __get_extract_dir( \@files );
736 chdir $cwd or die "$!\n";
740 sub __get_extract_dir {
741 my $files = shift || [];
743 return unless scalar @$files;
746 for my $aref ( [ \$dir1, 0 ], [ \$dir2, -1 ] ) {
747 my($dir,$pos) = @$aref;
749 ### add a catdir(), so that any trailing slashes get
750 ### take care of (removed)
751 ### also, a catdir() normalises './dir/foo' to 'dir/foo';
752 ### which was the problem in bug #23999
753 my $res = -d $files->[$pos]
754 ? File::Spec->catdir( $files->[$pos], '' )
755 : File::Spec->catdir( File::Basename::dirname( $files->[$pos] ) );
760 ### if the first and last dir don't match, make sure the
761 ### dirname is not set wrongly
764 ### dirs are the same, so we know for sure what the extract dir is
765 if( $dir1 eq $dir2 ) {
768 ### dirs are different.. do they share the base dir?
769 ### if so, use that, if not, fall back to '.'
771 my $base1 = [ File::Spec->splitdir( $dir1 ) ]->[0];
772 my $base2 = [ File::Spec->splitdir( $dir2 ) ]->[0];
774 $dir = File::Spec->rel2abs( $base1 eq $base2 ? $base1 : '.' );
777 return File::Spec->rel2abs( $dir );