3 # core-cpan-diff: Compare CPAN modules with their equivalent in core
5 # Originally based on App::DualLivedDiff by Steffen Mueller.
13 use File::Basename ();
18 use File::Spec::Functions;
19 use IO::Uncompress::Gunzip ();
21 use ExtUtils::Manifest;
22 use ExtUtils::MakeMaker ();
25 BEGIN { die "Must be run from root of perl source tree\n" unless -d 'Porting' }
30 use Cwd qw[cwd chdir];
33 local $Archive::Tar::WARN=0;
35 # where, under the cache dir, to download tarballs to
36 use constant SRC_DIR => 'tarballs';
38 # where, under the cache dir, to untar stuff to
39 use constant UNTAR_DIR => 'untarred';
41 use constant DIFF_CMD => 'diff';
44 print STDERR "\n@_\n\n" if @_;
46 Usage: $0 [opts] [ -d | -v | -x ] [ -a | module ... ]
48 -a/--all Scan all dual-life modules.
50 -c/--cachedir Where to save downloaded CPAN tarball files
51 (defaults to /tmp/something/ with deletion after each run).
53 -d/--diff Display file differences using diff(1), rather than just
54 listing which files have changed.
56 --diffopts Options to pass to the diff command. Defaults to '-u --binary'.
58 -f|force Force download from CPAN of new 02packages.details.txt file
59 (with --crosscheck only).
61 -m|mirror Preferred CPAN mirror URI (http:// or file:///)
62 (Local mirror must be a complete mirror, not minicpan)
64 -o/--output File name to write output to (defaults to STDOUT).
66 -r/--reverse Reverses the diff (perl to CPAN).
68 -u/--upstream only print modules with the given upstream (defaults to all)
70 -v/--verbose List the fate of *all* files in the tarball, not just those
71 that differ or are missing.
73 -x|crosscheck List the distributions whose current CPAN version differs from
74 that in blead (i.e. the DISTRIBUTION field in Maintainers.pl).
76 By default (i.e. without the --crosscheck option), for each listed module
77 (or with -a, all CPAN modules listed in Maintainers.pl), grab the tarball
78 from CPAN associated with that module, and compare the files in it with
79 those in the perl source tree.
81 Must be run from the root of the perl source tree.
82 Module names must match the keys of %Modules in Maintainers.pl.
84 The diff(1) command is assumed to be in your PATH and is used to diff files
85 regardless of whether the --diff option has been chosen to display any file
97 my $mirror_url = "http://www.cpan.org/";
105 'a|all' => \$scan_all,
106 'c|cachedir=s' => \$cache_dir,
107 'd|diff' => \$use_diff,
108 'diffopts:s' => \$diff_opts,
109 'f|force' => \$force,
111 'm|mirror=s' => \$mirror_url,
112 'o|output=s' => \$output_file,
113 'r|reverse' => \$reverse,
114 'u|upstream=s@' => \@wanted_upstreams,
115 'v|verbose:1' => \$verbose,
116 'x|crosscheck' => \$do_crosscheck,
121 usage("Cannot mix -a with module list") if $scan_all && @ARGV;
123 if ($do_crosscheck) {
124 usage("can't use -r, -d, --diffopts with --crosscheck")
125 if ( $reverse || $use_diff || $diff_opts );
128 $diff_opts = '-u --binary' unless defined $diff_opts;
129 usage("can't use -f without --crosscheck") if $force;
134 ? grep $Maintainers::Modules{$_}{CPAN},
135 ( sort { lc $a cmp lc $b } keys %Maintainers::Modules )
137 usage("No modules specified") unless @modules;
140 if ( defined $output_file ) {
141 open $outfh, '>', $output_file
142 or die "ERROR: could not open file '$output_file' for writing: $!\n";
145 open $outfh, ">&STDOUT"
146 or die "ERROR: can't dup STDOUT: $!\n";
149 if ( defined $cache_dir ) {
150 die "ERROR: not a directory: '$cache_dir'\n"
151 if !-d $cache_dir && -e $cache_dir;
152 File::Path::mkpath($cache_dir);
155 $cache_dir = File::Temp::tempdir( CLEANUP => 1 );
158 $mirror_url .= "/" unless substr( $mirror_url, -1 ) eq "/";
159 my $test_file = "modules/03modlist.data.gz";
161 cpan_url( $mirror_url, $test_file ),
162 catfile( $cache_dir, $test_file )
163 ) or die "ERROR: not a CPAN mirror '$mirror_url'\n";
165 if ($do_crosscheck) {
167 $outfh, $cache_dir, $mirror_url, $verbose,
168 $force, \@modules, \@wanted_upstreams
172 $verbose > 2 and $use_diff++;
174 \@modules, $outfh, $output_file,
175 $cache_dir, $mirror_url, $verbose,
176 $use_diff, $reverse, $diff_opts,
182 # construct a CPAN url
185 my ( $mirror_url, @path ) = @_;
186 return $mirror_url unless @path;
187 my $cpan_path = join( "/", map { split "/", $_ } @path );
188 $cpan_path =~ s{\A/}{}; # remove leading slash since url has one trailing
189 return $mirror_url . $cpan_path;
192 # construct a CPAN URL for a author/distribution string like:
193 # BINGOS/Archive-Extract-0.52.tar.gz
195 sub cpan_url_distribution {
196 my ( $mirror_url, $distribution ) = @_;
197 $distribution =~ /^([A-Z])([A-Z])/
198 or die "ERROR: invalid DISTRIBUTION name (not /^[A-Z]{2}/): $distribution\n";
199 my $path = "authors/id/$1/$1$2/$distribution";
200 return cpan_url( $mirror_url, $path );
203 # compare a list of modules against their CPAN equivalents
207 $modules, $outfh, $output_file, $cache_dir,
208 $mirror_url, $verbose, $use_diff, $reverse,
209 $diff_opts, $wanted_upstreams
212 # first, make sure we have a directory where they can all be untarred,
213 # and if its a permanent directory, clear any previous content
214 my $untar_dir = catdir( $cache_dir, UNTAR_DIR );
215 my $src_dir = catdir( $cache_dir, SRC_DIR );
216 for my $d ( $src_dir, $untar_dir ) {
218 mkdir $d or die "mkdir $d: $!\n";
221 my %ignorable = map { ( $_ => 1 ) } @Maintainers::IGNORABLE;
222 my %wanted_upstream = map { ( $_ => 1 ) } @$wanted_upstreams;
225 for my $module (@$modules) {
226 warn "Processing $module ...\n" if defined $output_file;
228 my $m = $Maintainers::Modules{$module}
229 or die "ERROR: No such module in Maintainers.pl: '$module'\n";
231 unless ( $m->{CPAN} ) {
232 print $outfh "WARNING: $module is not dual-life; skipping\n";
236 my $dist = $m->{DISTRIBUTION};
237 die "ERROR: $module has no DISTRIBUTION entry\n" unless defined $dist;
239 if ( $seen_dist{$dist}++ ) {
240 warn "WARNING: duplicate entry for $dist in $module\n";
243 my $upstream = $m->{UPSTREAM} // 'undef';
244 next if @$wanted_upstreams and !$wanted_upstream{$upstream};
246 print $outfh "\n$module - "
247 . $Maintainers::Modules{$module}->{DISTRIBUTION} . "\n";
248 print $outfh " upstream is: "
249 . ( $m->{UPSTREAM} // 'UNKNOWN!' ) . "\n";
254 get_distribution( $src_dir, $mirror_url, $untar_dir, $module,
258 print $outfh " ", $@;
259 print $outfh " (skipping)\n";
263 my @perl_files = Maintainers::get_module_files($module);
265 my $manifest = catfile( $cpan_dir, 'MANIFEST' );
266 die "ERROR: no such file: $manifest\n" unless -f $manifest;
268 my $cpan_files = ExtUtils::Manifest::maniread($manifest);
269 my @cpan_files = sort keys %$cpan_files;
271 ( my $main_pm = $module ) =~ s{::}{/}g;
274 my ( $excluded, $map, $customized ) =
275 get_map( $m, $module, \@perl_files );
278 @perl_unseen{@perl_files} = ();
279 my %perl_files = %perl_unseen;
281 foreach my $cpan_file (@cpan_files) {
283 cpan_to_perl( $excluded, $map, $customized, $cpan_file );
284 unless ( defined $mapped_file ) {
285 print $outfh " Excluded: $cpan_file\n" if $verbose;
289 if ( exists $perl_files{$mapped_file} ) {
290 delete $perl_unseen{$mapped_file};
294 # some CPAN files foo are stored in core as foo.packed,
295 # which are then unpacked by 'make test_prep'
296 my $packed_file = "$mapped_file.packed";
297 if ( exists $perl_files{$packed_file} ) {
298 if ( !-f $mapped_file and -f $packed_file ) {
300 WARNING: $mapped_file not found, but .packed variant exists.
301 Perhaps you need to run 'make test_prep'?
305 delete $perl_unseen{$packed_file};
308 if ( $ignorable{$cpan_file} ) {
309 print $outfh " Ignored: $cpan_file\n" if $verbose;
314 print $outfh " CPAN only: $cpan_file",
315 ( $cpan_file eq $mapped_file )
317 : " (missing $mapped_file)\n";
323 my $abs_cpan_file = catfile( $cpan_dir, $cpan_file );
325 # should never happen
326 die "ERROR: can't find file $abs_cpan_file\n"
327 unless -f $abs_cpan_file;
329 # might happen if the FILES entry in Maintainers.pl is wrong
330 unless ( -f $mapped_file ) {
331 print $outfh "WARNING: perl file not found: $mapped_file\n";
335 my $relative_mapped_file = relatively_mapped($mapped_file);
338 file_diff( $outfh, $abs_cpan_file, $mapped_file, $reverse,
340 if ( $different && customized( $m, $relative_mapped_file ) ) {
341 print $outfh " Customized for blead: $relative_mapped_file\n";
342 if ( $use_diff && $verbose ) {
343 $different =~ s{^(--- |\+\+\+ )/tmp/[^/]+/}{$1}gm;
344 print $outfh $different;
349 $different =~ s{^(--- |\+\+\+ )/tmp/[^/]+/}{$1}gm;
350 print $outfh $different;
353 if ( $cpan_file eq $relative_mapped_file ) {
354 print $outfh " Modified: $relative_mapped_file\n";
358 " Modified: $cpan_file $relative_mapped_file\n";
361 if ( $cpan_file =~ m{\.pm\z} ) {
362 my $pv = MM->parse_version($mapped_file) || 'unknown';
363 my $cv = MM->parse_version($abs_cpan_file) || 'unknown';
366 " Version mismatch in '$cpan_file':\n $cv (cpan) vs $pv (perl)\n";
372 elsif ( customized( $m, $relative_mapped_file ) ) {
373 # Maintainers.pl says we customized it, but it looks the
374 # same as CPAN so maybe we lost the customization, which
376 if ( $cpan_file eq $relative_mapped_file ) {
377 print $outfh " Blead customization missing: $cpan_file\n";
381 " Blead customization missing: $cpan_file $relative_mapped_file\n";
385 if ( $cpan_file eq $relative_mapped_file ) {
386 print $outfh " Unchanged: $cpan_file\n";
390 " Unchanged: $cpan_file $relative_mapped_file\n";
394 for ( sort keys %perl_unseen ) {
395 my $relative_mapped_file = relatively_mapped($_);
396 if ( customized( $m, $relative_mapped_file ) ) {
397 print $outfh " Customized for blead: $_\n";
400 print $outfh " Perl only: $_\n" unless $use_diff;
404 foreach my $exclude (@$excluded) {
406 foreach my $cpan_file (@cpan_files) {
407 # may be a simple string to match exactly, or a pattern
408 if ( ref $exclude ) {
409 $seen = 1 if $cpan_file =~ $exclude;
412 $seen = 1 if $cpan_file eq $exclude;
417 print $outfh " Unnecessary exclusion: $exclude\n";
424 sub relatively_mapped {
425 my $relative = shift;
426 $relative =~ s/^(cpan|dist|ext)\/.*?\///;
430 # given FooBar-1.23_45.tar.gz, return FooBar
434 $d =~ s/\.tar\.gz$//;
436 $d =~ s/[\d\-_\.]+$//;
440 # process --crosscheck action:
441 # ie list all distributions whose CPAN versions differ from that listed in
446 $outfh, $cache_dir, $mirror_url, $verbose,
447 $force, $modules, $wanted_upstreams,
450 my $file = '02packages.details.txt';
451 my $download_dir = $cache_dir || File::Temp::tempdir( CLEANUP => 1 );
452 my $path = catfile( $download_dir, $file );
453 my $gzfile = "$path.gz";
455 # grab 02packages.details.txt
457 my $url = cpan_url( $mirror_url, "modules/02packages.details.txt.gz" );
459 if ( !-f $gzfile or $force ) {
461 my_getstore( $url, $gzfile );
464 IO::Uncompress::Gunzip::gunzip( $gzfile, $path )
466 "ERROR: failed to ungzip $gzfile: $IO::Uncompress::Gunzip::GunzipError\n";
468 # suck in the data from it
470 open my $fh, '<', $path
471 or die "ERROR: open: $file: $!\n";
479 my @f = split ' ', $_;
482 "WARNING: $file:$.: line doesn't have three fields (skipping)\n";
486 $distro =~ s{^[A-Z]/[A-Z]{2}/}{}; # strip leading A/AB/
487 $modules{ $f[0] } = $distro;
489 ( my $short_distro = $distro ) =~ s{^.*/}{};
491 $distros{ distro_base($short_distro) }{$distro} = 1;
494 my %wanted_upstream = map { ( $_ => 1 ) } @$wanted_upstreams;
495 for my $module (@$modules) {
496 my $m = $Maintainers::Modules{$module}
497 or die "ERROR: No such module in Maintainers.pl: '$module'\n";
499 $verbose and warn "Checking $module\n";
501 unless ( $m->{CPAN} ) {
502 print $outfh "\nWARNING: $module is not dual-life; skipping\n";
506 # given an entry like
507 # Foo::Bar 1.23 foo-bar-1.23.tar.gz,
508 # first compare the module name against Foo::Bar, and failing that,
511 my $pdist = $m->{DISTRIBUTION};
512 die "ERROR: $module has no DISTRIBUTION entry\n" unless defined $pdist;
514 my $upstream = $m->{UPSTREAM} // 'undef';
515 next if @$wanted_upstreams and !$wanted_upstream{$upstream};
517 my $cdist = $modules{$module};
518 ( my $short_pdist = $pdist ) =~ s{^.*/}{};
520 unless ( defined $cdist ) {
521 my $d = $distros{ distro_base($short_pdist) };
522 unless ( defined $d ) {
523 print $outfh "\n$module: Can't determine current CPAN entry\n";
526 if ( keys %$d > 1 ) {
528 "\n$module: (found more than one CPAN candidate):\n";
529 print $outfh " Perl: $pdist\n";
530 print $outfh " CPAN: $_\n" for sort keys %$d;
533 $cdist = ( keys %$d )[0];
536 if ( $cdist ne $pdist ) {
537 print $outfh "\n$module:\n Perl: $pdist\n CPAN: $cdist\n";
542 # get the EXCLUDED and MAP entries for this module, or
543 # make up defaults if they don't exist
546 my ( $m, $module_name, $perl_files ) = @_;
548 my ( $excluded, $map, $customized ) = @$m{qw(EXCLUDED MAP CUSTOMIZED)};
553 return $excluded, $map, $customized if $map;
555 # all files under ext/foo-bar (plus maybe some under t/lib)???
559 if (m{^((?:ext|dist|cpan)/[^/]+/)}) {
560 if ( defined $ext and $ext ne $1 ) {
562 # more than one ext/$ext/
577 if ( defined $ext ) {
578 $map = { '' => $ext },;
581 ( my $base = $module_name ) =~ s{::}{/}g;
588 return $excluded, $map, $customized;
591 # Given an exclude list and a mapping hash, convert a CPAN filename
592 # (eg 't/bar.t') to the equivalent perl filename (eg 'lib/Foo/Bar/t/bar.t').
593 # Returns an empty list for an excluded file
596 my ( $excluded, $map, $customized, $cpan_file ) = @_;
598 my %customized = map { ( $_ => 1 ) } @$customized;
599 for my $exclude (@$excluded) {
600 next if $customized{$exclude};
602 # may be a simple string to match exactly, or a pattern
603 if ( ref $exclude ) {
604 return if $cpan_file =~ $exclude;
607 return if $cpan_file eq $exclude;
611 my $perl_file = $cpan_file;
613 # try longest prefix first, then alphabetically on tie-break
615 my $prefix ( sort { length($b) <=> length($a) || $a cmp $b } keys %$map )
617 last if $perl_file =~ s/^\Q$prefix/$map->{$prefix}/;
622 # fetch a file from a URL and store it in a file given by a filename
625 my ( $url, $file ) = @_;
626 File::Path::mkpath( File::Basename::dirname($file) );
627 if ( $url =~ qr{\Afile://(?:localhost)?/} ) {
628 ( my $local_path = $url ) =~ s{\Afile://(?:localhost)?}{};
629 File::Copy::copy( $local_path, $file );
631 my $http = HTTP::Tiny->new;
632 my $response = $http->mirror($url, $file);
633 return $response->{success};
637 # download and unpack a distribution
638 # Returns the full pathname of the extracted directory
639 # (eg '/tmp/XYZ/Foo_bar-1.23')
641 # cache_dir: where to download the .tar.gz file to
642 # mirror_url: CPAN mirror to download from
643 # untar_dir: where to untar or unzup the file
644 # module: name of module
645 # dist: name of the distribution
647 sub get_distribution {
648 my ( $src_dir, $mirror_url, $untar_dir, $module, $dist ) = @_;
650 $dist =~ m{.+/([^/]+)$}
652 "ERROR: $module: invalid DISTRIBUTION name (no AUTHOR/ prefix): $dist\n";
655 my $download_file = catfile( $src_dir, $filename );
657 # download distribution
659 if ( -f $download_file and !-s $download_file ) {
661 # failed download might leave a zero-length file
662 unlink $download_file;
665 unless ( -f $download_file ) {
668 my $url = cpan_url_distribution( $mirror_url, $dist );
669 my_getstore( $url, $download_file )
670 or die "ERROR: Could not fetch '$url'\n";
673 # get the expected name of the extracted distribution dir
675 my $path = catfile( $untar_dir, $filename );
677 $path =~ s/\.tar\.gz$//
678 or $path =~ s/\.tgz$//
679 or $path =~ s/\.zip$//
681 "ERROR: downloaded file does not have a recognised suffix: $path\n";
683 # extract it unless we already have it cached or tarball is newer
684 if ( !-d $path || ( -M $download_file < -M $path ) ) {
685 $path = extract( $download_file, $untar_dir )
687 "ERROR: failed to extract distribution '$download_file to temp. dir: "
691 die "ERROR: Extracted tarball does not appear as $path\n" unless -d $path;
696 # produce the diff of a single file
699 my $cpan_file = shift;
700 my $perl_file = shift;
702 my $diff_opts = shift;
704 my @cmd = ( DIFF_CMD, split ' ', $diff_opts );
706 push @cmd, $perl_file, $cpan_file;
709 push @cmd, $cpan_file, $perl_file;
716 my ( $module_data, $file ) = @_;
717 return grep { $file eq $_ } @{ $module_data->{CUSTOMIZED} };
721 my ($archive,$to) = @_;
723 chdir $to or die "$!\n";
726 local $Archive::Tar::CHOWN = 0;
728 unless ( $next = Archive::Tar->iter( $archive, 1 ) ) {
729 $! = $Archive::Tar::error;
732 while ( my $file = $next->() ) {
733 push @files, $file->full_path;
734 unless ( $file->extract ) {
735 $! = $Archive::Tar::error;
740 my $path = __get_extract_dir( \@files );
741 chdir $cwd or die "$!\n";
745 sub __get_extract_dir {
746 my $files = shift || [];
748 return unless scalar @$files;
751 for my $aref ( [ \$dir1, 0 ], [ \$dir2, -1 ] ) {
752 my($dir,$pos) = @$aref;
754 ### add a catdir(), so that any trailing slashes get
755 ### take care of (removed)
756 ### also, a catdir() normalises './dir/foo' to 'dir/foo';
757 ### which was the problem in bug #23999
758 my $res = -d $files->[$pos]
759 ? File::Spec->catdir( $files->[$pos], '' )
760 : File::Spec->catdir( File::Basename::dirname( $files->[$pos] ) );
765 ### if the first and last dir don't match, make sure the
766 ### dirname is not set wrongly
769 ### dirs are the same, so we know for sure what the extract dir is
770 if( $dir1 eq $dir2 ) {
773 ### dirs are different.. do they share the base dir?
774 ### if so, use that, if not, fall back to '.'
776 my $base1 = [ File::Spec->splitdir( $dir1 ) ]->[0];
777 my $base2 = [ File::Spec->splitdir( $dir2 ) ]->[0];
779 $dir = File::Spec->rel2abs( $base1 eq $base2 ? $base1 : '.' );
782 return File::Spec->rel2abs( $dir );