3 # core-cpan-diff: Compare CPAN modules with their equivalent in core
5 # Originally based on App::DualLivedDiff by Steffen Mueller.
13 use File::Basename ();
18 use File::Spec::Functions;
19 use IO::Uncompress::Gunzip ();
21 use ExtUtils::Manifest;
22 use ExtUtils::MakeMaker ();
25 BEGIN { die "Must be run from root of perl source tree\n" unless -d 'Porting' }
30 use Cwd qw[cwd chdir];
33 local $Archive::Tar::WARN=0;
35 # where, under the cache dir, to download tarballs to
36 use constant SRC_DIR => 'tarballs';
38 # where, under the cache dir, to untar stuff to
39 use constant UNTAR_DIR => 'untarred';
41 use constant DIFF_CMD => 'diff';
44 print STDERR "\n@_\n\n" if @_;
46 Usage: $0 [opts] [ -d | -v | -x ] [ -a | module ... ]
48 -a/--all Scan all dual-life modules.
50 -c/--cachedir Where to save downloaded CPAN tarball files
51 (defaults to /tmp/something/ with deletion after each run).
53 -d/--diff Display file differences using diff(1), rather than just
54 listing which files have changed.
56 --diffopts Options to pass to the diff command. Defaults to '-u'.
58 -f|force Force download from CPAN of new 02packages.details.txt file
59 (with --crosscheck only).
61 -m|mirror Preferred CPAN mirror URI (http:// or file:///)
62 (Local mirror must be a complete mirror, not minicpan)
64 -o/--output File name to write output to (defaults to STDOUT).
66 -r/--reverse Reverses the diff (perl to CPAN).
68 -u/--upstream only print modules with the given upstream (defaults to all)
70 -v/--verbose List the fate of *all* files in the tarball, not just those
71 that differ or are missing.
73 -x|crosscheck List the distributions whose current CPAN version differs from
74 that in blead (i.e. the DISTRIBUTION field in Maintainers.pl).
76 By default (i.e. without the --crosscheck option), for each listed module
77 (or with -a, all CPAN modules listed in Maintainers.pl), grab the tarball
78 from CPAN associated with that module, and compare the files in it with
79 those in the perl source tree.
81 Must be run from the root of the perl source tree.
82 Module names must match the keys of %Modules in Maintainers.pl.
84 The diff(1) command is assumed to be in your PATH and is used to diff files
85 regardless of whether the --diff option has been chosen to display any file
97 my $mirror_url = "http://www.cpan.org/";
105 'a|all' => \$scan_all,
106 'c|cachedir=s' => \$cache_dir,
107 'd|diff' => \$use_diff,
108 'diffopts:s' => \$diff_opts,
109 'f|force' => \$force,
111 'm|mirror=s' => \$mirror_url,
112 'o|output=s' => \$output_file,
113 'r|reverse' => \$reverse,
114 'u|upstream=s@' => \@wanted_upstreams,
115 'v|verbose' => \$verbose,
116 'x|crosscheck' => \$do_crosscheck,
121 usage("Cannot mix -a with module list") if $scan_all && @ARGV;
123 if ($do_crosscheck) {
124 usage("can't use -r, -d, --diffopts, -v with --crosscheck")
125 if ( $reverse || $use_diff || $diff_opts || $verbose );
128 $diff_opts = '-u -b' unless defined $diff_opts;
129 usage("can't use -f without --crosscheck") if $force;
134 ? grep $Maintainers::Modules{$_}{CPAN},
135 ( sort { lc $a cmp lc $b } keys %Maintainers::Modules )
137 usage("No modules specified") unless @modules;
140 if ( defined $output_file ) {
141 open $outfh, '>', $output_file
142 or die "ERROR: could not open file '$output_file' for writing: $!\n";
145 open $outfh, ">&STDOUT"
146 or die "ERROR: can't dup STDOUT: $!\n";
149 if ( defined $cache_dir ) {
150 die "ERROR: not a directory: '$cache_dir'\n"
151 if !-d $cache_dir && -e $cache_dir;
152 File::Path::mkpath($cache_dir);
155 $cache_dir = File::Temp::tempdir( CLEANUP => 1 );
158 $mirror_url .= "/" unless substr( $mirror_url, -1 ) eq "/";
159 my $test_file = "modules/03modlist.data.gz";
161 cpan_url( $mirror_url, $test_file ),
162 catfile( $cache_dir, $test_file )
163 ) or die "ERROR: not a CPAN mirror '$mirror_url'\n";
165 if ($do_crosscheck) {
167 $outfh, $cache_dir, $mirror_url,
168 $force, \@modules, \@wanted_upstreams
173 \@modules, $outfh, $output_file,
174 $cache_dir, $mirror_url, $verbose,
175 $use_diff, $reverse, $diff_opts,
181 # construct a CPAN url
184 my ( $mirror_url, @path ) = @_;
185 return $mirror_url unless @path;
186 my $cpan_path = join( "/", map { split "/", $_ } @path );
187 $cpan_path =~ s{\A/}{}; # remove leading slash since url has one trailing
188 return $mirror_url . $cpan_path;
191 # construct a CPAN URL for a author/distribution string like:
192 # BINGOS/Archive-Extract-0.52.tar.gz
194 sub cpan_url_distribution {
195 my ( $mirror_url, $distribution ) = @_;
196 $distribution =~ /^([A-Z])([A-Z])/
197 or die "ERROR: invalid DISTRIBUTION name (not /^[A-Z]{2}/): $distribution\n";
198 my $path = "authors/id/$1/$1$2/$distribution";
199 return cpan_url( $mirror_url, $path );
202 # compare a list of modules against their CPAN equivalents
206 $modules, $outfh, $output_file, $cache_dir,
207 $mirror_url, $verbose, $use_diff, $reverse,
208 $diff_opts, $wanted_upstreams
211 # first, make sure we have a directory where they can all be untarred,
212 # and if its a permanent directory, clear any previous content
213 my $untar_dir = catdir( $cache_dir, UNTAR_DIR );
214 my $src_dir = catdir( $cache_dir, SRC_DIR );
215 for my $d ( $src_dir, $untar_dir ) {
217 mkdir $d or die "mkdir $d: $!\n";
220 my %ignorable = map { ( $_ => 1 ) } @Maintainers::IGNORABLE;
221 my %wanted_upstream = map { ( $_ => 1 ) } @$wanted_upstreams;
224 for my $module (@$modules) {
225 warn "Processing $module ...\n" if defined $output_file;
227 my $m = $Maintainers::Modules{$module}
228 or die "ERROR: No such module in Maintainers.pl: '$module'\n";
230 unless ( $m->{CPAN} ) {
231 print $outfh "WARNING: $module is not dual-life; skipping\n";
235 my $dist = $m->{DISTRIBUTION};
236 die "ERROR: $module has no DISTRIBUTION entry\n" unless defined $dist;
238 if ( $seen_dist{$dist}++ ) {
239 warn "WARNING: duplicate entry for $dist in $module\n";
242 my $upstream = $m->{UPSTREAM} // 'undef';
243 next if @$wanted_upstreams and !$wanted_upstream{$upstream};
245 print $outfh "\n$module - "
246 . $Maintainers::Modules{$module}->{DISTRIBUTION} . "\n";
247 print $outfh " upstream is: "
248 . ( $m->{UPSTREAM} // 'UNKNOWN!' ) . "\n";
253 get_distribution( $src_dir, $mirror_url, $untar_dir, $module,
257 print $outfh " ", $@;
258 print $outfh " (skipping)\n";
262 my @perl_files = Maintainers::get_module_files($module);
264 my $manifest = catfile( $cpan_dir, 'MANIFEST' );
265 die "ERROR: no such file: $manifest\n" unless -f $manifest;
267 my $cpan_files = ExtUtils::Manifest::maniread($manifest);
268 my @cpan_files = sort keys %$cpan_files;
270 ( my $main_pm = $module ) =~ s{::}{/}g;
273 my ( $excluded, $map, $customized ) =
274 get_map( $m, $module, \@perl_files );
277 @perl_unseen{@perl_files} = ();
278 my %perl_files = %perl_unseen;
280 foreach my $cpan_file (@cpan_files) {
282 cpan_to_perl( $excluded, $map, $customized, $cpan_file );
283 unless ( defined $mapped_file ) {
284 print $outfh " Excluded: $cpan_file\n" if $verbose;
288 if ( exists $perl_files{$mapped_file} ) {
289 delete $perl_unseen{$mapped_file};
293 # some CPAN files foo are stored in core as foo.packed,
294 # which are then unpacked by 'make test_prep'
295 my $packed_file = "$mapped_file.packed";
296 if ( exists $perl_files{$packed_file} ) {
297 if ( !-f $mapped_file and -f $packed_file ) {
299 WARNING: $mapped_file not found, but .packed variant exists.
300 Perhaps you need to run 'make test_prep'?
304 delete $perl_unseen{$packed_file};
307 if ( $ignorable{$cpan_file} ) {
308 print $outfh " Ignored: $cpan_file\n" if $verbose;
313 print $outfh " CPAN only: $cpan_file",
314 ( $cpan_file eq $mapped_file )
316 : " (missing $mapped_file)\n";
322 my $abs_cpan_file = catfile( $cpan_dir, $cpan_file );
324 # should never happen
325 die "ERROR: can't find file $abs_cpan_file\n"
326 unless -f $abs_cpan_file;
328 # might happen if the FILES entry in Maintainers.pl is wrong
329 unless ( -f $mapped_file ) {
330 print $outfh "WARNING: perl file not found: $mapped_file\n";
334 my $relative_mapped_file = relatively_mapped($mapped_file);
337 file_diff( $outfh, $abs_cpan_file, $mapped_file, $reverse,
339 if ( $different && customized( $m, $relative_mapped_file ) ) {
341 print $outfh " Customized for blead: $relative_mapped_file\n";
346 $different =~ s{^(--- |\+\+\+ )/tmp/[^/]+/}{$1}gm;
347 print $outfh $different;
350 if ( $cpan_file eq $relative_mapped_file ) {
351 print $outfh " Modified: $relative_mapped_file\n";
355 " Modified: $cpan_file $relative_mapped_file\n";
358 if ( $cpan_file =~ m{\.pm\z} ) {
359 my $pv = MM->parse_version($mapped_file) || 'unknown';
360 my $cv = MM->parse_version($abs_cpan_file) || 'unknown';
363 " Version mismatch in '$cpan_file':\n $cv (cpan) vs $pv (perl)\n";
369 elsif ( customized( $m, $relative_mapped_file ) ) {
370 # Maintainers.pl says we customized it, but it looks the
371 # same as CPAN so maybe we lost the customization, which
373 if ( $cpan_file eq $relative_mapped_file ) {
374 print $outfh " Blead customization missing: $cpan_file\n";
378 " Blead customization missing: $cpan_file $relative_mapped_file\n";
382 if ( $cpan_file eq $relative_mapped_file ) {
383 print $outfh " Unchanged: $cpan_file\n";
387 " Unchanged: $cpan_file $relative_mapped_file\n";
391 for ( sort keys %perl_unseen ) {
392 my $relative_mapped_file = relatively_mapped($_);
393 if ( customized( $m, $relative_mapped_file ) ) {
394 print $outfh " Customized for blead: $_\n";
397 print $outfh " Perl only: $_\n" unless $use_diff;
403 sub relatively_mapped {
404 my $relative = shift;
405 $relative =~ s/^(cpan|dist|ext)\/.*?\///;
409 # given FooBar-1.23_45.tar.gz, return FooBar
413 $d =~ s/\.tar\.gz$//;
415 $d =~ s/[\d\-_\.]+$//;
419 # process --crosscheck action:
420 # ie list all distributions whose CPAN versions differ from that listed in
425 $outfh, $cache_dir, $mirror_url,
426 $force, $modules, $wanted_upstreams,
429 my $file = '02packages.details.txt';
430 my $download_dir = $cache_dir || File::Temp::tempdir( CLEANUP => 1 );
431 my $path = catfile( $download_dir, $file );
432 my $gzfile = "$path.gz";
434 # grab 02packages.details.txt
436 my $url = cpan_url( $mirror_url, "modules/02packages.details.txt.gz" );
438 if ( !-f $gzfile or $force ) {
440 my_getstore( $url, $gzfile );
443 IO::Uncompress::Gunzip::gunzip( $gzfile, $path )
445 "ERROR: failed to ungzip $gzfile: $IO::Uncompress::Gunzip::GunzipError\n";
447 # suck in the data from it
449 open my $fh, '<', $path
450 or die "ERROR: open: $file: $!\n";
458 my @f = split ' ', $_;
461 "WARNING: $file:$.: line doesn't have three fields (skipping)\n";
465 $distro =~ s{^[A-Z]/[A-Z]{2}/}{}; # strip leading A/AB/
466 $modules{ $f[0] } = $distro;
468 ( my $short_distro = $distro ) =~ s{^.*/}{};
470 $distros{ distro_base($short_distro) }{$distro} = 1;
473 my %wanted_upstream = map { ( $_ => 1 ) } @$wanted_upstreams;
474 for my $module (@$modules) {
475 my $m = $Maintainers::Modules{$module}
476 or die "ERROR: No such module in Maintainers.pl: '$module'\n";
478 unless ( $m->{CPAN} ) {
479 print $outfh "\nWARNING: $module is not dual-life; skipping\n";
483 # given an entry like
484 # Foo::Bar 1.23 foo-bar-1.23.tar.gz,
485 # first compare the module name against Foo::Bar, and failing that,
488 my $pdist = $m->{DISTRIBUTION};
489 die "ERROR: $module has no DISTRIBUTION entry\n" unless defined $pdist;
491 my $upstream = $m->{UPSTREAM} // 'undef';
492 next if @$wanted_upstreams and !$wanted_upstream{$upstream};
494 my $cdist = $modules{$module};
495 ( my $short_pdist = $pdist ) =~ s{^.*/}{};
497 unless ( defined $cdist ) {
498 my $d = $distros{ distro_base($short_pdist) };
499 unless ( defined $d ) {
500 print $outfh "\n$module: Can't determine current CPAN entry\n";
503 if ( keys %$d > 1 ) {
505 "\n$module: (found more than one CPAN candidate):\n";
506 print $outfh " perl: $pdist\n";
507 print $outfh " CPAN: $_\n" for sort keys %$d;
510 $cdist = ( keys %$d )[0];
513 if ( $cdist ne $pdist ) {
514 print $outfh "\n$module:\n Perl: $pdist\n CPAN: $cdist\n";
519 # get the EXCLUDED and MAP entries for this module, or
520 # make up defaults if they don't exist
523 my ( $m, $module_name, $perl_files ) = @_;
525 my ( $excluded, $map, $customized ) = @$m{qw(EXCLUDED MAP CUSTOMIZED)};
530 return $excluded, $map, $customized if $map;
532 # all files under ext/foo-bar (plus maybe some under t/lib)???
536 if (m{^((?:ext|dist|cpan)/[^/]+/)}) {
537 if ( defined $ext and $ext ne $1 ) {
539 # more than one ext/$ext/
554 if ( defined $ext ) {
555 $map = { '' => $ext },;
558 ( my $base = $module_name ) =~ s{::}{/}g;
565 return $excluded, $map, $customized;
568 # Given an exclude list and a mapping hash, convert a CPAN filename
569 # (eg 't/bar.t') to the equivalent perl filename (eg 'lib/Foo/Bar/t/bar.t').
570 # Returns an empty list for an excluded file
573 my ( $excluded, $map, $customized, $cpan_file ) = @_;
575 my %customized = map { ( $_ => 1 ) } @$customized;
576 for my $exclude (@$excluded) {
577 next if $customized{$exclude};
579 # may be a simple string to match exactly, or a pattern
580 if ( ref $exclude ) {
581 return if $cpan_file =~ $exclude;
584 return if $cpan_file eq $exclude;
588 my $perl_file = $cpan_file;
590 # try longest prefix first, then alphabetically on tie-break
592 my $prefix ( sort { length($b) <=> length($a) || $a cmp $b } keys %$map )
594 last if $perl_file =~ s/^\Q$prefix/$map->{$prefix}/;
599 # fetch a file from a URL and store it in a file given by a filename
602 my ( $url, $file ) = @_;
603 File::Path::mkpath( File::Basename::dirname($file) );
604 if ( $url =~ qr{\Afile://(?:localhost)?/} ) {
605 ( my $local_path = $url ) =~ s{\Afile://(?:localhost)?}{};
606 File::Copy::copy( $local_path, $file );
608 my $http = HTTP::Tiny->new;
609 my $response = $http->mirror($url, $file);
610 return $response->{success};
614 # download and unpack a distribution
615 # Returns the full pathname of the extracted directory
616 # (eg '/tmp/XYZ/Foo_bar-1.23')
618 # cache_dir: where to download the .tar.gz file to
619 # mirror_url: CPAN mirror to download from
620 # untar_dir: where to untar or unzup the file
621 # module: name of module
622 # dist: name of the distribution
624 sub get_distribution {
625 my ( $src_dir, $mirror_url, $untar_dir, $module, $dist ) = @_;
627 $dist =~ m{.+/([^/]+)$}
629 "ERROR: $module: invalid DISTRIBUTION name (no AUTHOR/ prefix): $dist\n";
632 my $download_file = catfile( $src_dir, $filename );
634 # download distribution
636 if ( -f $download_file and !-s $download_file ) {
638 # failed download might leave a zero-length file
639 unlink $download_file;
642 unless ( -f $download_file ) {
645 my $url = cpan_url_distribution( $mirror_url, $dist );
646 my_getstore( $url, $download_file )
647 or die "ERROR: Could not fetch '$url'\n";
650 # get the expected name of the extracted distribution dir
652 my $path = catfile( $untar_dir, $filename );
654 $path =~ s/\.tar\.gz$//
655 or $path =~ s/\.tgz$//
656 or $path =~ s/\.zip$//
658 "ERROR: downloaded file does not have a recognised suffix: $path\n";
660 # extract it unless we already have it cached or tarball is newer
661 if ( !-d $path || ( -M $download_file < -M $path ) ) {
662 $path = extract( $download_file, $untar_dir )
664 "ERROR: failed to extract distribution '$download_file to temp. dir: "
668 die "ERROR: Extracted tarball does not appear as $path\n" unless -d $path;
673 # produce the diff of a single file
676 my $cpan_file = shift;
677 my $perl_file = shift;
679 my $diff_opts = shift;
681 my @cmd = ( DIFF_CMD, split ' ', $diff_opts );
683 push @cmd, $perl_file, $cpan_file;
686 push @cmd, $cpan_file, $perl_file;
693 my ( $module_data, $file ) = @_;
694 return grep { $file eq $_ } @{ $module_data->{CUSTOMIZED} };
698 my ($archive,$to) = @_;
700 chdir $to or die "$!\n";
703 local $Archive::Tar::CHOWN = 0;
705 unless ( $next = Archive::Tar->iter( $archive, 1 ) ) {
706 $! = $Archive::Tar::error;
709 while ( my $file = $next->() ) {
710 push @files, $file->full_path;
711 unless ( $file->extract ) {
712 $! = $Archive::Tar::error;
717 my $path = __get_extract_dir( \@files );
718 chdir $cwd or die "$!\n";
722 sub __get_extract_dir {
723 my $files = shift || [];
725 return unless scalar @$files;
728 for my $aref ( [ \$dir1, 0 ], [ \$dir2, -1 ] ) {
729 my($dir,$pos) = @$aref;
731 ### add a catdir(), so that any trailing slashes get
732 ### take care of (removed)
733 ### also, a catdir() normalises './dir/foo' to 'dir/foo';
734 ### which was the problem in bug #23999
735 my $res = -d $files->[$pos]
736 ? File::Spec->catdir( $files->[$pos], '' )
737 : File::Spec->catdir( File::Basename::dirname( $files->[$pos] ) );
742 ### if the first and last dir don't match, make sure the
743 ### dirname is not set wrongly
746 ### dirs are the same, so we know for sure what the extract dir is
747 if( $dir1 eq $dir2 ) {
750 ### dirs are different.. do they share the base dir?
751 ### if so, use that, if not, fall back to '.'
753 my $base1 = [ File::Spec->splitdir( $dir1 ) ]->[0];
754 my $base2 = [ File::Spec->splitdir( $dir2 ) ]->[0];
756 $dir = File::Spec->rel2abs( $base1 eq $base2 ? $base1 : '.' );
759 return File::Spec->rel2abs( $dir );