3 # core-cpan-diff: Compare CPAN modules with their equivalent in core
5 # Originally based on App::DualLivedDiff by Steffen Mueller.
13 use File::Basename ();
17 use File::Spec::Functions;
19 use IO::Uncompress::Gunzip ();
21 use ExtUtils::Manifest;
22 use ExtUtils::MakeMaker ();
25 BEGIN { die "Must be run from root of perl source tree\n" unless -d 'Porting' }
29 # if running from blead, we may be doing -Ilib, which means when we
30 # 'chdir /tmp/foo', Archive::Extract may not find Archive::Tar etc.
31 # So preload the things we need, and tell it to check %INC first:
36 $Module::Load::Conditional::CHECK_INC_HASH = 1;
38 # stop Archive::Extract whinging about lack of Archive::Zip
39 $Archive::Extract::WARN = 0;
41 # where, under the cache dir, to download tarballs to
42 use constant SRC_DIR => 'tarballs';
44 # where, under the cache dir, to untar stuff to
45 use constant UNTAR_DIR => 'untarred';
47 use constant DIFF_CMD => 'diff';
48 use constant WGET_CMD => 'wget';
51 print STDERR "\n@_\n\n" if @_;
53 Usage: $0 [opts] [ -d | -v | -x ] [ -a | module ... ]
55 -a/--all Scan all dual-life modules.
57 -c/--cachedir Where to save downloaded CPAN tarball files
58 (defaults to /tmp/something/ with deletion after each run).
60 -d/--diff Display file differences using diff(1), rather than just
61 listing which files have changed.
62 The diff(1) command is assumed to be in your PATH.
64 --diffopts Options to pass to the diff command. Defaults to '-u'.
66 -f|force Force download from CPAN of new 02packages.details.txt file
67 (with --crosscheck only).
69 -m|mirror Preferred CPAN mirror URI (http:// or file:///)
70 (Local mirror must be a complete mirror, not minicpan)
72 -o/--output File name to write output to (defaults to STDOUT).
74 -r/--reverse Reverses the diff (perl to CPAN).
76 -u/--upstream only print modules with the given upstream (defaults to all)
78 -v/--verbose List the fate of *all* files in the tarball, not just those
79 that differ or are missing.
81 -x|crosscheck List the distributions whose current CPAN version differs from
82 that in blead (i.e. the DISTRIBUTION field in Maintainers.pl).
84 By default (i.e. without the --crosscheck option), for each listed module
85 (or with -a, all CPAN modules listed in Maintainers.pl), grab the tarball
86 from CPAN associated with that module, and compare the files in it with
87 those in the perl source tree.
89 Must be run from the root of the perl source tree.
90 Module names must match the keys of %Modules in Maintainers.pl.
101 my $mirror_url = "http://www.cpan.org/";
109 'a|all' => \$scan_all,
110 'c|cachedir=s' => \$cache_dir,
111 'd|diff' => \$use_diff,
112 'diffopts:s' => \$diff_opts,
113 'f|force' => \$force,
115 'm|mirror=s' => \$mirror_url,
116 'o|output=s' => \$output_file,
117 'r|reverse' => \$reverse,
118 'u|upstream=s@' => \@wanted_upstreams,
119 'v|verbose' => \$verbose,
120 'x|crosscheck' => \$do_crosscheck,
123 @wanted_upstreams = map { $_ eq 'undef' ? undef : $_ } @wanted_upstreams;
127 usage("Cannot mix -a with module list") if $scan_all && @ARGV;
129 if ($do_crosscheck) {
130 usage("can't use -r, -d, --diffopts, -v with --crosscheck")
131 if ( $reverse || $use_diff || $diff_opts || $verbose );
134 $diff_opts = '-u -b' unless defined $diff_opts;
135 usage("can't use -f without --crosscheck") if $force;
140 ? grep $Maintainers::Modules{$_}{CPAN},
141 ( sort { lc $a cmp lc $b } keys %Maintainers::Modules )
143 usage("No modules specified") unless @modules;
146 if ( defined $output_file ) {
147 open $outfh, '>', $output_file
148 or die "ERROR: could not open file '$output_file' for writing: $!\n";
151 open $outfh, ">&STDOUT"
152 or die "ERROR: can't dup STDOUT: $!\n";
155 if ( defined $cache_dir ) {
156 die "ERROR: no such directory: '$cache_dir'\n" unless -d $cache_dir;
159 $cache_dir = File::Temp::tempdir( CLEANUP => 1 );
162 $mirror_url .= "/" unless substr( $mirror_url, -1 ) eq "/";
163 my $test_file = "modules/03modlist.data.gz";
165 cpan_url( $mirror_url, $test_file ),
166 catfile( $cache_dir, $test_file )
167 ) or die "ERROR: not a CPAN mirror '$mirror_url'\n";
169 if ($do_crosscheck) {
171 $outfh, $cache_dir, $mirror_url,
172 $force, \@modules, \@wanted_upstreams
177 \@modules, $outfh, $output_file,
178 $cache_dir, $mirror_url, $verbose,
179 $use_diff, $reverse, $diff_opts,
185 # construct a CPAN url
188 my ( $mirror_url, @path ) = @_;
189 return $mirror_url unless @path;
190 my $cpan_path = join( "/", map { split "/", $_ } @path );
191 $cpan_path =~ s{\A/}{}; # remove leading slash since url has one trailing
192 return $mirror_url . $cpan_path;
195 # construct a CPAN URL for a author/distribution string like:
196 # BINGOS/Archive-Extract-0.52.tar.gz
198 sub cpan_url_distribution {
199 my ( $mirror_url, $distribution ) = @_;
200 $distribution =~ /^([A-Z])([A-Z])/
201 or die "ERROR: invalid DISTRIBUTION name (not /^[A-Z]{2}/): $distribution\n";
202 my $path = "authors/id/$1/$1$2/$distribution";
203 return cpan_url( $mirror_url, $path );
206 # compare a list of modules against their CPAN equivalents
210 $modules, $outfh, $output_file, $cache_dir,
211 $mirror_url, $verbose, $use_diff, $reverse,
212 $diff_opts, $wanted_upstreams
215 # first, make sure we have a directory where they can all be untarred,
216 # and if its a permanent directory, clear any previous content
217 my $untar_dir = catdir( $cache_dir, UNTAR_DIR );
218 my $src_dir = catdir( $cache_dir, SRC_DIR );
219 for my $d ( $src_dir, $untar_dir ) {
221 mkdir $d or die "mkdir $d: $!\n";
224 my %ignorable = map { ( $_ => 1 ) } @Maintainers::IGNORABLE;
227 for my $module (@$modules) {
228 warn "Processing $module ...\n" if defined $output_file;
230 my $m = $Maintainers::Modules{$module}
231 or die "ERROR: No such module in Maintainers.pl: '$module'\n";
233 unless ( $m->{CPAN} ) {
234 print $outfh "WARNING: $module is not dual-life; skipping\n";
238 my $dist = $m->{DISTRIBUTION};
239 die "ERROR: $module has no DISTRIBUTION entry\n" unless defined $dist;
241 if ( $seen_dist{$dist}++ ) {
242 warn "WARNING: duplicate entry for $dist in $module\n";
245 my $upstream = $m->{UPSTREAM};
246 next if @$wanted_upstreams and !( $upstream ~~ $wanted_upstreams );
248 print $outfh "\n$module - "
249 . $Maintainers::Modules{$module}->{DISTRIBUTION} . "\n";
250 print $outfh " upstream is: "
251 . ( $m->{UPSTREAM} // 'UNKNOWN!' ) . "\n";
256 get_distribution( $src_dir, $mirror_url, $untar_dir, $module,
260 print $outfh " ", $@;
261 print $outfh " (skipping)\n";
265 my @perl_files = Maintainers::get_module_files($module);
267 my $manifest = catfile( $cpan_dir, 'MANIFEST' );
268 die "ERROR: no such file: $manifest\n" unless -f $manifest;
270 my $cpan_files = ExtUtils::Manifest::maniread($manifest);
271 my @cpan_files = sort keys %$cpan_files;
273 ( my $main_pm = $module ) =~ s{::}{/}g;
276 my ( $excluded, $map, $customized ) =
277 get_map( $m, $module, \@perl_files );
280 @perl_unseen{@perl_files} = ();
281 my %perl_files = %perl_unseen;
283 foreach my $cpan_file (@cpan_files) {
285 cpan_to_perl( $excluded, $map, $customized, $cpan_file );
286 unless ( defined $mapped_file ) {
287 print $outfh " Excluded: $cpan_file\n" if $verbose;
291 if ( exists $perl_files{$mapped_file} ) {
292 delete $perl_unseen{$mapped_file};
296 # some CPAN files foo are stored in core as foo.packed,
297 # which are then unpacked by 'make test_prep'
298 my $packed_file = "$mapped_file.packed";
299 if ( exists $perl_files{$packed_file} ) {
300 if ( !-f $mapped_file and -f $packed_file ) {
302 WARNING: $mapped_file not found, but .packed variant exists.
303 Perhaps you need to run 'make test_prep'?
307 delete $perl_unseen{$packed_file};
310 if ( $ignorable{$cpan_file} ) {
311 print $outfh " Ignored: $cpan_file\n" if $verbose;
316 print $outfh " CPAN only: $cpan_file",
317 ( $cpan_file eq $mapped_file )
319 : " (missing $mapped_file)\n";
325 my $abs_cpan_file = catfile( $cpan_dir, $cpan_file );
327 # should never happen
328 die "ERROR: can't find file $abs_cpan_file\n"
329 unless -f $abs_cpan_file;
331 # might happen if the FILES entry in Maintainers.pl is wrong
332 unless ( -f $mapped_file ) {
333 print $outfh "WARNING: perl file not found: $mapped_file\n";
337 my $relative_mapped_file = relatively_mapped($mapped_file);
340 file_diff( $outfh, $abs_cpan_file, $mapped_file, $reverse,
342 if ( $different && customized( $m, $relative_mapped_file ) ) {
344 print $outfh " Customized for blead: $relative_mapped_file\n";
349 $different =~ s{^(--- |\+\+\+ )/tmp/[^/]+/}{$1}gm;
350 print $outfh $different;
353 if ( $cpan_file eq $relative_mapped_file ) {
354 print $outfh " Modified: $relative_mapped_file\n";
358 " Modified: $cpan_file $relative_mapped_file\n";
361 if ( $cpan_file =~ m{\.pm\z} ) {
362 my $pv = MM->parse_version($mapped_file) || 'unknown';
363 my $cv = MM->parse_version($abs_cpan_file) || 'unknown';
366 " Version mismatch in '$cpan_file':\n $cv (cpan) vs $pv (perl)\n";
372 elsif ( customized( $m, $relative_mapped_file ) ) {
373 # Maintainers.pl says we customized it, but it looks the
374 # same as CPAN so maybe we lost the customization, which
376 if ( $cpan_file eq $relative_mapped_file ) {
377 print $outfh " Blead customization missing: $cpan_file\n";
381 " Blead customization missing: $cpan_file $relative_mapped_file\n";
385 if ( $cpan_file eq $relative_mapped_file ) {
386 print $outfh " Unchanged: $cpan_file\n";
390 " Unchanged: $cpan_file $relative_mapped_file\n";
394 for ( sort keys %perl_unseen ) {
395 my $relative_mapped_file = relatively_mapped($_);
396 if ( customized( $m, $relative_mapped_file ) ) {
397 print $outfh " Customized for blead: $_\n";
400 print $outfh " Perl only: $_\n" unless $use_diff;
406 sub relatively_mapped {
407 my $relative = shift;
408 $relative =~ s/^(cpan|dist|ext)\/.*?\///;
412 # given FooBar-1.23_45.tar.gz, return FooBar
416 $d =~ s/\.tar\.gz$//;
418 $d =~ s/[\d\-_\.]+$//;
422 # process --crosscheck action:
423 # ie list all distributions whose CPAN versions differ from that listed in
428 $outfh, $cache_dir, $mirror_url,
429 $force, $modules, $wanted_upstreams,
432 my $file = '02packages.details.txt';
433 my $download_dir = $cache_dir || File::Temp::tempdir( CLEANUP => 1 );
434 my $path = catfile( $download_dir, $file );
435 my $gzfile = "$path.gz";
437 # grab 02packages.details.txt
439 my $url = cpan_url( $mirror_url, "modules/02packages.details.txt.gz" );
441 if ( !-f $gzfile or $force ) {
443 my_getstore( $url, $gzfile );
446 IO::Uncompress::Gunzip::gunzip( $gzfile, $path )
448 "ERROR: failed to ungzip $gzfile: $IO::Uncompress::Gunzip::GunzipError\n";
450 # suck in the data from it
452 open my $fh, '<', $path
453 or die "ERROR: open: $file: $!\n";
461 my @f = split ' ', $_;
464 "WARNING: $file:$.: line doesn't have three fields (skipping)\n";
468 $distro =~ s{^[A-Z]/[A-Z]{2}/}{}; # strip leading A/AB/
469 $modules{ $f[0] } = $distro;
471 ( my $short_distro = $distro ) =~ s{^.*/}{};
473 $distros{ distro_base($short_distro) }{$distro} = 1;
476 for my $module (@$modules) {
477 my $m = $Maintainers::Modules{$module}
478 or die "ERROR: No such module in Maintainers.pl: '$module'\n";
480 unless ( $m->{CPAN} ) {
481 print $outfh "\nWARNING: $module is not dual-life; skipping\n";
485 # given an entry like
486 # Foo::Bar 1.23 foo-bar-1.23.tar.gz,
487 # first compare the module name against Foo::Bar, and failing that,
490 my $pdist = $m->{DISTRIBUTION};
491 die "ERROR: $module has no DISTRIBUTION entry\n" unless defined $pdist;
493 my $upstream = $m->{UPSTREAM};
494 next if @$wanted_upstreams and !( $upstream ~~ $wanted_upstreams );
496 my $cdist = $modules{$module};
497 ( my $short_pdist = $pdist ) =~ s{^.*/}{};
499 unless ( defined $cdist ) {
500 my $d = $distros{ distro_base($short_pdist) };
501 unless ( defined $d ) {
502 print $outfh "\n$module: Can't determine current CPAN entry\n";
505 if ( keys %$d > 1 ) {
507 "\n$module: (found more than one CPAN candidate):\n";
508 print $outfh " perl: $pdist\n";
509 print $outfh " CPAN: $_\n" for sort keys %$d;
512 $cdist = ( keys %$d )[0];
515 if ( $cdist ne $pdist ) {
516 print $outfh "\n$module:\n Perl: $pdist\n CPAN: $cdist\n";
521 # get the EXCLUDED and MAP entries for this module, or
522 # make up defauts if they don't exist
525 my ( $m, $module_name, $perl_files ) = @_;
527 my ( $excluded, $map, $customized ) = @$m{qw(EXCLUDED MAP CUSTOMIZED)};
532 return $excluded, $map, $customized if $map;
534 # all files under ext/foo-bar (plus maybe some under t/lib)???
538 if (m{^((?:ext|dist|cpan)/[^/]+/)}) {
539 if ( defined $ext and $ext ne $1 ) {
541 # more than one ext/$ext/
556 if ( defined $ext ) {
557 $map = { '' => $ext },;
560 ( my $base = $module_name ) =~ s{::}{/}g;
567 return $excluded, $map, $customized;
570 # Given an exclude list and a mapping hash, convert a CPAN filename
571 # (eg 't/bar.t') to the equivalent perl filename (eg 'lib/Foo/Bar/t/bar.t').
572 # Returns an empty list for an excluded file
575 my ( $excluded, $map, $customized, $cpan_file ) = @_;
577 for my $exclude (@$excluded) {
578 next if $exclude ~~ $customized;
580 # may be a simple string to match exactly, or a pattern
581 if ( ref $exclude ) {
582 return if $cpan_file =~ $exclude;
585 return if $cpan_file eq $exclude;
589 my $perl_file = $cpan_file;
591 # try longest prefix first, then alphabetically on tie-break
593 my $prefix ( sort { length($b) <=> length($a) || $a cmp $b } keys %$map )
595 last if $perl_file =~ s/^\Q$prefix/$map->{$prefix}/;
600 # fetch a file from a URL and store it in a file given by a filename
603 my ( $url, $file ) = @_;
604 File::Path::mkpath( File::Basename::dirname($file) );
605 if ( $url =~ qr{\Afile://(?:localhost)?/} ) {
606 ( my $local_path = $url ) =~ s{\Afile://(?:localhost)?}{};
607 File::Copy::copy( $local_path, $file );
609 my $http = HTTP::Tiny->new;
610 my $response = $http->mirror($url, $file);
611 return $response->{success};
615 # download and unpack a distribution
616 # Returns the full pathname of the extracted directory
617 # (eg '/tmp/XYZ/Foo_bar-1.23')
619 # cache_dir: where to download the .tar.gz file to
620 # mirror_url: CPAN mirror to download from
621 # untar_dir: where to untar or unzup the file
622 # module: name of module
623 # dist: name of the distribution
625 sub get_distribution {
626 my ( $src_dir, $mirror_url, $untar_dir, $module, $dist ) = @_;
628 $dist =~ m{.+/([^/]+)$}
630 "ERROR: $module: invalid DISTRIBUTION name (no AUTHOR/ prefix): $dist\n";
633 my $download_file = catfile( $src_dir, $filename );
635 # download distribution
637 if ( -f $download_file and !-s $download_file ) {
639 # wget can leave a zero-length file on failed download
640 unlink $download_file;
643 unless ( -f $download_file ) {
646 my $url = cpan_url_distribution( $mirror_url, $dist );
647 my_getstore( $url, $download_file )
648 or die "ERROR: Could not fetch '$url'\n";
651 # get the expected name of the extracted distribution dir
653 my $path = catfile( $untar_dir, $filename );
655 $path =~ s/\.tar\.gz$//
656 or $path =~ s/\.zip$//
658 "ERROR: downloaded file does not have a recognised suffix: $path\n";
660 # extract it unless we already have it cached or tarball is newer
661 if ( !-d $path || ( -M $download_file < -M $path ) ) {
662 my $ae = Archive::Extract->new( archive => $download_file );
663 $ae->extract( to => $untar_dir )
665 "ERROR: failed to extract distribution '$download_file to temp. dir: "
666 . $ae->error() . "\n";
668 $path = $ae->extract_path;
671 die "ERROR: Extracted tarball does not appear as $path\n" unless -d $path;
676 # produce the diff of a single file
679 my $cpan_file = shift;
680 my $perl_file = shift;
682 my $diff_opts = shift;
684 my @cmd = ( DIFF_CMD, split ' ', $diff_opts );
686 push @cmd, $perl_file, $cpan_file;
689 push @cmd, $cpan_file, $perl_file;
696 my ( $module_data, $file ) = @_;
697 return grep { $file eq $_ } @{ $module_data->{CUSTOMIZED} };