3 # core-cpan-diff: Compare CPAN modules with their equivalent in core
5 # Originally based on App::DualLivedDiff by Steffen Mueller.
13 use File::Basename ();
17 use File::Spec::Functions;
19 use IO::Uncompress::Gunzip ();
21 use ExtUtils::Manifest;
22 use ExtUtils::MakeMaker ();
25 BEGIN { die "Must be run from root of perl source tree\n" unless -d 'Porting' }
29 # if running from blead, we may be doing -Ilib, which means when we
30 # 'chdir /tmp/foo', Archive::Extract may not find Archive::Tar etc.
31 # So preload the things we need, and tell it to check %INC first:
36 $Module::Load::Conditional::CHECK_INC_HASH = 1;
38 # stop Archive::Extract whinging about lack of Archive::Zip
39 $Archive::Extract::WARN = 0;
41 # where, under the cache dir, to download tarballs to
42 use constant SRC_DIR => 'tarballs';
44 # where, under the cache dir, to untar stuff to
45 use constant UNTAR_DIR => 'untarred';
47 use constant DIFF_CMD => 'diff';
50 print STDERR "\n@_\n\n" if @_;
52 Usage: $0 [opts] [ -d | -v | -x ] [ -a | module ... ]
54 -a/--all Scan all dual-life modules.
56 -c/--cachedir Where to save downloaded CPAN tarball files
57 (defaults to /tmp/something/ with deletion after each run).
59 -d/--diff Display file differences using diff(1), rather than just
60 listing which files have changed.
62 --diffopts Options to pass to the diff command. Defaults to '-u'.
64 -f|force Force download from CPAN of new 02packages.details.txt file
65 (with --crosscheck only).
67 -m|mirror Preferred CPAN mirror URI (http:// or file:///)
68 (Local mirror must be a complete mirror, not minicpan)
70 -o/--output File name to write output to (defaults to STDOUT).
72 -r/--reverse Reverses the diff (perl to CPAN).
74 -u/--upstream only print modules with the given upstream (defaults to all)
76 -v/--verbose List the fate of *all* files in the tarball, not just those
77 that differ or are missing.
79 -x|crosscheck List the distributions whose current CPAN version differs from
80 that in blead (i.e. the DISTRIBUTION field in Maintainers.pl).
82 By default (i.e. without the --crosscheck option), for each listed module
83 (or with -a, all CPAN modules listed in Maintainers.pl), grab the tarball
84 from CPAN associated with that module, and compare the files in it with
85 those in the perl source tree.
87 Must be run from the root of the perl source tree.
88 Module names must match the keys of %Modules in Maintainers.pl.
90 The diff(1) command is assumed to be in your PATH and is used to diff files
91 regardless of whether the --diff option has been chosen to display any file
101 my @wanted_upstreams;
103 my $mirror_url = "http://www.cpan.org/";
111 'a|all' => \$scan_all,
112 'c|cachedir=s' => \$cache_dir,
113 'd|diff' => \$use_diff,
114 'diffopts:s' => \$diff_opts,
115 'f|force' => \$force,
117 'm|mirror=s' => \$mirror_url,
118 'o|output=s' => \$output_file,
119 'r|reverse' => \$reverse,
120 'u|upstream=s@' => \@wanted_upstreams,
121 'v|verbose' => \$verbose,
122 'x|crosscheck' => \$do_crosscheck,
125 @wanted_upstreams = map { $_ eq 'undef' ? undef : $_ } @wanted_upstreams;
129 usage("Cannot mix -a with module list") if $scan_all && @ARGV;
131 if ($do_crosscheck) {
132 usage("can't use -r, -d, --diffopts, -v with --crosscheck")
133 if ( $reverse || $use_diff || $diff_opts || $verbose );
136 $diff_opts = '-u -b' unless defined $diff_opts;
137 usage("can't use -f without --crosscheck") if $force;
142 ? grep $Maintainers::Modules{$_}{CPAN},
143 ( sort { lc $a cmp lc $b } keys %Maintainers::Modules )
145 usage("No modules specified") unless @modules;
148 if ( defined $output_file ) {
149 open $outfh, '>', $output_file
150 or die "ERROR: could not open file '$output_file' for writing: $!\n";
153 open $outfh, ">&STDOUT"
154 or die "ERROR: can't dup STDOUT: $!\n";
157 if ( defined $cache_dir ) {
158 die "ERROR: not a directory: '$cache_dir'\n"
159 if !-d $cache_dir && -e $cache_dir;
160 File::Path::mkpath($cache_dir);
163 $cache_dir = File::Temp::tempdir( CLEANUP => 1 );
166 $mirror_url .= "/" unless substr( $mirror_url, -1 ) eq "/";
167 my $test_file = "modules/03modlist.data.gz";
169 cpan_url( $mirror_url, $test_file ),
170 catfile( $cache_dir, $test_file )
171 ) or die "ERROR: not a CPAN mirror '$mirror_url'\n";
173 if ($do_crosscheck) {
175 $outfh, $cache_dir, $mirror_url,
176 $force, \@modules, \@wanted_upstreams
181 \@modules, $outfh, $output_file,
182 $cache_dir, $mirror_url, $verbose,
183 $use_diff, $reverse, $diff_opts,
189 # construct a CPAN url
192 my ( $mirror_url, @path ) = @_;
193 return $mirror_url unless @path;
194 my $cpan_path = join( "/", map { split "/", $_ } @path );
195 $cpan_path =~ s{\A/}{}; # remove leading slash since url has one trailing
196 return $mirror_url . $cpan_path;
199 # construct a CPAN URL for a author/distribution string like:
200 # BINGOS/Archive-Extract-0.52.tar.gz
202 sub cpan_url_distribution {
203 my ( $mirror_url, $distribution ) = @_;
204 $distribution =~ /^([A-Z])([A-Z])/
205 or die "ERROR: invalid DISTRIBUTION name (not /^[A-Z]{2}/): $distribution\n";
206 my $path = "authors/id/$1/$1$2/$distribution";
207 return cpan_url( $mirror_url, $path );
210 # compare a list of modules against their CPAN equivalents
214 $modules, $outfh, $output_file, $cache_dir,
215 $mirror_url, $verbose, $use_diff, $reverse,
216 $diff_opts, $wanted_upstreams
219 # first, make sure we have a directory where they can all be untarred,
220 # and if its a permanent directory, clear any previous content
221 my $untar_dir = catdir( $cache_dir, UNTAR_DIR );
222 my $src_dir = catdir( $cache_dir, SRC_DIR );
223 for my $d ( $src_dir, $untar_dir ) {
225 mkdir $d or die "mkdir $d: $!\n";
228 my %ignorable = map { ( $_ => 1 ) } @Maintainers::IGNORABLE;
231 for my $module (@$modules) {
232 warn "Processing $module ...\n" if defined $output_file;
234 my $m = $Maintainers::Modules{$module}
235 or die "ERROR: No such module in Maintainers.pl: '$module'\n";
237 unless ( $m->{CPAN} ) {
238 print $outfh "WARNING: $module is not dual-life; skipping\n";
242 my $dist = $m->{DISTRIBUTION};
243 die "ERROR: $module has no DISTRIBUTION entry\n" unless defined $dist;
245 if ( $seen_dist{$dist}++ ) {
246 warn "WARNING: duplicate entry for $dist in $module\n";
249 my $upstream = $m->{UPSTREAM};
250 next if @$wanted_upstreams and !( $upstream ~~ $wanted_upstreams );
252 print $outfh "\n$module - "
253 . $Maintainers::Modules{$module}->{DISTRIBUTION} . "\n";
254 print $outfh " upstream is: "
255 . ( $m->{UPSTREAM} // 'UNKNOWN!' ) . "\n";
260 get_distribution( $src_dir, $mirror_url, $untar_dir, $module,
264 print $outfh " ", $@;
265 print $outfh " (skipping)\n";
269 my @perl_files = Maintainers::get_module_files($module);
271 my $manifest = catfile( $cpan_dir, 'MANIFEST' );
272 die "ERROR: no such file: $manifest\n" unless -f $manifest;
274 my $cpan_files = ExtUtils::Manifest::maniread($manifest);
275 my @cpan_files = sort keys %$cpan_files;
277 ( my $main_pm = $module ) =~ s{::}{/}g;
280 my ( $excluded, $map, $customized ) =
281 get_map( $m, $module, \@perl_files );
284 @perl_unseen{@perl_files} = ();
285 my %perl_files = %perl_unseen;
287 foreach my $cpan_file (@cpan_files) {
289 cpan_to_perl( $excluded, $map, $customized, $cpan_file );
290 unless ( defined $mapped_file ) {
291 print $outfh " Excluded: $cpan_file\n" if $verbose;
295 if ( exists $perl_files{$mapped_file} ) {
296 delete $perl_unseen{$mapped_file};
300 # some CPAN files foo are stored in core as foo.packed,
301 # which are then unpacked by 'make test_prep'
302 my $packed_file = "$mapped_file.packed";
303 if ( exists $perl_files{$packed_file} ) {
304 if ( !-f $mapped_file and -f $packed_file ) {
306 WARNING: $mapped_file not found, but .packed variant exists.
307 Perhaps you need to run 'make test_prep'?
311 delete $perl_unseen{$packed_file};
314 if ( $ignorable{$cpan_file} ) {
315 print $outfh " Ignored: $cpan_file\n" if $verbose;
320 print $outfh " CPAN only: $cpan_file",
321 ( $cpan_file eq $mapped_file )
323 : " (missing $mapped_file)\n";
329 my $abs_cpan_file = catfile( $cpan_dir, $cpan_file );
331 # should never happen
332 die "ERROR: can't find file $abs_cpan_file\n"
333 unless -f $abs_cpan_file;
335 # might happen if the FILES entry in Maintainers.pl is wrong
336 unless ( -f $mapped_file ) {
337 print $outfh "WARNING: perl file not found: $mapped_file\n";
341 my $relative_mapped_file = relatively_mapped($mapped_file);
344 file_diff( $outfh, $abs_cpan_file, $mapped_file, $reverse,
346 if ( $different && customized( $m, $relative_mapped_file ) ) {
348 print $outfh " Customized for blead: $relative_mapped_file\n";
353 $different =~ s{^(--- |\+\+\+ )/tmp/[^/]+/}{$1}gm;
354 print $outfh $different;
357 if ( $cpan_file eq $relative_mapped_file ) {
358 print $outfh " Modified: $relative_mapped_file\n";
362 " Modified: $cpan_file $relative_mapped_file\n";
365 if ( $cpan_file =~ m{\.pm\z} ) {
366 my $pv = MM->parse_version($mapped_file) || 'unknown';
367 my $cv = MM->parse_version($abs_cpan_file) || 'unknown';
370 " Version mismatch in '$cpan_file':\n $cv (cpan) vs $pv (perl)\n";
376 elsif ( customized( $m, $relative_mapped_file ) ) {
377 # Maintainers.pl says we customized it, but it looks the
378 # same as CPAN so maybe we lost the customization, which
380 if ( $cpan_file eq $relative_mapped_file ) {
381 print $outfh " Blead customization missing: $cpan_file\n";
385 " Blead customization missing: $cpan_file $relative_mapped_file\n";
389 if ( $cpan_file eq $relative_mapped_file ) {
390 print $outfh " Unchanged: $cpan_file\n";
394 " Unchanged: $cpan_file $relative_mapped_file\n";
398 for ( sort keys %perl_unseen ) {
399 my $relative_mapped_file = relatively_mapped($_);
400 if ( customized( $m, $relative_mapped_file ) ) {
401 print $outfh " Customized for blead: $_\n";
404 print $outfh " Perl only: $_\n" unless $use_diff;
410 sub relatively_mapped {
411 my $relative = shift;
412 $relative =~ s/^(cpan|dist|ext)\/.*?\///;
416 # given FooBar-1.23_45.tar.gz, return FooBar
420 $d =~ s/\.tar\.gz$//;
422 $d =~ s/[\d\-_\.]+$//;
426 # process --crosscheck action:
427 # ie list all distributions whose CPAN versions differ from that listed in
432 $outfh, $cache_dir, $mirror_url,
433 $force, $modules, $wanted_upstreams,
436 my $file = '02packages.details.txt';
437 my $download_dir = $cache_dir || File::Temp::tempdir( CLEANUP => 1 );
438 my $path = catfile( $download_dir, $file );
439 my $gzfile = "$path.gz";
441 # grab 02packages.details.txt
443 my $url = cpan_url( $mirror_url, "modules/02packages.details.txt.gz" );
445 if ( !-f $gzfile or $force ) {
447 my_getstore( $url, $gzfile );
450 IO::Uncompress::Gunzip::gunzip( $gzfile, $path )
452 "ERROR: failed to ungzip $gzfile: $IO::Uncompress::Gunzip::GunzipError\n";
454 # suck in the data from it
456 open my $fh, '<', $path
457 or die "ERROR: open: $file: $!\n";
465 my @f = split ' ', $_;
468 "WARNING: $file:$.: line doesn't have three fields (skipping)\n";
472 $distro =~ s{^[A-Z]/[A-Z]{2}/}{}; # strip leading A/AB/
473 $modules{ $f[0] } = $distro;
475 ( my $short_distro = $distro ) =~ s{^.*/}{};
477 $distros{ distro_base($short_distro) }{$distro} = 1;
480 for my $module (@$modules) {
481 my $m = $Maintainers::Modules{$module}
482 or die "ERROR: No such module in Maintainers.pl: '$module'\n";
484 unless ( $m->{CPAN} ) {
485 print $outfh "\nWARNING: $module is not dual-life; skipping\n";
489 # given an entry like
490 # Foo::Bar 1.23 foo-bar-1.23.tar.gz,
491 # first compare the module name against Foo::Bar, and failing that,
494 my $pdist = $m->{DISTRIBUTION};
495 die "ERROR: $module has no DISTRIBUTION entry\n" unless defined $pdist;
497 my $upstream = $m->{UPSTREAM};
498 next if @$wanted_upstreams and !( $upstream ~~ $wanted_upstreams );
500 my $cdist = $modules{$module};
501 ( my $short_pdist = $pdist ) =~ s{^.*/}{};
503 unless ( defined $cdist ) {
504 my $d = $distros{ distro_base($short_pdist) };
505 unless ( defined $d ) {
506 print $outfh "\n$module: Can't determine current CPAN entry\n";
509 if ( keys %$d > 1 ) {
511 "\n$module: (found more than one CPAN candidate):\n";
512 print $outfh " perl: $pdist\n";
513 print $outfh " CPAN: $_\n" for sort keys %$d;
516 $cdist = ( keys %$d )[0];
519 if ( $cdist ne $pdist ) {
520 print $outfh "\n$module:\n Perl: $pdist\n CPAN: $cdist\n";
525 # get the EXCLUDED and MAP entries for this module, or
526 # make up defauts if they don't exist
529 my ( $m, $module_name, $perl_files ) = @_;
531 my ( $excluded, $map, $customized ) = @$m{qw(EXCLUDED MAP CUSTOMIZED)};
536 return $excluded, $map, $customized if $map;
538 # all files under ext/foo-bar (plus maybe some under t/lib)???
542 if (m{^((?:ext|dist|cpan)/[^/]+/)}) {
543 if ( defined $ext and $ext ne $1 ) {
545 # more than one ext/$ext/
560 if ( defined $ext ) {
561 $map = { '' => $ext },;
564 ( my $base = $module_name ) =~ s{::}{/}g;
571 return $excluded, $map, $customized;
574 # Given an exclude list and a mapping hash, convert a CPAN filename
575 # (eg 't/bar.t') to the equivalent perl filename (eg 'lib/Foo/Bar/t/bar.t').
576 # Returns an empty list for an excluded file
579 my ( $excluded, $map, $customized, $cpan_file ) = @_;
581 for my $exclude (@$excluded) {
582 next if $exclude ~~ $customized;
584 # may be a simple string to match exactly, or a pattern
585 if ( ref $exclude ) {
586 return if $cpan_file =~ $exclude;
589 return if $cpan_file eq $exclude;
593 my $perl_file = $cpan_file;
595 # try longest prefix first, then alphabetically on tie-break
597 my $prefix ( sort { length($b) <=> length($a) || $a cmp $b } keys %$map )
599 last if $perl_file =~ s/^\Q$prefix/$map->{$prefix}/;
604 # fetch a file from a URL and store it in a file given by a filename
607 my ( $url, $file ) = @_;
608 File::Path::mkpath( File::Basename::dirname($file) );
609 if ( $url =~ qr{\Afile://(?:localhost)?/} ) {
610 ( my $local_path = $url ) =~ s{\Afile://(?:localhost)?}{};
611 File::Copy::copy( $local_path, $file );
613 my $http = HTTP::Tiny->new;
614 my $response = $http->mirror($url, $file);
615 return $response->{success};
619 # download and unpack a distribution
620 # Returns the full pathname of the extracted directory
621 # (eg '/tmp/XYZ/Foo_bar-1.23')
623 # cache_dir: where to download the .tar.gz file to
624 # mirror_url: CPAN mirror to download from
625 # untar_dir: where to untar or unzup the file
626 # module: name of module
627 # dist: name of the distribution
629 sub get_distribution {
630 my ( $src_dir, $mirror_url, $untar_dir, $module, $dist ) = @_;
632 $dist =~ m{.+/([^/]+)$}
634 "ERROR: $module: invalid DISTRIBUTION name (no AUTHOR/ prefix): $dist\n";
637 my $download_file = catfile( $src_dir, $filename );
639 # download distribution
641 if ( -f $download_file and !-s $download_file ) {
643 # failed download might leave a zero-length file
644 unlink $download_file;
647 unless ( -f $download_file ) {
650 my $url = cpan_url_distribution( $mirror_url, $dist );
651 my_getstore( $url, $download_file )
652 or die "ERROR: Could not fetch '$url'\n";
655 # get the expected name of the extracted distribution dir
657 my $path = catfile( $untar_dir, $filename );
659 $path =~ s/\.tar\.gz$//
660 or $path =~ s/\.tgz$//
661 or $path =~ s/\.zip$//
663 "ERROR: downloaded file does not have a recognised suffix: $path\n";
665 # extract it unless we already have it cached or tarball is newer
666 if ( !-d $path || ( -M $download_file < -M $path ) ) {
667 my $ae = Archive::Extract->new( archive => $download_file );
668 $ae->extract( to => $untar_dir )
670 "ERROR: failed to extract distribution '$download_file to temp. dir: "
671 . $ae->error() . "\n";
673 $path = $ae->extract_path;
676 die "ERROR: Extracted tarball does not appear as $path\n" unless -d $path;
681 # produce the diff of a single file
684 my $cpan_file = shift;
685 my $perl_file = shift;
687 my $diff_opts = shift;
689 my @cmd = ( DIFF_CMD, split ' ', $diff_opts );
691 push @cmd, $perl_file, $cpan_file;
694 push @cmd, $cpan_file, $perl_file;
701 my ( $module_data, $file ) = @_;
702 return grep { $file eq $_ } @{ $module_data->{CUSTOMIZED} };