3 # core-cpan-diff: Compare CPAN modules with their equivalent in core
5 # Originally based on App::DualLivedDiff by Steffen Mueller.
17 use IO::Uncompress::Gunzip ();
19 use ExtUtils::Manifest;
21 BEGIN { die "Must be run from root of perl source tree\n" unless -d 'Porting' }
25 # if running from blead, we may be doing -Ilib, which means when we
26 # 'chdir /tmp/foo', Archive::Extract may not find Archive::Tar etc.
27 # So preload the things we need, and tell it to check %INC first:
32 $Module::Load::Conditional::CHECK_INC_HASH = 1;
33 # stop Archive::Extract whinging about lack of Archive::Zip
34 $Archive::Extract::WARN = 0;
37 # Files, which if they exist in CPAN but not in perl, will not generate
38 # an 'Only in CPAN' listing
40 our %IGNORABLE = map { ($_ => 1) }
41 qw(.cvsignore .dualLivedDiffConfig .gitignore
42 ANNOUNCE Announce Artistic AUTHORS BENCHMARK BUGS Build.PL
43 CHANGELOG ChangeLog CHANGES Changes COPYING Copying CREDITS
44 GOALS HISTORY INSTALL INSTALL.SKIP LICENSE Makefile.PL
45 MANIFEST MANIFEST.SKIP META.yml NEW NOTES ppport.h README
46 SIGNATURE THANKS TODO Todo VERSION WHATSNEW);
48 # where, under the cache dir, to untar stuff to
50 use constant UNTAR_DIR => 'untarred';
52 use constant DIFF_CMD => 'diff';
53 use constant WGET_CMD => 'wget';
56 print STDERR "\n@_\n\n" if @_;
58 Usage: $0 [opts] [ -d | -v | -x ] [ -a | module ... ]
60 -a/--all Scan all dual-life modules.
62 -c/--cachedir Where to save downloaded CPAN tarball files
63 (defaults to /tmp/something/ with deletion after each run).
65 -d/--diff Display file differences using diff(1), rather than just
66 listing which files have changed.
67 The diff(1) command is assumed to be in your PATH.
69 --diffopts Options to pass to the diff command. Defaults to '-u'.
71 -f|force Force download from CPAN of new 02packages.details.txt file
72 (with --crosscheck only).
74 -o/--output File name to write output to (defaults to STDOUT).
76 -r/--reverse Reverses the diff (perl to CPAN).
78 -v/--verbose List the fate of *all* files in the tarball, not just those
79 that differ or are missing.
81 -x|crosscheck List the distributions whose current CPAN version differs from
82 that in blead (i.e. the DISTRIBUTION field in Maintainers.pl).
84 By default (i.e. without the --crosscheck option), for each listed module
85 (or with -a, all CPAN modules listed in Maintainers.pl), grab the tarball
86 from CPAN associated with that module, and compare the files in it with
87 those in the perl source tree.
89 Must be run from the root of the perl source tree.
90 Module names must match the keys of %Modules in Maintainers.pl.
108 'a|all' => \$scan_all,
109 'c|cachedir=s' => \$cache_dir,
110 'd|diff' => \$use_diff,
111 'diffopts:s' => \$diff_opts,
112 'f|force' => \$force,
114 'o|output=s' => \$output_file,
115 'r|reverse' => \$reverse,
116 'v|verbose' => \$verbose,
117 'x|crosscheck' => \$do_crosscheck,
123 usage("Cannot mix -a with module list") if $scan_all && @ARGV;
125 if ($do_crosscheck) {
126 usage("can't use -r, -d, --diffopts, -v with --crosscheck")
127 if ($reverse || $use_diff || $diff_opts || $verbose);
130 $diff_opts = '-u' unless defined $diff_opts;
131 usage("can't use -f without --crosscheck") if $force;
135 ? grep $Maintainers::Modules{$_}{CPAN},
136 (sort {lc $a cmp lc $b } keys %Maintainers::Modules)
138 usage("No modules specified") unless @modules;
142 if (defined $output_file) {
143 open $outfh, '>', $output_file
144 or die "ERROR: could not open file '$output_file' for writing: $!\n";
147 open $outfh, ">&STDOUT"
148 or die "ERROR: can't dup STDOUT: $!\n";
151 if (defined $cache_dir) {
152 die "ERROR: no such directory: '$cache_dir'\n" unless -d $cache_dir;
155 if ($do_crosscheck) {
156 do_crosscheck($outfh, $cache_dir, $force, \@modules);
159 do_compare(\@modules, $outfh, $output_file, $cache_dir, $verbose, $use_diff,
160 $reverse, $diff_opts);
166 # compare a list of modules against their CPAN equivalents
169 my ($modules, $outfh, $output_file, $cache_dir, $verbose,
170 $use_diff, $reverse, $diff_opts) = @_;
173 # first, make sure we have a directory where they can all be untarred,
174 # and if its a permanent directory, clear any previous content
177 $untar_dir = File::Spec->catdir($cache_dir, UNTAR_DIR);
179 File::Path::rmtree($untar_dir)
180 or die "failed to remove $untar_dir\n";
183 or die "mkdir $untar_dir: $!\n";
186 $untar_dir = File::Temp::tempdir( CLEANUP => 1 );
189 my %ignorable = map { ($_ => 1) } @Maintainers::IGNORABLE;
192 for my $module (@$modules) {
193 warn "Processing $module ...\n" if defined $output_file;
195 my $m = $Maintainers::Modules{$module}
196 or die "ERROR: No such module in Maintainers.pl: '$module'\n";
198 unless ($m->{CPAN}) {
199 print $outfh "WARNING: $module is not dual-life; skipping\n";
203 my $dist = $m->{DISTRIBUTION};
204 die "ERROR: $module has no DISTRIBUTION entry\n" unless defined $dist;
206 if ($seen_dist{$dist}) {
207 warn "WARNING: duplicate entry for $dist in $module\n"
210 print $outfh "\n$module - ".$Maintainers::Modules{$module}->{DISTRIBUTION}."\n" unless $use_diff;
211 print $outfh " upstream is: ".($m->{UPSTREAM} || 'UNKNOWN!')."\n";
217 $cpan_dir = get_distribution($cache_dir, $untar_dir, $module, $dist)
220 print $outfh " ", $@;
221 print $outfh " (skipping)\n";
225 my @perl_files = Maintainers::get_module_files($module);
227 my $manifest = File::Spec->catfile($cpan_dir, 'MANIFEST');
228 die "ERROR: no such file: $manifest\n" unless -f $manifest;
230 my $cpan_files = ExtUtils::Manifest::maniread($manifest);
231 my @cpan_files = sort keys %$cpan_files;
233 my ($excluded, $map) = get_map($m, $module, \@perl_files);
236 @perl_unseen{@perl_files} = ();
237 my %perl_files = %perl_unseen;
239 foreach my $cpan_file (@cpan_files) {
240 my $mapped_file = cpan_to_perl($excluded, $map, $cpan_file);
241 unless (defined $mapped_file) {
242 print $outfh " Excluded: $cpan_file\n" if $verbose;
246 if (exists $perl_files{$mapped_file}) {
247 delete $perl_unseen{$mapped_file};
250 # some CPAN files foo are stored in core as foo.packed,
251 # which are then unpacked by 'make test_prep'
252 my $packed_file = "$mapped_file.packed";
253 if (exists $perl_files{$packed_file} ) {
254 if (! -f $mapped_file and -f $packed_file) {
256 WARNING: $mapped_file not found, but .packed variant exists.
257 Perhaps you need to run 'make test_prep'?
261 delete $perl_unseen{$packed_file};
264 if ($ignorable{$cpan_file}) {
265 print $outfh " Ignored: $cpan_file\n" if $verbose;
270 print $outfh " CPAN only: $cpan_file",
271 ($cpan_file eq $mapped_file) ? "\n"
272 : " (expected $mapped_file)\n";
279 my $abs_cpan_file = File::Spec->catfile($cpan_dir, $cpan_file);
281 # should never happen
282 die "ERROR: can't find file $abs_cpan_file\n" unless -f $abs_cpan_file;
284 # might happen if the FILES entry in Maintainers.pl is wrong
285 unless (-f $mapped_file) {
286 print $outfh "WARNING: perl file not found: $mapped_file\n";
290 my $relative_mapped_file = $mapped_file;
291 $relative_mapped_file =~ s/^(cpan|dist|ext)\/.*?\///;
293 if (File::Compare::compare($abs_cpan_file, $mapped_file)) {
297 file_diff($outfh, $abs_cpan_file, $mapped_file,
298 $reverse, $diff_opts);
301 if ($cpan_file eq $relative_mapped_file) {
302 print $outfh " Modified: $relative_mapped_file\n";
305 print $outfh " Modified: $cpan_file $relative_mapped_file\n";
310 if ($cpan_file eq $relative_mapped_file) {
311 print $outfh " Unchanged: $cpan_file\n";
314 print $outfh " Unchanged: $cpan_file $relative_mapped_file\n";
318 for (sort keys %perl_unseen) {
319 print $outfh " Perl only: $_\n" unless $use_diff;
324 # given FooBar-1.23_45.tar.gz, return FooBar
328 $d =~ s/\.tar\.gz$//;
330 $d =~ s/[\d\-_\.]+$//;
334 # process --crosscheck action:
335 # ie list all distributions whose CPAN versions differ from that listed in
339 my ($outfh, $cache_dir, $force, $modules) = @_;
341 my $file = '02packages.details.txt';
342 my $download_dir = $cache_dir || File::Temp::tempdir( CLEANUP => 1 );
343 my $path = File::Spec->catfile($download_dir, $file);
344 my $gzfile = "$path.gz";
346 # grab 02packages.details.txt
348 my $url = 'http://www.cpan.org/modules/02packages.details.txt.gz';
350 if (! -f $gzfile or $force) {
352 my_getstore($url, $gzfile);
355 IO::Uncompress::Gunzip::gunzip($gzfile, $path)
356 or die "ERROR: failed to ungzip $gzfile: $IO::Uncompress::Gunzip::GunzipError\n";
358 # suck in the data from it
360 open my $fh, '<', $path
361 or die "ERROR: open: $file: $!\n";
369 my @f = split ' ', $_;
371 warn "WARNING: $file:$.: line doesn't have three fields (skipping)\n";
375 $distro =~ s{^[A-Z]/[A-Z]{2}/}{}; # strip leading A/AB/
376 $modules{$f[0]} = $distro;
378 (my $short_distro = $distro) =~ s{^.*/}{};
380 $distros{distro_base($short_distro)}{$distro} = 1;
383 for my $module (@$modules) {
384 my $m = $Maintainers::Modules{$module}
385 or die "ERROR: No such module in Maintainers.pl: '$module'\n";
387 unless ($m->{CPAN}) {
388 print $outfh "\nWARNING: $module is not dual-life; skipping\n";
392 # given an entry like
393 # Foo::Bar 1.23 foo-bar-1.23.tar.gz,
394 # first compare the module name against Foo::Bar, and failing that,
397 my $pdist = $m->{DISTRIBUTION};
398 die "ERROR: $module has no DISTRIBUTION entry\n" unless defined $pdist;
400 my $cdist = $modules{$module};
401 (my $short_pdist = $pdist) =~ s{^.*/}{};
403 unless (defined $cdist) {
404 my $d = $distros{distro_base($short_pdist)};
405 unless (defined $d) {
406 print $outfh "\n$module: Can't determine current CPAN entry\n";
410 print $outfh "\n$module: (found more than one CPAN candidate):\n";
411 print $outfh " perl: $pdist\n";
412 print $outfh " CPAN: $_\n" for sort keys %$d;
415 $cdist = (keys %$d)[0];
418 if ($cdist ne $pdist) {
419 print $outfh "\n$module:\n Perl: $pdist\n CPAN: $cdist\n";
426 # get the EXCLUDED and MAP entries for this module, or
427 # make up defauts if they don't exist
430 my ($m, $module_name, $perl_files) = @_;
432 my ($excluded, $map) = @$m{qw(EXCLUDED MAP)};
436 return $excluded, $map if $map;
438 # all files under ext/foo-bar (plus maybe some under t/lib)???
442 if (m{^((?:ext|dist|cpan)/[^/]+/)}) {
443 if (defined $ext and $ext ne $1) {
444 # more than one ext/$ext/
460 $map = { '' => $ext },
463 (my $base = $module_name) =~ s{::}{/}g;
470 return $excluded, $map;
474 # Given an exclude list and a mapping hash, convert a CPAN filename
475 # (eg 't/bar.t') to the equivalent perl filename (eg 'lib/Foo/Bar/t/bar.t').
476 # Returns an empty list for an excluded file
479 my ($excluded, $map, $cpan_file) = @_;
481 for my $exclude (@$excluded) {
482 # may be a simple string to match exactly, or a pattern
484 return if $cpan_file =~ $exclude;
487 return if $cpan_file eq $exclude;
491 my $perl_file = $cpan_file;
493 # try longest prefix first, then alphabetically on tie-break
494 for my $prefix (sort { length($b) <=> length($a) || $a cmp $b } keys %$map)
496 last if $perl_file =~ s/^\Q$prefix/$map->{$prefix}/;
503 # do LWP::Simple::getstore, possibly without LWP::Simple being available
505 my $lwp_simple_available;
508 my ($url, $file) = @_;
509 unless (defined $lwp_simple_available) {
510 eval { require LWP::Simple };
511 $lwp_simple_available = $@ eq '';
513 if ($lwp_simple_available) {
514 return LWP::Simple::is_success(LWP::Simple::getstore($url, $file));
517 return system(WGET_CMD, "-O", $file, $url) == 0;
522 # download and unpack a distribution
523 # Returns the full pathname of the extracted directory
524 # (eg '/tmp/XYZ/Foo_bar-1.23')
526 # cache_dir: where to dowenload the .tar.gz file to
527 # untar_dir: where to untar or unzup the file
528 # module: name of module
529 # dist: name of the distribution
531 sub get_distribution {
532 my ($cache_dir, $untar_dir, $module, $dist) = @_;
534 $dist =~ m{.+/([^/]+)$}
535 or die "ERROR: $module: invalid DISTRIBUTION name (no AUTHOR/ prefix): $dist\n";
538 my $download_dir = $cache_dir || File::Temp::tempdir( CLEANUP => 1 );
539 my $download_file = File::Spec->catfile($download_dir, $filename);
541 # download distribution
543 if (-f $download_file and ! -s $download_file ) {
544 # wget can leave a zero-length file on failed download
545 unlink $download_file;
548 unless (-f $download_file) {
550 $dist =~ /^([A-Z])([A-Z])/
551 or die "ERROR: $module: invalid DISTRIBUTION name (not /^[A-Z]{2}/): $dist\n";
553 my $url = "http://www.cpan.org/modules/by-authors/id/$1/$1$2/$dist";
554 my_getstore($url, $download_file)
555 or die "ERROR: Could not fetch '$url'\n";
558 # extract distribution
560 my $ae = Archive::Extract->new( archive => $download_file);
561 $ae->extract( to => $untar_dir )
562 or die "ERROR: failed to extract distribution '$download_file to temp. dir: " . $ae->error() . "\n";
564 # get the name of the extracted distribution dir
566 my $path = File::Spec->catfile($untar_dir, $filename);
568 $path =~ s/\.tar\.gz$// or
569 $path =~ s/\.zip$// or
570 die "ERROR: downloaded file does not have a recognised suffix: $path\n";
572 die "ERROR: Extracted tarball does not appear as $path\n" unless -d $path;
578 # produce the diff of a single file
581 my $cpan_file = shift;
582 my $perl_file = shift;
584 my $diff_opts = shift;
587 my @cmd = (DIFF_CMD, split ' ', $diff_opts);
589 push @cmd, $perl_file, $cpan_file;
592 push @cmd, $cpan_file, $perl_file;
596 $result =~ s{^(--- |\+\+\+ )/tmp/[^/]+/}{$1}gm;
598 print $outfh $result;