3 # core-cpan-diff: Compare CPAN modules with their equivalent in core
5 # Originally based on App::DualLivedDiff by Steffen Mueller.
13 use File::Basename ();
17 use File::Spec::Functions;
19 use IO::Uncompress::Gunzip ();
21 use ExtUtils::Manifest;
22 use ExtUtils::MakeMaker ();
24 BEGIN { die "Must be run from root of perl source tree\n" unless -d 'Porting' }
28 # if running from blead, we may be doing -Ilib, which means when we
29 # 'chdir /tmp/foo', Archive::Extract may not find Archive::Tar etc.
30 # So preload the things we need, and tell it to check %INC first:
35 $Module::Load::Conditional::CHECK_INC_HASH = 1;
37 # stop Archive::Extract whinging about lack of Archive::Zip
38 $Archive::Extract::WARN = 0;
40 # where, under the cache dir, to download tarballs to
41 use constant SRC_DIR => 'tarballs';
43 # where, under the cache dir, to untar stuff to
44 use constant UNTAR_DIR => 'untarred';
46 use constant DIFF_CMD => 'diff';
47 use constant WGET_CMD => 'wget';
50 print STDERR "\n@_\n\n" if @_;
52 Usage: $0 [opts] [ -d | -v | -x ] [ -a | module ... ]
54 -a/--all Scan all dual-life modules.
56 -c/--cachedir Where to save downloaded CPAN tarball files
57 (defaults to /tmp/something/ with deletion after each run).
59 -d/--diff Display file differences using diff(1), rather than just
60 listing which files have changed.
61 The diff(1) command is assumed to be in your PATH.
63 --diffopts Options to pass to the diff command. Defaults to '-u'.
65 -f|force Force download from CPAN of new 02packages.details.txt file
66 (with --crosscheck only).
68 -m|mirror Preferred CPAN mirror URI (http:// or file:///)
69 (Local mirror must be a complete mirror, not minicpan)
71 -o/--output File name to write output to (defaults to STDOUT).
73 -r/--reverse Reverses the diff (perl to CPAN).
75 -u/--upstream only print modules with the given upstream (defaults to all)
77 -v/--verbose List the fate of *all* files in the tarball, not just those
78 that differ or are missing.
80 -x|crosscheck List the distributions whose current CPAN version differs from
81 that in blead (i.e. the DISTRIBUTION field in Maintainers.pl).
83 By default (i.e. without the --crosscheck option), for each listed module
84 (or with -a, all CPAN modules listed in Maintainers.pl), grab the tarball
85 from CPAN associated with that module, and compare the files in it with
86 those in the perl source tree.
88 Must be run from the root of the perl source tree.
89 Module names must match the keys of %Modules in Maintainers.pl.
100 my $mirror_url = "http://www.cpan.org/";
108 'a|all' => \$scan_all,
109 'c|cachedir=s' => \$cache_dir,
110 'd|diff' => \$use_diff,
111 'diffopts:s' => \$diff_opts,
112 'f|force' => \$force,
114 'm|mirror=s' => \$mirror_url,
115 'o|output=s' => \$output_file,
116 'r|reverse' => \$reverse,
117 'u|upstream=s@' => \@wanted_upstreams,
118 'v|verbose' => \$verbose,
119 'x|crosscheck' => \$do_crosscheck,
124 usage("Cannot mix -a with module list") if $scan_all && @ARGV;
126 if ($do_crosscheck) {
127 usage("can't use -r, -d, --diffopts, -v with --crosscheck")
128 if ( $reverse || $use_diff || $diff_opts || $verbose );
131 $diff_opts = '-u -b' unless defined $diff_opts;
132 usage("can't use -f without --crosscheck") if $force;
137 ? grep $Maintainers::Modules{$_}{CPAN},
138 ( sort { lc $a cmp lc $b } keys %Maintainers::Modules )
140 usage("No modules specified") unless @modules;
143 if ( defined $output_file ) {
144 open $outfh, '>', $output_file
145 or die "ERROR: could not open file '$output_file' for writing: $!\n";
148 open $outfh, ">&STDOUT"
149 or die "ERROR: can't dup STDOUT: $!\n";
152 if ( defined $cache_dir ) {
153 die "ERROR: no such directory: '$cache_dir'\n" unless -d $cache_dir;
156 $cache_dir = File::Temp::tempdir( CLEANUP => 1 );
159 $mirror_url .= "/" unless substr( $mirror_url, -1 ) eq "/";
160 my $test_file = "modules/07mirror.yml";
162 cpan_url( $mirror_url, $test_file ),
163 catfile( $cache_dir, $test_file )
164 ) or die "ERROR: not a CPAN mirror '$mirror_url'\n";
166 if ($do_crosscheck) {
167 do_crosscheck( $outfh, $cache_dir, $mirror_url, $force, \@modules );
171 \@modules, $outfh, $output_file,
172 $cache_dir, $mirror_url, $verbose,
173 $use_diff, $reverse, $diff_opts,
179 # construct a CPAN url
182 my ( $mirror_url, @path ) = @_;
183 return $mirror_url unless @path;
184 my $cpan_path = join( "/", map { split "/", $_ } @path );
185 $cpan_path =~ s{\A/}{}; # remove leading slash since url has one trailing
186 return $mirror_url . $cpan_path;
189 # compare a list of modules against their CPAN equivalents
193 $modules, $outfh, $output_file, $cache_dir,
194 $mirror_url, $verbose, $use_diff, $reverse,
195 $diff_opts, $wanted_upstreams
198 # first, make sure we have a directory where they can all be untarred,
199 # and if its a permanent directory, clear any previous content
200 my $untar_dir = catdir( $cache_dir, UNTAR_DIR );
201 my $src_dir = catdir( $cache_dir, SRC_DIR );
202 for my $d ( $src_dir, $untar_dir ) {
204 mkdir $d or die "mkdir $d: $!\n";
207 my %ignorable = map { ( $_ => 1 ) } @Maintainers::IGNORABLE;
210 for my $module (@$modules) {
211 warn "Processing $module ...\n" if defined $output_file;
213 my $m = $Maintainers::Modules{$module}
214 or die "ERROR: No such module in Maintainers.pl: '$module'\n";
216 unless ( $m->{CPAN} ) {
217 print $outfh "WARNING: $module is not dual-life; skipping\n";
221 my $dist = $m->{DISTRIBUTION};
222 die "ERROR: $module has no DISTRIBUTION entry\n" unless defined $dist;
224 if ( $seen_dist{$dist}++ ) {
225 warn "WARNING: duplicate entry for $dist in $module\n";
228 my $upstream = $m->{UPSTREAM} || 'UNKNOWN';
229 next if @$wanted_upstreams and !( $upstream ~~ $wanted_upstreams );
231 print $outfh "\n$module - "
232 . $Maintainers::Modules{$module}->{DISTRIBUTION} . "\n";
233 print $outfh " upstream is: "
234 . ( $m->{UPSTREAM} || 'UNKNOWN!' ) . "\n";
239 get_distribution( $src_dir, $mirror_url, $untar_dir, $module,
243 print $outfh " ", $@;
244 print $outfh " (skipping)\n";
248 my @perl_files = Maintainers::get_module_files($module);
250 my $manifest = catfile( $cpan_dir, 'MANIFEST' );
251 die "ERROR: no such file: $manifest\n" unless -f $manifest;
253 my $cpan_files = ExtUtils::Manifest::maniread($manifest);
254 my @cpan_files = sort keys %$cpan_files;
256 ( my $main_pm = $module ) =~ s{::}{/}g;
259 my ( $excluded, $map, $customized ) =
260 get_map( $m, $module, \@perl_files );
263 @perl_unseen{@perl_files} = ();
264 my %perl_files = %perl_unseen;
266 foreach my $cpan_file (@cpan_files) {
268 cpan_to_perl( $excluded, $map, $customized, $cpan_file );
269 unless ( defined $mapped_file ) {
270 print $outfh " Excluded: $cpan_file\n" if $verbose;
274 if ( exists $perl_files{$mapped_file} ) {
275 delete $perl_unseen{$mapped_file};
279 # some CPAN files foo are stored in core as foo.packed,
280 # which are then unpacked by 'make test_prep'
281 my $packed_file = "$mapped_file.packed";
282 if ( exists $perl_files{$packed_file} ) {
283 if ( !-f $mapped_file and -f $packed_file ) {
285 WARNING: $mapped_file not found, but .packed variant exists.
286 Perhaps you need to run 'make test_prep'?
290 delete $perl_unseen{$packed_file};
293 if ( $ignorable{$cpan_file} ) {
294 print $outfh " Ignored: $cpan_file\n" if $verbose;
299 print $outfh " CPAN only: $cpan_file",
300 ( $cpan_file eq $mapped_file )
302 : " (expected $mapped_file)\n";
308 my $abs_cpan_file = catfile( $cpan_dir, $cpan_file );
310 # should never happen
311 die "ERROR: can't find file $abs_cpan_file\n"
312 unless -f $abs_cpan_file;
314 # might happen if the FILES entry in Maintainers.pl is wrong
315 unless ( -f $mapped_file ) {
316 print $outfh "WARNING: perl file not found: $mapped_file\n";
320 my $relative_mapped_file = $mapped_file;
321 $relative_mapped_file =~ s/^(cpan|dist|ext)\/.*?\///;
324 file_diff( $outfh, $abs_cpan_file, $mapped_file, $reverse,
326 if ( $different && customized( $m, $relative_mapped_file ) ) {
328 print $outfh " Customized: $relative_mapped_file\n";
333 $different =~ s{^(--- |\+\+\+ )/tmp/[^/]+/}{$1}gm;
334 print $outfh $different;
337 if ( $cpan_file eq $relative_mapped_file ) {
338 print $outfh " Modified: $relative_mapped_file\n";
342 " Modified: $cpan_file $relative_mapped_file\n";
345 if ( $cpan_file =~ m{\.pm\z} ) {
346 my $pv = MM->parse_version($mapped_file) || 'unknown';
347 my $cv = MM->parse_version($abs_cpan_file) || 'unknown';
350 " Version mismatch in '$cpan_file':\n $cv (cpan) vs $pv (perl)\n";
357 if ( $cpan_file eq $relative_mapped_file ) {
358 print $outfh " Unchanged: $cpan_file\n";
362 " Unchanged: $cpan_file $relative_mapped_file\n";
366 for ( sort keys %perl_unseen ) {
367 print $outfh " Perl only: $_\n" unless $use_diff;
372 # given FooBar-1.23_45.tar.gz, return FooBar
376 $d =~ s/\.tar\.gz$//;
378 $d =~ s/[\d\-_\.]+$//;
382 # process --crosscheck action:
383 # ie list all distributions whose CPAN versions differ from that listed in
387 my ( $outfh, $cache_dir, $mirror_url, $force, $modules ) = @_;
389 my $file = '02packages.details.txt';
390 my $download_dir = $cache_dir || File::Temp::tempdir( CLEANUP => 1 );
391 my $path = catfile( $download_dir, $file );
392 my $gzfile = "$path.gz";
394 # grab 02packages.details.txt
396 my $url = cpan_url( $mirror_url, "modules/02packages.details.txt.gz" );
398 if ( !-f $gzfile or $force ) {
400 my_getstore( $url, $gzfile );
403 IO::Uncompress::Gunzip::gunzip( $gzfile, $path )
405 "ERROR: failed to ungzip $gzfile: $IO::Uncompress::Gunzip::GunzipError\n";
407 # suck in the data from it
409 open my $fh, '<', $path
410 or die "ERROR: open: $file: $!\n";
418 my @f = split ' ', $_;
421 "WARNING: $file:$.: line doesn't have three fields (skipping)\n";
425 $distro =~ s{^[A-Z]/[A-Z]{2}/}{}; # strip leading A/AB/
426 $modules{ $f[0] } = $distro;
428 ( my $short_distro = $distro ) =~ s{^.*/}{};
430 $distros{ distro_base($short_distro) }{$distro} = 1;
433 for my $module (@$modules) {
434 my $m = $Maintainers::Modules{$module}
435 or die "ERROR: No such module in Maintainers.pl: '$module'\n";
437 unless ( $m->{CPAN} ) {
438 print $outfh "\nWARNING: $module is not dual-life; skipping\n";
442 # given an entry like
443 # Foo::Bar 1.23 foo-bar-1.23.tar.gz,
444 # first compare the module name against Foo::Bar, and failing that,
447 my $pdist = $m->{DISTRIBUTION};
448 die "ERROR: $module has no DISTRIBUTION entry\n" unless defined $pdist;
450 my $cdist = $modules{$module};
451 ( my $short_pdist = $pdist ) =~ s{^.*/}{};
453 unless ( defined $cdist ) {
454 my $d = $distros{ distro_base($short_pdist) };
455 unless ( defined $d ) {
456 print $outfh "\n$module: Can't determine current CPAN entry\n";
459 if ( keys %$d > 1 ) {
461 "\n$module: (found more than one CPAN candidate):\n";
462 print $outfh " perl: $pdist\n";
463 print $outfh " CPAN: $_\n" for sort keys %$d;
466 $cdist = ( keys %$d )[0];
469 if ( $cdist ne $pdist ) {
470 print $outfh "\n$module:\n Perl: $pdist\n CPAN: $cdist\n";
475 # get the EXCLUDED and MAP entries for this module, or
476 # make up defauts if they don't exist
479 my ( $m, $module_name, $perl_files ) = @_;
481 my ( $excluded, $map, $customized ) = @$m{qw(EXCLUDED MAP CUSTOMIZED)};
486 return $excluded, $map, $customized if $map;
488 # all files under ext/foo-bar (plus maybe some under t/lib)???
492 if (m{^((?:ext|dist|cpan)/[^/]+/)}) {
493 if ( defined $ext and $ext ne $1 ) {
495 # more than one ext/$ext/
510 if ( defined $ext ) {
511 $map = { '' => $ext },;
514 ( my $base = $module_name ) =~ s{::}{/}g;
521 return $excluded, $map, $customized;
524 # Given an exclude list and a mapping hash, convert a CPAN filename
525 # (eg 't/bar.t') to the equivalent perl filename (eg 'lib/Foo/Bar/t/bar.t').
526 # Returns an empty list for an excluded file
529 my ( $excluded, $map, $customized, $cpan_file ) = @_;
531 for my $exclude (@$excluded) {
532 next if $exclude ~~ $customized;
534 # may be a simple string to match exactly, or a pattern
535 if ( ref $exclude ) {
536 return if $cpan_file =~ $exclude;
539 return if $cpan_file eq $exclude;
543 my $perl_file = $cpan_file;
545 # try longest prefix first, then alphabetically on tie-break
547 my $prefix ( sort { length($b) <=> length($a) || $a cmp $b } keys %$map )
549 last if $perl_file =~ s/^\Q$prefix/$map->{$prefix}/;
554 # do LWP::Simple::getstore, possibly without LWP::Simple being available
556 my $lwp_simple_available;
559 my ( $url, $file ) = @_;
560 File::Path::mkpath( File::Basename::dirname($file) );
561 unless ( defined $lwp_simple_available ) {
562 eval { require LWP::Simple };
563 $lwp_simple_available = $@ eq '';
565 if ($lwp_simple_available) {
566 return LWP::Simple::is_success( LWP::Simple::getstore( $url, $file ) );
568 elsif ( $url =~ qr{\Afile://(?:localhost)?/} ) {
569 ( my $local_path = $url ) =~ s{\Afile://(?:localhost)?}{};
570 File::Copy::copy( $local_path, $file );
573 return system( WGET_CMD, "-O", $file, $url ) == 0;
577 # download and unpack a distribution
578 # Returns the full pathname of the extracted directory
579 # (eg '/tmp/XYZ/Foo_bar-1.23')
581 # cache_dir: where to download the .tar.gz file to
582 # mirror_url: CPAN mirror to download from
583 # untar_dir: where to untar or unzup the file
584 # module: name of module
585 # dist: name of the distribution
587 sub get_distribution {
588 my ( $src_dir, $mirror_url, $untar_dir, $module, $dist ) = @_;
590 $dist =~ m{.+/([^/]+)$}
592 "ERROR: $module: invalid DISTRIBUTION name (no AUTHOR/ prefix): $dist\n";
595 my $download_file = catfile( $src_dir, $filename );
597 # download distribution
599 if ( -f $download_file and !-s $download_file ) {
601 # wget can leave a zero-length file on failed download
602 unlink $download_file;
605 unless ( -f $download_file ) {
608 $dist =~ /^([A-Z])([A-Z])/
610 "ERROR: $module: invalid DISTRIBUTION name (not /^[A-Z]{2}/): $dist\n";
613 cpan_url( $mirror_url, "modules/by-authors/id/$1/$1$2/$dist" );
614 my_getstore( $url, $download_file )
615 or die "ERROR: Could not fetch '$url'\n";
618 # get the expected name of the extracted distribution dir
620 my $path = catfile( $untar_dir, $filename );
622 $path =~ s/\.tar\.gz$//
623 or $path =~ s/\.zip$//
625 "ERROR: downloaded file does not have a recognised suffix: $path\n";
627 # extract it unless we already have it cached or tarball is newer
628 if ( !-d $path || ( -M $download_file < -M $path ) ) {
629 my $ae = Archive::Extract->new( archive => $download_file );
630 $ae->extract( to => $untar_dir )
632 "ERROR: failed to extract distribution '$download_file to temp. dir: "
633 . $ae->error() . "\n";
635 $path = $ae->extract_path;
638 die "ERROR: Extracted tarball does not appear as $path\n" unless -d $path;
643 # produce the diff of a single file
646 my $cpan_file = shift;
647 my $perl_file = shift;
649 my $diff_opts = shift;
651 my @cmd = ( DIFF_CMD, split ' ', $diff_opts );
653 push @cmd, $perl_file, $cpan_file;
656 push @cmd, $cpan_file, $perl_file;
663 my ( $module_data, $file ) = @_;
664 return grep { $file eq $_ } @{ $module_data->{CUSTOMIZED} };