3 # core-cpan-diff: Compare CPAN modules with their equivalent in core
5 # Originally based on App::DualLivedDiff by Steffen Mueller.
13 use File::Basename ();
17 use File::Spec::Functions;
19 use IO::Uncompress::Gunzip ();
21 use ExtUtils::Manifest;
22 use ExtUtils::MakeMaker ();
25 BEGIN { die "Must be run from root of perl source tree\n" unless -d 'Porting' }
29 # if running from blead, we may be doing -Ilib, which means when we
30 # 'chdir /tmp/foo', Archive::Extract may not find Archive::Tar etc.
31 # So preload the things we need, and tell it to check %INC first:
36 $Module::Load::Conditional::CHECK_INC_HASH = 1;
38 # stop Archive::Extract whinging about lack of Archive::Zip
39 $Archive::Extract::WARN = 0;
41 # where, under the cache dir, to download tarballs to
42 use constant SRC_DIR => 'tarballs';
44 # where, under the cache dir, to untar stuff to
45 use constant UNTAR_DIR => 'untarred';
47 use constant DIFF_CMD => 'diff';
48 use constant WGET_CMD => 'wget';
51 print STDERR "\n@_\n\n" if @_;
53 Usage: $0 [opts] [ -d | -v | -x ] [ -a | module ... ]
55 -a/--all Scan all dual-life modules.
57 -c/--cachedir Where to save downloaded CPAN tarball files
58 (defaults to /tmp/something/ with deletion after each run).
60 -d/--diff Display file differences using diff(1), rather than just
61 listing which files have changed.
62 The diff(1) command is assumed to be in your PATH.
64 --diffopts Options to pass to the diff command. Defaults to '-u'.
66 -f|force Force download from CPAN of new 02packages.details.txt file
67 (with --crosscheck only).
69 -m|mirror Preferred CPAN mirror URI (http:// or file:///)
70 (Local mirror must be a complete mirror, not minicpan)
72 -o/--output File name to write output to (defaults to STDOUT).
74 -r/--reverse Reverses the diff (perl to CPAN).
76 -u/--upstream only print modules with the given upstream (defaults to all)
78 -v/--verbose List the fate of *all* files in the tarball, not just those
79 that differ or are missing.
81 -x|crosscheck List the distributions whose current CPAN version differs from
82 that in blead (i.e. the DISTRIBUTION field in Maintainers.pl).
84 By default (i.e. without the --crosscheck option), for each listed module
85 (or with -a, all CPAN modules listed in Maintainers.pl), grab the tarball
86 from CPAN associated with that module, and compare the files in it with
87 those in the perl source tree.
89 Must be run from the root of the perl source tree.
90 Module names must match the keys of %Modules in Maintainers.pl.
101 my $mirror_url = "http://www.cpan.org/";
109 'a|all' => \$scan_all,
110 'c|cachedir=s' => \$cache_dir,
111 'd|diff' => \$use_diff,
112 'diffopts:s' => \$diff_opts,
113 'f|force' => \$force,
115 'm|mirror=s' => \$mirror_url,
116 'o|output=s' => \$output_file,
117 'r|reverse' => \$reverse,
118 'u|upstream=s@' => \@wanted_upstreams,
119 'v|verbose' => \$verbose,
120 'x|crosscheck' => \$do_crosscheck,
125 usage("Cannot mix -a with module list") if $scan_all && @ARGV;
127 if ($do_crosscheck) {
128 usage("can't use -r, -d, --diffopts, -v with --crosscheck")
129 if ( $reverse || $use_diff || $diff_opts || $verbose );
132 $diff_opts = '-u -b' unless defined $diff_opts;
133 usage("can't use -f without --crosscheck") if $force;
138 ? grep $Maintainers::Modules{$_}{CPAN},
139 ( sort { lc $a cmp lc $b } keys %Maintainers::Modules )
141 usage("No modules specified") unless @modules;
144 if ( defined $output_file ) {
145 open $outfh, '>', $output_file
146 or die "ERROR: could not open file '$output_file' for writing: $!\n";
149 open $outfh, ">&STDOUT"
150 or die "ERROR: can't dup STDOUT: $!\n";
153 if ( defined $cache_dir ) {
154 die "ERROR: no such directory: '$cache_dir'\n" unless -d $cache_dir;
157 $cache_dir = File::Temp::tempdir( CLEANUP => 1 );
160 $mirror_url .= "/" unless substr( $mirror_url, -1 ) eq "/";
161 my $test_file = "modules/07mirror.yml";
163 cpan_url( $mirror_url, $test_file ),
164 catfile( $cache_dir, $test_file )
165 ) or die "ERROR: not a CPAN mirror '$mirror_url'\n";
167 if ($do_crosscheck) {
168 do_crosscheck( $outfh, $cache_dir, $mirror_url, $force, \@modules );
172 \@modules, $outfh, $output_file,
173 $cache_dir, $mirror_url, $verbose,
174 $use_diff, $reverse, $diff_opts,
180 # construct a CPAN url
183 my ( $mirror_url, @path ) = @_;
184 return $mirror_url unless @path;
185 my $cpan_path = join( "/", map { split "/", $_ } @path );
186 $cpan_path =~ s{\A/}{}; # remove leading slash since url has one trailing
187 return $mirror_url . $cpan_path;
190 # construct a CPAN URL for a author/distribution string like:
191 # BINGOS/Archive-Extract-0.52.tar.gz
193 sub cpan_url_distribution {
194 my ( $mirror_url, $distribution ) = @_;
195 $distribution =~ /^([A-Z])([A-Z])/
196 or die "ERROR: invalid DISTRIBUTION name (not /^[A-Z]{2}/): $distribution\n";
197 my $path = "modules/by-authors/id/$1/$1$2/$distribution";
198 return cpan_url( $mirror_url, $path );
201 # compare a list of modules against their CPAN equivalents
205 $modules, $outfh, $output_file, $cache_dir,
206 $mirror_url, $verbose, $use_diff, $reverse,
207 $diff_opts, $wanted_upstreams
210 # first, make sure we have a directory where they can all be untarred,
211 # and if its a permanent directory, clear any previous content
212 my $untar_dir = catdir( $cache_dir, UNTAR_DIR );
213 my $src_dir = catdir( $cache_dir, SRC_DIR );
214 for my $d ( $src_dir, $untar_dir ) {
216 mkdir $d or die "mkdir $d: $!\n";
219 my %ignorable = map { ( $_ => 1 ) } @Maintainers::IGNORABLE;
222 for my $module (@$modules) {
223 warn "Processing $module ...\n" if defined $output_file;
225 my $m = $Maintainers::Modules{$module}
226 or die "ERROR: No such module in Maintainers.pl: '$module'\n";
228 unless ( $m->{CPAN} ) {
229 print $outfh "WARNING: $module is not dual-life; skipping\n";
233 my $dist = $m->{DISTRIBUTION};
234 die "ERROR: $module has no DISTRIBUTION entry\n" unless defined $dist;
236 if ( $seen_dist{$dist}++ ) {
237 warn "WARNING: duplicate entry for $dist in $module\n";
240 my $upstream = $m->{UPSTREAM} || 'UNKNOWN';
241 next if @$wanted_upstreams and !( $upstream ~~ $wanted_upstreams );
243 print $outfh "\n$module - "
244 . $Maintainers::Modules{$module}->{DISTRIBUTION} . "\n";
245 print $outfh " upstream is: "
246 . ( $m->{UPSTREAM} || 'UNKNOWN!' ) . "\n";
251 get_distribution( $src_dir, $mirror_url, $untar_dir, $module,
255 print $outfh " ", $@;
256 print $outfh " (skipping)\n";
260 my @perl_files = Maintainers::get_module_files($module);
262 my $manifest = catfile( $cpan_dir, 'MANIFEST' );
263 die "ERROR: no such file: $manifest\n" unless -f $manifest;
265 my $cpan_files = ExtUtils::Manifest::maniread($manifest);
266 my @cpan_files = sort keys %$cpan_files;
268 ( my $main_pm = $module ) =~ s{::}{/}g;
271 my ( $excluded, $map, $customized ) =
272 get_map( $m, $module, \@perl_files );
275 @perl_unseen{@perl_files} = ();
276 my %perl_files = %perl_unseen;
278 foreach my $cpan_file (@cpan_files) {
280 cpan_to_perl( $excluded, $map, $customized, $cpan_file );
281 unless ( defined $mapped_file ) {
282 print $outfh " Excluded: $cpan_file\n" if $verbose;
286 if ( exists $perl_files{$mapped_file} ) {
287 delete $perl_unseen{$mapped_file};
291 # some CPAN files foo are stored in core as foo.packed,
292 # which are then unpacked by 'make test_prep'
293 my $packed_file = "$mapped_file.packed";
294 if ( exists $perl_files{$packed_file} ) {
295 if ( !-f $mapped_file and -f $packed_file ) {
297 WARNING: $mapped_file not found, but .packed variant exists.
298 Perhaps you need to run 'make test_prep'?
302 delete $perl_unseen{$packed_file};
305 if ( $ignorable{$cpan_file} ) {
306 print $outfh " Ignored: $cpan_file\n" if $verbose;
311 print $outfh " CPAN only: $cpan_file",
312 ( $cpan_file eq $mapped_file )
314 : " (expected $mapped_file)\n";
320 my $abs_cpan_file = catfile( $cpan_dir, $cpan_file );
322 # should never happen
323 die "ERROR: can't find file $abs_cpan_file\n"
324 unless -f $abs_cpan_file;
326 # might happen if the FILES entry in Maintainers.pl is wrong
327 unless ( -f $mapped_file ) {
328 print $outfh "WARNING: perl file not found: $mapped_file\n";
332 my $relative_mapped_file = relatively_mapped($mapped_file);
335 file_diff( $outfh, $abs_cpan_file, $mapped_file, $reverse,
337 if ( $different && customized( $m, $relative_mapped_file ) ) {
339 print $outfh " Customized for blead: $relative_mapped_file\n";
344 $different =~ s{^(--- |\+\+\+ )/tmp/[^/]+/}{$1}gm;
345 print $outfh $different;
348 if ( $cpan_file eq $relative_mapped_file ) {
349 print $outfh " Modified: $relative_mapped_file\n";
353 " Modified: $cpan_file $relative_mapped_file\n";
356 if ( $cpan_file =~ m{\.pm\z} ) {
357 my $pv = MM->parse_version($mapped_file) || 'unknown';
358 my $cv = MM->parse_version($abs_cpan_file) || 'unknown';
361 " Version mismatch in '$cpan_file':\n $cv (cpan) vs $pv (perl)\n";
367 elsif ( customized( $m, $relative_mapped_file ) ) {
368 # Maintainers.pl says we customized it, but it looks the
369 # same as CPAN so maybe we lost the customization, which
371 if ( $cpan_file eq $relative_mapped_file ) {
372 print $outfh " Blead customization missing: $cpan_file\n";
376 " Blead customization missing: $cpan_file $relative_mapped_file\n";
380 if ( $cpan_file eq $relative_mapped_file ) {
381 print $outfh " Unchanged: $cpan_file\n";
385 " Unchanged: $cpan_file $relative_mapped_file\n";
389 for ( sort keys %perl_unseen ) {
390 my $relative_mapped_file = relatively_mapped($_);
391 if ( customized( $m, $relative_mapped_file ) ) {
392 print $outfh " Customized for blead: $_\n";
395 print $outfh " Perl only: $_\n" unless $use_diff;
401 sub relatively_mapped {
402 my $relative = shift;
403 $relative =~ s/^(cpan|dist|ext)\/.*?\///;
407 # given FooBar-1.23_45.tar.gz, return FooBar
411 $d =~ s/\.tar\.gz$//;
413 $d =~ s/[\d\-_\.]+$//;
417 # process --crosscheck action:
418 # ie list all distributions whose CPAN versions differ from that listed in
422 my ( $outfh, $cache_dir, $mirror_url, $force, $modules ) = @_;
424 my $file = '02packages.details.txt';
425 my $download_dir = $cache_dir || File::Temp::tempdir( CLEANUP => 1 );
426 my $path = catfile( $download_dir, $file );
427 my $gzfile = "$path.gz";
429 # grab 02packages.details.txt
431 my $url = cpan_url( $mirror_url, "modules/02packages.details.txt.gz" );
433 if ( !-f $gzfile or $force ) {
435 my_getstore( $url, $gzfile );
438 IO::Uncompress::Gunzip::gunzip( $gzfile, $path )
440 "ERROR: failed to ungzip $gzfile: $IO::Uncompress::Gunzip::GunzipError\n";
442 # suck in the data from it
444 open my $fh, '<', $path
445 or die "ERROR: open: $file: $!\n";
453 my @f = split ' ', $_;
456 "WARNING: $file:$.: line doesn't have three fields (skipping)\n";
460 $distro =~ s{^[A-Z]/[A-Z]{2}/}{}; # strip leading A/AB/
461 $modules{ $f[0] } = $distro;
463 ( my $short_distro = $distro ) =~ s{^.*/}{};
465 $distros{ distro_base($short_distro) }{$distro} = 1;
468 for my $module (@$modules) {
469 my $m = $Maintainers::Modules{$module}
470 or die "ERROR: No such module in Maintainers.pl: '$module'\n";
472 unless ( $m->{CPAN} ) {
473 print $outfh "\nWARNING: $module is not dual-life; skipping\n";
477 # given an entry like
478 # Foo::Bar 1.23 foo-bar-1.23.tar.gz,
479 # first compare the module name against Foo::Bar, and failing that,
482 my $pdist = $m->{DISTRIBUTION};
483 die "ERROR: $module has no DISTRIBUTION entry\n" unless defined $pdist;
485 my $cdist = $modules{$module};
486 ( my $short_pdist = $pdist ) =~ s{^.*/}{};
488 unless ( defined $cdist ) {
489 my $d = $distros{ distro_base($short_pdist) };
490 unless ( defined $d ) {
491 print $outfh "\n$module: Can't determine current CPAN entry\n";
494 if ( keys %$d > 1 ) {
496 "\n$module: (found more than one CPAN candidate):\n";
497 print $outfh " perl: $pdist\n";
498 print $outfh " CPAN: $_\n" for sort keys %$d;
501 $cdist = ( keys %$d )[0];
504 if ( $cdist ne $pdist ) {
505 print $outfh "\n$module:\n Perl: $pdist\n CPAN: $cdist\n";
510 # get the EXCLUDED and MAP entries for this module, or
511 # make up defauts if they don't exist
514 my ( $m, $module_name, $perl_files ) = @_;
516 my ( $excluded, $map, $customized ) = @$m{qw(EXCLUDED MAP CUSTOMIZED)};
521 return $excluded, $map, $customized if $map;
523 # all files under ext/foo-bar (plus maybe some under t/lib)???
527 if (m{^((?:ext|dist|cpan)/[^/]+/)}) {
528 if ( defined $ext and $ext ne $1 ) {
530 # more than one ext/$ext/
545 if ( defined $ext ) {
546 $map = { '' => $ext },;
549 ( my $base = $module_name ) =~ s{::}{/}g;
556 return $excluded, $map, $customized;
559 # Given an exclude list and a mapping hash, convert a CPAN filename
560 # (eg 't/bar.t') to the equivalent perl filename (eg 'lib/Foo/Bar/t/bar.t').
561 # Returns an empty list for an excluded file
564 my ( $excluded, $map, $customized, $cpan_file ) = @_;
566 for my $exclude (@$excluded) {
567 next if $exclude ~~ $customized;
569 # may be a simple string to match exactly, or a pattern
570 if ( ref $exclude ) {
571 return if $cpan_file =~ $exclude;
574 return if $cpan_file eq $exclude;
578 my $perl_file = $cpan_file;
580 # try longest prefix first, then alphabetically on tie-break
582 my $prefix ( sort { length($b) <=> length($a) || $a cmp $b } keys %$map )
584 last if $perl_file =~ s/^\Q$prefix/$map->{$prefix}/;
589 # fetch a file from a URL and store it in a file given by a filename
592 my ( $url, $file ) = @_;
593 File::Path::mkpath( File::Basename::dirname($file) );
594 if ( $url =~ qr{\Afile://(?:localhost)?/} ) {
595 ( my $local_path = $url ) =~ s{\Afile://(?:localhost)?}{};
596 File::Copy::copy( $local_path, $file );
598 my $http = HTTP::Tiny->new;
599 my $response = $http->mirror($url, $file);
600 return $response->{success};
604 # download and unpack a distribution
605 # Returns the full pathname of the extracted directory
606 # (eg '/tmp/XYZ/Foo_bar-1.23')
608 # cache_dir: where to download the .tar.gz file to
609 # mirror_url: CPAN mirror to download from
610 # untar_dir: where to untar or unzup the file
611 # module: name of module
612 # dist: name of the distribution
614 sub get_distribution {
615 my ( $src_dir, $mirror_url, $untar_dir, $module, $dist ) = @_;
617 $dist =~ m{.+/([^/]+)$}
619 "ERROR: $module: invalid DISTRIBUTION name (no AUTHOR/ prefix): $dist\n";
622 my $download_file = catfile( $src_dir, $filename );
624 # download distribution
626 if ( -f $download_file and !-s $download_file ) {
628 # wget can leave a zero-length file on failed download
629 unlink $download_file;
632 unless ( -f $download_file ) {
635 my $url = cpan_url_distribution( $mirror_url, $dist );
636 my_getstore( $url, $download_file )
637 or die "ERROR: Could not fetch '$url'\n";
640 # get the expected name of the extracted distribution dir
642 my $path = catfile( $untar_dir, $filename );
644 $path =~ s/\.tar\.gz$//
645 or $path =~ s/\.zip$//
647 "ERROR: downloaded file does not have a recognised suffix: $path\n";
649 # extract it unless we already have it cached or tarball is newer
650 if ( !-d $path || ( -M $download_file < -M $path ) ) {
651 my $ae = Archive::Extract->new( archive => $download_file );
652 $ae->extract( to => $untar_dir )
654 "ERROR: failed to extract distribution '$download_file to temp. dir: "
655 . $ae->error() . "\n";
657 $path = $ae->extract_path;
660 die "ERROR: Extracted tarball does not appear as $path\n" unless -d $path;
665 # produce the diff of a single file
668 my $cpan_file = shift;
669 my $perl_file = shift;
671 my $diff_opts = shift;
673 my @cmd = ( DIFF_CMD, split ' ', $diff_opts );
675 push @cmd, $perl_file, $cpan_file;
678 push @cmd, $cpan_file, $perl_file;
685 my ( $module_data, $file ) = @_;
686 return grep { $file eq $_ } @{ $module_data->{CUSTOMIZED} };