3 # core-cpan-diff: Compare CPAN modules with their equivalent in core
5 # Originally based on App::DualLivedDiff by Steffen Mueller.
13 use File::Basename ();
17 use File::Spec::Functions;
19 use IO::Uncompress::Gunzip ();
21 use ExtUtils::Manifest;
22 use ExtUtils::MakeMaker ();
25 BEGIN { die "Must be run from root of perl source tree\n" unless -d 'Porting' }
29 # if running from blead, we may be doing -Ilib, which means when we
30 # 'chdir /tmp/foo', Archive::Extract may not find Archive::Tar etc.
31 # So preload the things we need, and tell it to check %INC first:
36 $Module::Load::Conditional::CHECK_INC_HASH = 1;
38 # stop Archive::Extract whinging about lack of Archive::Zip
39 $Archive::Extract::WARN = 0;
41 # where, under the cache dir, to download tarballs to
42 use constant SRC_DIR => 'tarballs';
44 # where, under the cache dir, to untar stuff to
45 use constant UNTAR_DIR => 'untarred';
47 use constant DIFF_CMD => 'diff';
48 use constant WGET_CMD => 'wget';
51 print STDERR "\n@_\n\n" if @_;
53 Usage: $0 [opts] [ -d | -v | -x ] [ -a | module ... ]
55 -a/--all Scan all dual-life modules.
57 -c/--cachedir Where to save downloaded CPAN tarball files
58 (defaults to /tmp/something/ with deletion after each run).
60 -d/--diff Display file differences using diff(1), rather than just
61 listing which files have changed.
62 The diff(1) command is assumed to be in your PATH.
64 --diffopts Options to pass to the diff command. Defaults to '-u'.
66 -f|force Force download from CPAN of new 02packages.details.txt file
67 (with --crosscheck only).
69 -m|mirror Preferred CPAN mirror URI (http:// or file:///)
70 (Local mirror must be a complete mirror, not minicpan)
72 -o/--output File name to write output to (defaults to STDOUT).
74 -r/--reverse Reverses the diff (perl to CPAN).
76 -u/--upstream only print modules with the given upstream (defaults to all)
78 -v/--verbose List the fate of *all* files in the tarball, not just those
79 that differ or are missing.
81 -x|crosscheck List the distributions whose current CPAN version differs from
82 that in blead (i.e. the DISTRIBUTION field in Maintainers.pl).
84 By default (i.e. without the --crosscheck option), for each listed module
85 (or with -a, all CPAN modules listed in Maintainers.pl), grab the tarball
86 from CPAN associated with that module, and compare the files in it with
87 those in the perl source tree.
89 Must be run from the root of the perl source tree.
90 Module names must match the keys of %Modules in Maintainers.pl.
101 my $mirror_url = "http://www.cpan.org/";
109 'a|all' => \$scan_all,
110 'c|cachedir=s' => \$cache_dir,
111 'd|diff' => \$use_diff,
112 'diffopts:s' => \$diff_opts,
113 'f|force' => \$force,
115 'm|mirror=s' => \$mirror_url,
116 'o|output=s' => \$output_file,
117 'r|reverse' => \$reverse,
118 'u|upstream=s@' => \@wanted_upstreams,
119 'v|verbose' => \$verbose,
120 'x|crosscheck' => \$do_crosscheck,
125 usage("Cannot mix -a with module list") if $scan_all && @ARGV;
127 if ($do_crosscheck) {
128 usage("can't use -r, -d, --diffopts, -v with --crosscheck")
129 if ( $reverse || $use_diff || $diff_opts || $verbose );
132 $diff_opts = '-u -b' unless defined $diff_opts;
133 usage("can't use -f without --crosscheck") if $force;
138 ? grep $Maintainers::Modules{$_}{CPAN},
139 ( sort { lc $a cmp lc $b } keys %Maintainers::Modules )
141 usage("No modules specified") unless @modules;
144 if ( defined $output_file ) {
145 open $outfh, '>', $output_file
146 or die "ERROR: could not open file '$output_file' for writing: $!\n";
149 open $outfh, ">&STDOUT"
150 or die "ERROR: can't dup STDOUT: $!\n";
153 if ( defined $cache_dir ) {
154 die "ERROR: no such directory: '$cache_dir'\n" unless -d $cache_dir;
157 $cache_dir = File::Temp::tempdir( CLEANUP => 1 );
160 $mirror_url .= "/" unless substr( $mirror_url, -1 ) eq "/";
161 my $test_file = "modules/07mirror.yml";
163 cpan_url( $mirror_url, $test_file ),
164 catfile( $cache_dir, $test_file )
165 ) or die "ERROR: not a CPAN mirror '$mirror_url'\n";
167 if ($do_crosscheck) {
168 do_crosscheck( $outfh, $cache_dir, $mirror_url, $force, \@modules );
172 \@modules, $outfh, $output_file,
173 $cache_dir, $mirror_url, $verbose,
174 $use_diff, $reverse, $diff_opts,
180 # construct a CPAN url
183 my ( $mirror_url, @path ) = @_;
184 return $mirror_url unless @path;
185 my $cpan_path = join( "/", map { split "/", $_ } @path );
186 $cpan_path =~ s{\A/}{}; # remove leading slash since url has one trailing
187 return $mirror_url . $cpan_path;
190 # compare a list of modules against their CPAN equivalents
194 $modules, $outfh, $output_file, $cache_dir,
195 $mirror_url, $verbose, $use_diff, $reverse,
196 $diff_opts, $wanted_upstreams
199 # first, make sure we have a directory where they can all be untarred,
200 # and if its a permanent directory, clear any previous content
201 my $untar_dir = catdir( $cache_dir, UNTAR_DIR );
202 my $src_dir = catdir( $cache_dir, SRC_DIR );
203 for my $d ( $src_dir, $untar_dir ) {
205 mkdir $d or die "mkdir $d: $!\n";
208 my %ignorable = map { ( $_ => 1 ) } @Maintainers::IGNORABLE;
211 for my $module (@$modules) {
212 warn "Processing $module ...\n" if defined $output_file;
214 my $m = $Maintainers::Modules{$module}
215 or die "ERROR: No such module in Maintainers.pl: '$module'\n";
217 unless ( $m->{CPAN} ) {
218 print $outfh "WARNING: $module is not dual-life; skipping\n";
222 my $dist = $m->{DISTRIBUTION};
223 die "ERROR: $module has no DISTRIBUTION entry\n" unless defined $dist;
225 if ( $seen_dist{$dist}++ ) {
226 warn "WARNING: duplicate entry for $dist in $module\n";
229 my $upstream = $m->{UPSTREAM} || 'UNKNOWN';
230 next if @$wanted_upstreams and !( $upstream ~~ $wanted_upstreams );
232 print $outfh "\n$module - "
233 . $Maintainers::Modules{$module}->{DISTRIBUTION} . "\n";
234 print $outfh " upstream is: "
235 . ( $m->{UPSTREAM} || 'UNKNOWN!' ) . "\n";
240 get_distribution( $src_dir, $mirror_url, $untar_dir, $module,
244 print $outfh " ", $@;
245 print $outfh " (skipping)\n";
249 my @perl_files = Maintainers::get_module_files($module);
251 my $manifest = catfile( $cpan_dir, 'MANIFEST' );
252 die "ERROR: no such file: $manifest\n" unless -f $manifest;
254 my $cpan_files = ExtUtils::Manifest::maniread($manifest);
255 my @cpan_files = sort keys %$cpan_files;
257 ( my $main_pm = $module ) =~ s{::}{/}g;
260 my ( $excluded, $map, $customized ) =
261 get_map( $m, $module, \@perl_files );
264 @perl_unseen{@perl_files} = ();
265 my %perl_files = %perl_unseen;
267 foreach my $cpan_file (@cpan_files) {
269 cpan_to_perl( $excluded, $map, $customized, $cpan_file );
270 unless ( defined $mapped_file ) {
271 print $outfh " Excluded: $cpan_file\n" if $verbose;
275 if ( exists $perl_files{$mapped_file} ) {
276 delete $perl_unseen{$mapped_file};
280 # some CPAN files foo are stored in core as foo.packed,
281 # which are then unpacked by 'make test_prep'
282 my $packed_file = "$mapped_file.packed";
283 if ( exists $perl_files{$packed_file} ) {
284 if ( !-f $mapped_file and -f $packed_file ) {
286 WARNING: $mapped_file not found, but .packed variant exists.
287 Perhaps you need to run 'make test_prep'?
291 delete $perl_unseen{$packed_file};
294 if ( $ignorable{$cpan_file} ) {
295 print $outfh " Ignored: $cpan_file\n" if $verbose;
300 print $outfh " CPAN only: $cpan_file",
301 ( $cpan_file eq $mapped_file )
303 : " (expected $mapped_file)\n";
309 my $abs_cpan_file = catfile( $cpan_dir, $cpan_file );
311 # should never happen
312 die "ERROR: can't find file $abs_cpan_file\n"
313 unless -f $abs_cpan_file;
315 # might happen if the FILES entry in Maintainers.pl is wrong
316 unless ( -f $mapped_file ) {
317 print $outfh "WARNING: perl file not found: $mapped_file\n";
321 my $relative_mapped_file = relatively_mapped($mapped_file);
324 file_diff( $outfh, $abs_cpan_file, $mapped_file, $reverse,
326 if ( $different && customized( $m, $relative_mapped_file ) ) {
328 print $outfh " Customized for blead: $relative_mapped_file\n";
333 $different =~ s{^(--- |\+\+\+ )/tmp/[^/]+/}{$1}gm;
334 print $outfh $different;
337 if ( $cpan_file eq $relative_mapped_file ) {
338 print $outfh " Modified: $relative_mapped_file\n";
342 " Modified: $cpan_file $relative_mapped_file\n";
345 if ( $cpan_file =~ m{\.pm\z} ) {
346 my $pv = MM->parse_version($mapped_file) || 'unknown';
347 my $cv = MM->parse_version($abs_cpan_file) || 'unknown';
350 " Version mismatch in '$cpan_file':\n $cv (cpan) vs $pv (perl)\n";
356 elsif ( customized( $m, $relative_mapped_file ) ) {
357 # Maintainers.pl says we customized it, but it looks the
358 # same as CPAN so maybe we lost the customization, which
360 if ( $cpan_file eq $relative_mapped_file ) {
361 print $outfh " Blead customization missing: $cpan_file\n";
365 " Blead customization missing: $cpan_file $relative_mapped_file\n";
369 if ( $cpan_file eq $relative_mapped_file ) {
370 print $outfh " Unchanged: $cpan_file\n";
374 " Unchanged: $cpan_file $relative_mapped_file\n";
378 for ( sort keys %perl_unseen ) {
379 my $relative_mapped_file = relatively_mapped($_);
380 if ( customized( $m, $relative_mapped_file ) ) {
381 print $outfh " Customized for blead: $_\n";
384 print $outfh " Perl only: $_\n" unless $use_diff;
390 sub relatively_mapped {
391 my $relative = shift;
392 $relative =~ s/^(cpan|dist|ext)\/.*?\///;
396 # given FooBar-1.23_45.tar.gz, return FooBar
400 $d =~ s/\.tar\.gz$//;
402 $d =~ s/[\d\-_\.]+$//;
406 # process --crosscheck action:
407 # ie list all distributions whose CPAN versions differ from that listed in
411 my ( $outfh, $cache_dir, $mirror_url, $force, $modules ) = @_;
413 my $file = '02packages.details.txt';
414 my $download_dir = $cache_dir || File::Temp::tempdir( CLEANUP => 1 );
415 my $path = catfile( $download_dir, $file );
416 my $gzfile = "$path.gz";
418 # grab 02packages.details.txt
420 my $url = cpan_url( $mirror_url, "modules/02packages.details.txt.gz" );
422 if ( !-f $gzfile or $force ) {
424 my_getstore( $url, $gzfile );
427 IO::Uncompress::Gunzip::gunzip( $gzfile, $path )
429 "ERROR: failed to ungzip $gzfile: $IO::Uncompress::Gunzip::GunzipError\n";
431 # suck in the data from it
433 open my $fh, '<', $path
434 or die "ERROR: open: $file: $!\n";
442 my @f = split ' ', $_;
445 "WARNING: $file:$.: line doesn't have three fields (skipping)\n";
449 $distro =~ s{^[A-Z]/[A-Z]{2}/}{}; # strip leading A/AB/
450 $modules{ $f[0] } = $distro;
452 ( my $short_distro = $distro ) =~ s{^.*/}{};
454 $distros{ distro_base($short_distro) }{$distro} = 1;
457 for my $module (@$modules) {
458 my $m = $Maintainers::Modules{$module}
459 or die "ERROR: No such module in Maintainers.pl: '$module'\n";
461 unless ( $m->{CPAN} ) {
462 print $outfh "\nWARNING: $module is not dual-life; skipping\n";
466 # given an entry like
467 # Foo::Bar 1.23 foo-bar-1.23.tar.gz,
468 # first compare the module name against Foo::Bar, and failing that,
471 my $pdist = $m->{DISTRIBUTION};
472 die "ERROR: $module has no DISTRIBUTION entry\n" unless defined $pdist;
474 my $cdist = $modules{$module};
475 ( my $short_pdist = $pdist ) =~ s{^.*/}{};
477 unless ( defined $cdist ) {
478 my $d = $distros{ distro_base($short_pdist) };
479 unless ( defined $d ) {
480 print $outfh "\n$module: Can't determine current CPAN entry\n";
483 if ( keys %$d > 1 ) {
485 "\n$module: (found more than one CPAN candidate):\n";
486 print $outfh " perl: $pdist\n";
487 print $outfh " CPAN: $_\n" for sort keys %$d;
490 $cdist = ( keys %$d )[0];
493 if ( $cdist ne $pdist ) {
494 print $outfh "\n$module:\n Perl: $pdist\n CPAN: $cdist\n";
499 # get the EXCLUDED and MAP entries for this module, or
500 # make up defauts if they don't exist
503 my ( $m, $module_name, $perl_files ) = @_;
505 my ( $excluded, $map, $customized ) = @$m{qw(EXCLUDED MAP CUSTOMIZED)};
510 return $excluded, $map, $customized if $map;
512 # all files under ext/foo-bar (plus maybe some under t/lib)???
516 if (m{^((?:ext|dist|cpan)/[^/]+/)}) {
517 if ( defined $ext and $ext ne $1 ) {
519 # more than one ext/$ext/
534 if ( defined $ext ) {
535 $map = { '' => $ext },;
538 ( my $base = $module_name ) =~ s{::}{/}g;
545 return $excluded, $map, $customized;
548 # Given an exclude list and a mapping hash, convert a CPAN filename
549 # (eg 't/bar.t') to the equivalent perl filename (eg 'lib/Foo/Bar/t/bar.t').
550 # Returns an empty list for an excluded file
553 my ( $excluded, $map, $customized, $cpan_file ) = @_;
555 for my $exclude (@$excluded) {
556 next if $exclude ~~ $customized;
558 # may be a simple string to match exactly, or a pattern
559 if ( ref $exclude ) {
560 return if $cpan_file =~ $exclude;
563 return if $cpan_file eq $exclude;
567 my $perl_file = $cpan_file;
569 # try longest prefix first, then alphabetically on tie-break
571 my $prefix ( sort { length($b) <=> length($a) || $a cmp $b } keys %$map )
573 last if $perl_file =~ s/^\Q$prefix/$map->{$prefix}/;
578 # fetch a file from a URL and store it in a file given by a filename
581 my ( $url, $file ) = @_;
582 File::Path::mkpath( File::Basename::dirname($file) );
583 if ( $url =~ qr{\Afile://(?:localhost)?/} ) {
584 ( my $local_path = $url ) =~ s{\Afile://(?:localhost)?}{};
585 File::Copy::copy( $local_path, $file );
587 my $http = HTTP::Tiny->new;
588 my $response = $http->mirror($url, $file);
589 return $response->{success};
593 # download and unpack a distribution
594 # Returns the full pathname of the extracted directory
595 # (eg '/tmp/XYZ/Foo_bar-1.23')
597 # cache_dir: where to download the .tar.gz file to
598 # mirror_url: CPAN mirror to download from
599 # untar_dir: where to untar or unzup the file
600 # module: name of module
601 # dist: name of the distribution
603 sub get_distribution {
604 my ( $src_dir, $mirror_url, $untar_dir, $module, $dist ) = @_;
606 $dist =~ m{.+/([^/]+)$}
608 "ERROR: $module: invalid DISTRIBUTION name (no AUTHOR/ prefix): $dist\n";
611 my $download_file = catfile( $src_dir, $filename );
613 # download distribution
615 if ( -f $download_file and !-s $download_file ) {
617 # wget can leave a zero-length file on failed download
618 unlink $download_file;
621 unless ( -f $download_file ) {
624 $dist =~ /^([A-Z])([A-Z])/
626 "ERROR: $module: invalid DISTRIBUTION name (not /^[A-Z]{2}/): $dist\n";
629 cpan_url( $mirror_url, "modules/by-authors/id/$1/$1$2/$dist" );
630 my_getstore( $url, $download_file )
631 or die "ERROR: Could not fetch '$url'\n";
634 # get the expected name of the extracted distribution dir
636 my $path = catfile( $untar_dir, $filename );
638 $path =~ s/\.tar\.gz$//
639 or $path =~ s/\.zip$//
641 "ERROR: downloaded file does not have a recognised suffix: $path\n";
643 # extract it unless we already have it cached or tarball is newer
644 if ( !-d $path || ( -M $download_file < -M $path ) ) {
645 my $ae = Archive::Extract->new( archive => $download_file );
646 $ae->extract( to => $untar_dir )
648 "ERROR: failed to extract distribution '$download_file to temp. dir: "
649 . $ae->error() . "\n";
651 $path = $ae->extract_path;
654 die "ERROR: Extracted tarball does not appear as $path\n" unless -d $path;
659 # produce the diff of a single file
662 my $cpan_file = shift;
663 my $perl_file = shift;
665 my $diff_opts = shift;
667 my @cmd = ( DIFF_CMD, split ' ', $diff_opts );
669 push @cmd, $perl_file, $cpan_file;
672 push @cmd, $cpan_file, $perl_file;
679 my ( $module_data, $file ) = @_;
680 return grep { $file eq $_ } @{ $module_data->{CUSTOMIZED} };