3 # core-cpan-diff: Compare CPAN modules with their equivalent in core
5 # Originally based on App::DualLivedDiff by Steffen Mueller.
13 use File::Basename ();
18 use File::Spec::Functions;
19 use IO::Uncompress::Gunzip ();
21 use ExtUtils::Manifest;
22 use ExtUtils::MakeMaker ();
25 BEGIN { die "Must be run from root of perl source tree\n" unless -d 'Porting' }
30 use Cwd qw[cwd chdir];
33 local $Archive::Tar::WARN=0;
35 # where, under the cache dir, to download tarballs to
36 use constant SRC_DIR => 'tarballs';
38 # where, under the cache dir, to untar stuff to
39 use constant UNTAR_DIR => 'untarred';
41 use constant DIFF_CMD => 'diff';
44 print STDERR "\n@_\n\n" if @_;
46 Usage: $0 [opts] [ -d | -v | -x ] [ -a | module ... ]
48 -a/--all Scan all dual-life modules.
50 -c/--cachedir Where to save downloaded CPAN tarball files
51 (defaults to /tmp/something/ with deletion after each run).
53 -d/--diff Display file differences using diff(1), rather than just
54 listing which files have changed.
56 --diffopts Options to pass to the diff command. Defaults to '-u'.
58 -f|force Force download from CPAN of new 02packages.details.txt file
59 (with --crosscheck only).
61 -m|mirror Preferred CPAN mirror URI (http:// or file:///)
62 (Local mirror must be a complete mirror, not minicpan)
64 -o/--output File name to write output to (defaults to STDOUT).
66 -r/--reverse Reverses the diff (perl to CPAN).
68 -u/--upstream only print modules with the given upstream (defaults to all)
70 -v/--verbose List the fate of *all* files in the tarball, not just those
71 that differ or are missing.
73 -x|crosscheck List the distributions whose current CPAN version differs from
74 that in blead (i.e. the DISTRIBUTION field in Maintainers.pl).
76 By default (i.e. without the --crosscheck option), for each listed module
77 (or with -a, all CPAN modules listed in Maintainers.pl), grab the tarball
78 from CPAN associated with that module, and compare the files in it with
79 those in the perl source tree.
81 Must be run from the root of the perl source tree.
82 Module names must match the keys of %Modules in Maintainers.pl.
84 The diff(1) command is assumed to be in your PATH and is used to diff files
85 regardless of whether the --diff option has been chosen to display any file
97 my $mirror_url = "http://www.cpan.org/";
105 'a|all' => \$scan_all,
106 'c|cachedir=s' => \$cache_dir,
107 'd|diff' => \$use_diff,
108 'diffopts:s' => \$diff_opts,
109 'f|force' => \$force,
111 'm|mirror=s' => \$mirror_url,
112 'o|output=s' => \$output_file,
113 'r|reverse' => \$reverse,
114 'u|upstream=s@' => \@wanted_upstreams,
115 'v|verbose' => \$verbose,
116 'x|crosscheck' => \$do_crosscheck,
119 @wanted_upstreams = map { $_ eq 'undef' ? undef : $_ } @wanted_upstreams;
123 usage("Cannot mix -a with module list") if $scan_all && @ARGV;
125 if ($do_crosscheck) {
126 usage("can't use -r, -d, --diffopts, -v with --crosscheck")
127 if ( $reverse || $use_diff || $diff_opts || $verbose );
130 $diff_opts = '-u -b' unless defined $diff_opts;
131 usage("can't use -f without --crosscheck") if $force;
136 ? grep $Maintainers::Modules{$_}{CPAN},
137 ( sort { lc $a cmp lc $b } keys %Maintainers::Modules )
139 usage("No modules specified") unless @modules;
142 if ( defined $output_file ) {
143 open $outfh, '>', $output_file
144 or die "ERROR: could not open file '$output_file' for writing: $!\n";
147 open $outfh, ">&STDOUT"
148 or die "ERROR: can't dup STDOUT: $!\n";
151 if ( defined $cache_dir ) {
152 die "ERROR: not a directory: '$cache_dir'\n"
153 if !-d $cache_dir && -e $cache_dir;
154 File::Path::mkpath($cache_dir);
157 $cache_dir = File::Temp::tempdir( CLEANUP => 1 );
160 $mirror_url .= "/" unless substr( $mirror_url, -1 ) eq "/";
161 my $test_file = "modules/03modlist.data.gz";
163 cpan_url( $mirror_url, $test_file ),
164 catfile( $cache_dir, $test_file )
165 ) or die "ERROR: not a CPAN mirror '$mirror_url'\n";
167 if ($do_crosscheck) {
169 $outfh, $cache_dir, $mirror_url,
170 $force, \@modules, \@wanted_upstreams
175 \@modules, $outfh, $output_file,
176 $cache_dir, $mirror_url, $verbose,
177 $use_diff, $reverse, $diff_opts,
183 # construct a CPAN url
186 my ( $mirror_url, @path ) = @_;
187 return $mirror_url unless @path;
188 my $cpan_path = join( "/", map { split "/", $_ } @path );
189 $cpan_path =~ s{\A/}{}; # remove leading slash since url has one trailing
190 return $mirror_url . $cpan_path;
193 # construct a CPAN URL for a author/distribution string like:
194 # BINGOS/Archive-Extract-0.52.tar.gz
196 sub cpan_url_distribution {
197 my ( $mirror_url, $distribution ) = @_;
198 $distribution =~ /^([A-Z])([A-Z])/
199 or die "ERROR: invalid DISTRIBUTION name (not /^[A-Z]{2}/): $distribution\n";
200 my $path = "authors/id/$1/$1$2/$distribution";
201 return cpan_url( $mirror_url, $path );
204 # compare a list of modules against their CPAN equivalents
208 $modules, $outfh, $output_file, $cache_dir,
209 $mirror_url, $verbose, $use_diff, $reverse,
210 $diff_opts, $wanted_upstreams
213 # first, make sure we have a directory where they can all be untarred,
214 # and if its a permanent directory, clear any previous content
215 my $untar_dir = catdir( $cache_dir, UNTAR_DIR );
216 my $src_dir = catdir( $cache_dir, SRC_DIR );
217 for my $d ( $src_dir, $untar_dir ) {
219 mkdir $d or die "mkdir $d: $!\n";
222 my %ignorable = map { ( $_ => 1 ) } @Maintainers::IGNORABLE;
223 my %wanted_upstream = map { ( $_ => 1 ) } @$wanted_upstreams;
226 for my $module (@$modules) {
227 warn "Processing $module ...\n" if defined $output_file;
229 my $m = $Maintainers::Modules{$module}
230 or die "ERROR: No such module in Maintainers.pl: '$module'\n";
232 unless ( $m->{CPAN} ) {
233 print $outfh "WARNING: $module is not dual-life; skipping\n";
237 my $dist = $m->{DISTRIBUTION};
238 die "ERROR: $module has no DISTRIBUTION entry\n" unless defined $dist;
240 if ( $seen_dist{$dist}++ ) {
241 warn "WARNING: duplicate entry for $dist in $module\n";
244 my $upstream = $m->{UPSTREAM};
245 next if @$wanted_upstreams and !$wanted_upstream{$upstream};
247 print $outfh "\n$module - "
248 . $Maintainers::Modules{$module}->{DISTRIBUTION} . "\n";
249 print $outfh " upstream is: "
250 . ( $m->{UPSTREAM} // 'UNKNOWN!' ) . "\n";
255 get_distribution( $src_dir, $mirror_url, $untar_dir, $module,
259 print $outfh " ", $@;
260 print $outfh " (skipping)\n";
264 my @perl_files = Maintainers::get_module_files($module);
266 my $manifest = catfile( $cpan_dir, 'MANIFEST' );
267 die "ERROR: no such file: $manifest\n" unless -f $manifest;
269 my $cpan_files = ExtUtils::Manifest::maniread($manifest);
270 my @cpan_files = sort keys %$cpan_files;
272 ( my $main_pm = $module ) =~ s{::}{/}g;
275 my ( $excluded, $map, $customized ) =
276 get_map( $m, $module, \@perl_files );
279 @perl_unseen{@perl_files} = ();
280 my %perl_files = %perl_unseen;
282 foreach my $cpan_file (@cpan_files) {
284 cpan_to_perl( $excluded, $map, $customized, $cpan_file );
285 unless ( defined $mapped_file ) {
286 print $outfh " Excluded: $cpan_file\n" if $verbose;
290 if ( exists $perl_files{$mapped_file} ) {
291 delete $perl_unseen{$mapped_file};
295 # some CPAN files foo are stored in core as foo.packed,
296 # which are then unpacked by 'make test_prep'
297 my $packed_file = "$mapped_file.packed";
298 if ( exists $perl_files{$packed_file} ) {
299 if ( !-f $mapped_file and -f $packed_file ) {
301 WARNING: $mapped_file not found, but .packed variant exists.
302 Perhaps you need to run 'make test_prep'?
306 delete $perl_unseen{$packed_file};
309 if ( $ignorable{$cpan_file} ) {
310 print $outfh " Ignored: $cpan_file\n" if $verbose;
315 print $outfh " CPAN only: $cpan_file",
316 ( $cpan_file eq $mapped_file )
318 : " (missing $mapped_file)\n";
324 my $abs_cpan_file = catfile( $cpan_dir, $cpan_file );
326 # should never happen
327 die "ERROR: can't find file $abs_cpan_file\n"
328 unless -f $abs_cpan_file;
330 # might happen if the FILES entry in Maintainers.pl is wrong
331 unless ( -f $mapped_file ) {
332 print $outfh "WARNING: perl file not found: $mapped_file\n";
336 my $relative_mapped_file = relatively_mapped($mapped_file);
339 file_diff( $outfh, $abs_cpan_file, $mapped_file, $reverse,
341 if ( $different && customized( $m, $relative_mapped_file ) ) {
343 print $outfh " Customized for blead: $relative_mapped_file\n";
348 $different =~ s{^(--- |\+\+\+ )/tmp/[^/]+/}{$1}gm;
349 print $outfh $different;
352 if ( $cpan_file eq $relative_mapped_file ) {
353 print $outfh " Modified: $relative_mapped_file\n";
357 " Modified: $cpan_file $relative_mapped_file\n";
360 if ( $cpan_file =~ m{\.pm\z} ) {
361 my $pv = MM->parse_version($mapped_file) || 'unknown';
362 my $cv = MM->parse_version($abs_cpan_file) || 'unknown';
365 " Version mismatch in '$cpan_file':\n $cv (cpan) vs $pv (perl)\n";
371 elsif ( customized( $m, $relative_mapped_file ) ) {
372 # Maintainers.pl says we customized it, but it looks the
373 # same as CPAN so maybe we lost the customization, which
375 if ( $cpan_file eq $relative_mapped_file ) {
376 print $outfh " Blead customization missing: $cpan_file\n";
380 " Blead customization missing: $cpan_file $relative_mapped_file\n";
384 if ( $cpan_file eq $relative_mapped_file ) {
385 print $outfh " Unchanged: $cpan_file\n";
389 " Unchanged: $cpan_file $relative_mapped_file\n";
393 for ( sort keys %perl_unseen ) {
394 my $relative_mapped_file = relatively_mapped($_);
395 if ( customized( $m, $relative_mapped_file ) ) {
396 print $outfh " Customized for blead: $_\n";
399 print $outfh " Perl only: $_\n" unless $use_diff;
405 sub relatively_mapped {
406 my $relative = shift;
407 $relative =~ s/^(cpan|dist|ext)\/.*?\///;
411 # given FooBar-1.23_45.tar.gz, return FooBar
415 $d =~ s/\.tar\.gz$//;
417 $d =~ s/[\d\-_\.]+$//;
421 # process --crosscheck action:
422 # ie list all distributions whose CPAN versions differ from that listed in
427 $outfh, $cache_dir, $mirror_url,
428 $force, $modules, $wanted_upstreams,
431 my $file = '02packages.details.txt';
432 my $download_dir = $cache_dir || File::Temp::tempdir( CLEANUP => 1 );
433 my $path = catfile( $download_dir, $file );
434 my $gzfile = "$path.gz";
436 # grab 02packages.details.txt
438 my $url = cpan_url( $mirror_url, "modules/02packages.details.txt.gz" );
440 if ( !-f $gzfile or $force ) {
442 my_getstore( $url, $gzfile );
445 IO::Uncompress::Gunzip::gunzip( $gzfile, $path )
447 "ERROR: failed to ungzip $gzfile: $IO::Uncompress::Gunzip::GunzipError\n";
449 # suck in the data from it
451 open my $fh, '<', $path
452 or die "ERROR: open: $file: $!\n";
460 my @f = split ' ', $_;
463 "WARNING: $file:$.: line doesn't have three fields (skipping)\n";
467 $distro =~ s{^[A-Z]/[A-Z]{2}/}{}; # strip leading A/AB/
468 $modules{ $f[0] } = $distro;
470 ( my $short_distro = $distro ) =~ s{^.*/}{};
472 $distros{ distro_base($short_distro) }{$distro} = 1;
475 my %wanted_upstream = map { ( $_ => 1 ) } @$wanted_upstreams;
476 for my $module (@$modules) {
477 my $m = $Maintainers::Modules{$module}
478 or die "ERROR: No such module in Maintainers.pl: '$module'\n";
480 unless ( $m->{CPAN} ) {
481 print $outfh "\nWARNING: $module is not dual-life; skipping\n";
485 # given an entry like
486 # Foo::Bar 1.23 foo-bar-1.23.tar.gz,
487 # first compare the module name against Foo::Bar, and failing that,
490 my $pdist = $m->{DISTRIBUTION};
491 die "ERROR: $module has no DISTRIBUTION entry\n" unless defined $pdist;
493 my $upstream = $m->{UPSTREAM};
494 next if @$wanted_upstreams and !$wanted_upstream{$upstream};
496 my $cdist = $modules{$module};
497 ( my $short_pdist = $pdist ) =~ s{^.*/}{};
499 unless ( defined $cdist ) {
500 my $d = $distros{ distro_base($short_pdist) };
501 unless ( defined $d ) {
502 print $outfh "\n$module: Can't determine current CPAN entry\n";
505 if ( keys %$d > 1 ) {
507 "\n$module: (found more than one CPAN candidate):\n";
508 print $outfh " perl: $pdist\n";
509 print $outfh " CPAN: $_\n" for sort keys %$d;
512 $cdist = ( keys %$d )[0];
515 if ( $cdist ne $pdist ) {
516 print $outfh "\n$module:\n Perl: $pdist\n CPAN: $cdist\n";
521 # get the EXCLUDED and MAP entries for this module, or
522 # make up defaults if they don't exist
525 my ( $m, $module_name, $perl_files ) = @_;
527 my ( $excluded, $map, $customized ) = @$m{qw(EXCLUDED MAP CUSTOMIZED)};
532 return $excluded, $map, $customized if $map;
534 # all files under ext/foo-bar (plus maybe some under t/lib)???
538 if (m{^((?:ext|dist|cpan)/[^/]+/)}) {
539 if ( defined $ext and $ext ne $1 ) {
541 # more than one ext/$ext/
556 if ( defined $ext ) {
557 $map = { '' => $ext },;
560 ( my $base = $module_name ) =~ s{::}{/}g;
567 return $excluded, $map, $customized;
570 # Given an exclude list and a mapping hash, convert a CPAN filename
571 # (eg 't/bar.t') to the equivalent perl filename (eg 'lib/Foo/Bar/t/bar.t').
572 # Returns an empty list for an excluded file
575 my ( $excluded, $map, $customized, $cpan_file ) = @_;
577 my %customized = map { ( $_ => 1 ) } @$customized;
578 for my $exclude (@$excluded) {
579 next if $customized{$exclude};
581 # may be a simple string to match exactly, or a pattern
582 if ( ref $exclude ) {
583 return if $cpan_file =~ $exclude;
586 return if $cpan_file eq $exclude;
590 my $perl_file = $cpan_file;
592 # try longest prefix first, then alphabetically on tie-break
594 my $prefix ( sort { length($b) <=> length($a) || $a cmp $b } keys %$map )
596 last if $perl_file =~ s/^\Q$prefix/$map->{$prefix}/;
601 # fetch a file from a URL and store it in a file given by a filename
604 my ( $url, $file ) = @_;
605 File::Path::mkpath( File::Basename::dirname($file) );
606 if ( $url =~ qr{\Afile://(?:localhost)?/} ) {
607 ( my $local_path = $url ) =~ s{\Afile://(?:localhost)?}{};
608 File::Copy::copy( $local_path, $file );
610 my $http = HTTP::Tiny->new;
611 my $response = $http->mirror($url, $file);
612 return $response->{success};
616 # download and unpack a distribution
617 # Returns the full pathname of the extracted directory
618 # (eg '/tmp/XYZ/Foo_bar-1.23')
620 # cache_dir: where to download the .tar.gz file to
621 # mirror_url: CPAN mirror to download from
622 # untar_dir: where to untar or unzup the file
623 # module: name of module
624 # dist: name of the distribution
626 sub get_distribution {
627 my ( $src_dir, $mirror_url, $untar_dir, $module, $dist ) = @_;
629 $dist =~ m{.+/([^/]+)$}
631 "ERROR: $module: invalid DISTRIBUTION name (no AUTHOR/ prefix): $dist\n";
634 my $download_file = catfile( $src_dir, $filename );
636 # download distribution
638 if ( -f $download_file and !-s $download_file ) {
640 # failed download might leave a zero-length file
641 unlink $download_file;
644 unless ( -f $download_file ) {
647 my $url = cpan_url_distribution( $mirror_url, $dist );
648 my_getstore( $url, $download_file )
649 or die "ERROR: Could not fetch '$url'\n";
652 # get the expected name of the extracted distribution dir
654 my $path = catfile( $untar_dir, $filename );
656 $path =~ s/\.tar\.gz$//
657 or $path =~ s/\.tgz$//
658 or $path =~ s/\.zip$//
660 "ERROR: downloaded file does not have a recognised suffix: $path\n";
662 # extract it unless we already have it cached or tarball is newer
663 if ( !-d $path || ( -M $download_file < -M $path ) ) {
664 $path = extract( $download_file, $untar_dir )
666 "ERROR: failed to extract distribution '$download_file to temp. dir: "
670 die "ERROR: Extracted tarball does not appear as $path\n" unless -d $path;
675 # produce the diff of a single file
678 my $cpan_file = shift;
679 my $perl_file = shift;
681 my $diff_opts = shift;
683 my @cmd = ( DIFF_CMD, split ' ', $diff_opts );
685 push @cmd, $perl_file, $cpan_file;
688 push @cmd, $cpan_file, $perl_file;
695 my ( $module_data, $file ) = @_;
696 return grep { $file eq $_ } @{ $module_data->{CUSTOMIZED} };
700 my ($archive,$to) = @_;
702 chdir $to or die "$!\n";
705 local $Archive::Tar::CHOWN = 0;
707 unless ( $next = Archive::Tar->iter( $archive, 1 ) ) {
708 $! = $Archive::Tar::error;
711 while ( my $file = $next->() ) {
712 push @files, $file->full_path;
713 unless ( $file->extract ) {
714 $! = $Archive::Tar::error;
719 my $path = __get_extract_dir( \@files );
720 chdir $cwd or die "$!\n";
724 sub __get_extract_dir {
725 my $files = shift || [];
727 return unless scalar @$files;
730 for my $aref ( [ \$dir1, 0 ], [ \$dir2, -1 ] ) {
731 my($dir,$pos) = @$aref;
733 ### add a catdir(), so that any trailing slashes get
734 ### take care of (removed)
735 ### also, a catdir() normalises './dir/foo' to 'dir/foo';
736 ### which was the problem in bug #23999
737 my $res = -d $files->[$pos]
738 ? File::Spec->catdir( $files->[$pos], '' )
739 : File::Spec->catdir( File::Basename::dirname( $files->[$pos] ) );
744 ### if the first and last dir don't match, make sure the
745 ### dirname is not set wrongly
748 ### dirs are the same, so we know for sure what the extract dir is
749 if( $dir1 eq $dir2 ) {
752 ### dirs are different.. do they share the base dir?
753 ### if so, use that, if not, fall back to '.'
755 my $base1 = [ File::Spec->splitdir( $dir1 ) ]->[0];
756 my $base2 = [ File::Spec->splitdir( $dir2 ) ]->[0];
758 $dir = File::Spec->rel2abs( $base1 eq $base2 ? $base1 : '.' );
761 return File::Spec->rel2abs( $dir );