This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
core-cpan-diff: various enhancements
[perl5.git] / Porting / core-cpan-diff
CommitLineData
cb097e7a
DM
1#!/usr/bin/env perl
2
3# core-cpan-diff: Compare CPAN modules with their equivalent in core
4
5# Originally based on App::DualLivedDiff by Steffen Mueller.
e66db76d 6
cb097e7a
DM
7use strict;
8use warnings;
9
10use 5.010;
11
12use Getopt::Long;
a08d2aad
DG
13use File::Basename ();
14use File::Copy ();
cb097e7a
DM
15use File::Temp ();
16use File::Path ();
c4940a93 17use File::Spec::Functions;
cb097e7a
DM
18use Archive::Extract;
19use IO::Uncompress::Gunzip ();
20use File::Compare ();
21use ExtUtils::Manifest;
c4940a93 22use ExtUtils::MakeMaker ();
cb097e7a
DM
23
24BEGIN { die "Must be run from root of perl source tree\n" unless -d 'Porting' }
25use lib 'Porting';
26use Maintainers ();
27
28# if running from blead, we may be doing -Ilib, which means when we
29# 'chdir /tmp/foo', Archive::Extract may not find Archive::Tar etc.
30# So preload the things we need, and tell it to check %INC first:
31
32use Archive::Tar;
33use IPC::Open3;
34use IO::Select;
35$Module::Load::Conditional::CHECK_INC_HASH = 1;
36# stop Archive::Extract whinging about lack of Archive::Zip
37$Archive::Extract::WARN = 0;
38
c4940a93
DG
39# where, under the cache dir, to download tarballs to
40use constant SRC_DIR => 'tarballs';
cb097e7a 41# where, under the cache dir, to untar stuff to
cb097e7a
DM
42use constant UNTAR_DIR => 'untarred';
43
44use constant DIFF_CMD => 'diff';
45use constant WGET_CMD => 'wget';
46
47sub usage {
48 print STDERR "\n@_\n\n" if @_;
49 print STDERR <<HERE;
50Usage: $0 [opts] [ -d | -v | -x ] [ -a | module ... ]
51
52-a/--all Scan all dual-life modules.
53
54-c/--cachedir Where to save downloaded CPAN tarball files
55 (defaults to /tmp/something/ with deletion after each run).
56
57-d/--diff Display file differences using diff(1), rather than just
58 listing which files have changed.
59 The diff(1) command is assumed to be in your PATH.
60
61--diffopts Options to pass to the diff command. Defaults to '-u'.
62
63-f|force Force download from CPAN of new 02packages.details.txt file
64 (with --crosscheck only).
65
10be9a51
DG
66-m|mirror Preferred CPAN mirror URI (http:// or file:///)
67 (Local mirror must be a complete mirror, not minicpan)
68
cb097e7a
DM
69-o/--output File name to write output to (defaults to STDOUT).
70
71-r/--reverse Reverses the diff (perl to CPAN).
72
2908b263
RS
73-u/--upstream only print modules with the given upstream (defaults to all)
74
cb097e7a
DM
75-v/--verbose List the fate of *all* files in the tarball, not just those
76 that differ or are missing.
77
78-x|crosscheck List the distributions whose current CPAN version differs from
79 that in blead (i.e. the DISTRIBUTION field in Maintainers.pl).
80
81By default (i.e. without the --crosscheck option), for each listed module
82(or with -a, all CPAN modules listed in Maintainers.pl), grab the tarball
83from CPAN associated with that module, and compare the files in it with
84those in the perl source tree.
85
86Must be run from the root of the perl source tree.
87Module names must match the keys of %Modules in Maintainers.pl.
88HERE
89 exit(1);
90}
91
92
93sub run {
94 my $scan_all;
95 my $diff_opts;
96 my $reverse = 0;
2908b263 97 my @wanted_upstreams;
cb097e7a 98 my $cache_dir;
10be9a51 99 my $mirror_url = "http://www.cpan.org/";
cb097e7a
DM
100 my $use_diff;
101 my $output_file;
102 my $verbose;
103 my $force;
104 my $do_crosscheck;
105
106 GetOptions(
107 'a|all' => \$scan_all,
108 'c|cachedir=s' => \$cache_dir,
109 'd|diff' => \$use_diff,
110 'diffopts:s' => \$diff_opts,
111 'f|force' => \$force,
112 'h|help' => \&usage,
10be9a51 113 'm|mirror=s' => \$mirror_url,
cb097e7a
DM
114 'o|output=s' => \$output_file,
115 'r|reverse' => \$reverse,
2908b263 116 'u|upstream=s@'=> \@wanted_upstreams,
cb097e7a
DM
117 'v|verbose' => \$verbose,
118 'x|crosscheck' => \$do_crosscheck,
119 ) or usage;
120
121
122 my @modules;
123
124 usage("Cannot mix -a with module list") if $scan_all && @ARGV;
125
126 if ($do_crosscheck) {
127 usage("can't use -r, -d, --diffopts, -v with --crosscheck")
128 if ($reverse || $use_diff || $diff_opts || $verbose);
129 }
130 else {
131 $diff_opts = '-u' unless defined $diff_opts;
132 usage("can't use -f without --crosscheck") if $force;
133 }
134
135 @modules = $scan_all
136 ? grep $Maintainers::Modules{$_}{CPAN},
137 (sort {lc $a cmp lc $b } keys %Maintainers::Modules)
138 : @ARGV;
139 usage("No modules specified") unless @modules;
140
141
142 my $outfh;
143 if (defined $output_file) {
144 open $outfh, '>', $output_file
8c814d1a 145 or die "ERROR: could not open file '$output_file' for writing: $!\n";
cb097e7a
DM
146 }
147 else {
148 open $outfh, ">&STDOUT"
8c814d1a 149 or die "ERROR: can't dup STDOUT: $!\n";
cb097e7a
DM
150 }
151
152 if (defined $cache_dir) {
153 die "ERROR: no such directory: '$cache_dir'\n" unless -d $cache_dir;
154 }
c4940a93
DG
155 else {
156 $cache_dir = File::Temp::tempdir( CLEANUP => 1 );
157 }
cb097e7a 158
10be9a51
DG
159 $mirror_url .= "/" unless substr($mirror_url,-1) eq "/";
160 my $test_file = "modules/07mirror.yml";
c4940a93 161 my_getstore(cpan_url($mirror_url, $test_file), catfile($cache_dir, $test_file))
10be9a51
DG
162 or die "ERROR: not a CPAN mirror '$mirror_url'\n";
163
cb097e7a 164 if ($do_crosscheck) {
10be9a51 165 do_crosscheck($outfh, $cache_dir, $mirror_url, $force, \@modules);
cb097e7a
DM
166 }
167 else {
10be9a51 168 do_compare(\@modules, $outfh, $output_file, $cache_dir, $mirror_url, $verbose, $use_diff,
2908b263 169 $reverse, $diff_opts, \@wanted_upstreams);
cb097e7a
DM
170 }
171}
172
10be9a51 173# construct a CPAN url
cb097e7a 174
10be9a51
DG
175sub cpan_url {
176 my ($mirror_url, @path) = @_;
177 return $mirror_url unless @path;
178 my $cpan_path = join( "/", map { split "/", $_ } @path );
179 $cpan_path =~ s{\A/}{}; # remove leading slash since url has one trailing
180 return $mirror_url . $cpan_path;
181}
cb097e7a
DM
182
183# compare a list of modules against their CPAN equivalents
184
185sub do_compare {
10be9a51 186 my ($modules, $outfh, $output_file, $cache_dir, $mirror_url, $verbose,
2908b263 187 $use_diff, $reverse, $diff_opts, $wanted_upstreams) = @_;
cb097e7a
DM
188
189
190 # first, make sure we have a directory where they can all be untarred,
191 # and if its a permanent directory, clear any previous content
c4940a93
DG
192 my $untar_dir = catdir($cache_dir, UNTAR_DIR);
193 my $src_dir = catdir($cache_dir, SRC_DIR);
194 for my $d ( $src_dir, $untar_dir ) {
195 next if -d $d;
196 mkdir $d or die "mkdir $d: $!\n";
cb097e7a
DM
197 }
198
199 my %ignorable = map { ($_ => 1) } @Maintainers::IGNORABLE;
200
201 my %seen_dist;
202 for my $module (@$modules) {
f0ce33d7 203 warn "Processing $module ...\n" if defined $output_file;
cb097e7a 204
e66db76d 205 my $m = $Maintainers::Modules{$module}
cb097e7a
DM
206 or die "ERROR: No such module in Maintainers.pl: '$module'\n";
207
208 unless ($m->{CPAN}) {
209 print $outfh "WARNING: $module is not dual-life; skipping\n";
210 next;
211 }
212
213 my $dist = $m->{DISTRIBUTION};
214 die "ERROR: $module has no DISTRIBUTION entry\n" unless defined $dist;
215
c4940a93 216 if ($seen_dist{$dist}++) {
cb097e7a
DM
217 warn "WARNING: duplicate entry for $dist in $module\n"
218 }
d55832d0 219
2908b263
RS
220 my $upstream = $m->{UPSTREAM} || 'UNKNOWN';
221 next if @$wanted_upstreams and ! ($upstream ~~ $wanted_upstreams);
d55832d0 222
c4940a93
DG
223 print $outfh "\n$module - ".$Maintainers::Modules{$module}->{DISTRIBUTION}."\n";
224 print $outfh " upstream is: ".($m->{UPSTREAM} || 'UNKNOWN!')."\n";
cb097e7a 225
8c814d1a
DM
226 my $cpan_dir;
227 eval {
c4940a93 228 $cpan_dir = get_distribution($src_dir, $mirror_url, $untar_dir, $module, $dist)
8c814d1a
DM
229 };
230 if ($@) {
231 print $outfh " ", $@;
232 print $outfh " (skipping)\n";
233 next;
234 }
cb097e7a
DM
235
236 my @perl_files = Maintainers::get_module_files($module);
237
c4940a93 238 my $manifest = catfile($cpan_dir, 'MANIFEST');
cb097e7a
DM
239 die "ERROR: no such file: $manifest\n" unless -f $manifest;
240
241 my $cpan_files = ExtUtils::Manifest::maniread($manifest);
242 my @cpan_files = sort keys %$cpan_files;
243
c4940a93
DG
244 (my $main_pm = $module) =~ s{::}{/}g;
245 $main_pm .= ".pm";
246
cb097e7a
DM
247 my ($excluded, $map) = get_map($m, $module, \@perl_files);
248
249 my %perl_unseen;
250 @perl_unseen{@perl_files} = ();
251 my %perl_files = %perl_unseen;
252
253 foreach my $cpan_file (@cpan_files) {
254 my $mapped_file = cpan_to_perl($excluded, $map, $cpan_file);
255 unless (defined $mapped_file) {
256 print $outfh " Excluded: $cpan_file\n" if $verbose;
257 next;
258 }
259
260 if (exists $perl_files{$mapped_file}) {
261 delete $perl_unseen{$mapped_file};
262 }
263 else {
264 # some CPAN files foo are stored in core as foo.packed,
265 # which are then unpacked by 'make test_prep'
266 my $packed_file = "$mapped_file.packed";
267 if (exists $perl_files{$packed_file} ) {
268 if (! -f $mapped_file and -f $packed_file) {
269 print $outfh <<EOF;
270WARNING: $mapped_file not found, but .packed variant exists.
271Perhaps you need to run 'make test_prep'?
272EOF
273 next;
274 }
275 delete $perl_unseen{$packed_file};
276 }
277 else {
278 if ($ignorable{$cpan_file}) {
279 print $outfh " Ignored: $cpan_file\n" if $verbose;
280 next;
281 }
282
283 unless ($use_diff) {
284 print $outfh " CPAN only: $cpan_file",
285 ($cpan_file eq $mapped_file) ? "\n"
286 : " (expected $mapped_file)\n";
287 }
288 next;
289 }
290 }
291
292
c4940a93 293 my $abs_cpan_file = catfile($cpan_dir, $cpan_file);
cb097e7a
DM
294
295 # should never happen
296 die "ERROR: can't find file $abs_cpan_file\n" unless -f $abs_cpan_file;
297
298 # might happen if the FILES entry in Maintainers.pl is wrong
299 unless (-f $mapped_file) {
300 print $outfh "WARNING: perl file not found: $mapped_file\n";
301 next;
302 }
303
c4940a93
DG
304 my $relative_mapped_file = $mapped_file;
305 $relative_mapped_file =~ s/^(cpan|dist|ext)\/.*?\///;
306
307 for my $f ( catfile('lib', $main_pm), $main_pm ) {
308 next unless $f eq $relative_mapped_file;
309 my $pv = MM->parse_version($mapped_file) || '(unknown)';
310 my $cv = MM->parse_version($abs_cpan_file) || '(unknown)';
311 if ( $pv ne $cv ) {
312 print $outfh " Version mismatch: $cv (cpan) vs $pv (perl)\n";
313 }
314 }
cb097e7a
DM
315
316 if (File::Compare::compare($abs_cpan_file, $mapped_file)) {
9546b88d
JV
317
318
319 if ($use_diff) {
cb097e7a
DM
320 file_diff($outfh, $abs_cpan_file, $mapped_file,
321 $reverse, $diff_opts);
322 }
323 else {
9546b88d
JV
324 if ($cpan_file eq $relative_mapped_file) {
325 print $outfh " Modified: $relative_mapped_file\n";
cb097e7a
DM
326 }
327 else {
9546b88d 328 print $outfh " Modified: $cpan_file $relative_mapped_file\n";
cb097e7a
DM
329 }
330 }
331 }
332 elsif ($verbose) {
9546b88d 333 if ($cpan_file eq $relative_mapped_file) {
cb097e7a
DM
334 print $outfh " Unchanged: $cpan_file\n";
335 }
336 else {
9546b88d 337 print $outfh " Unchanged: $cpan_file $relative_mapped_file\n";
cb097e7a
DM
338 }
339 }
340 }
341 for (sort keys %perl_unseen) {
342 print $outfh " Perl only: $_\n" unless $use_diff;
343 }
344 }
345}
346
347# given FooBar-1.23_45.tar.gz, return FooBar
348
349sub distro_base {
350 my $d = shift;
351 $d =~ s/\.tar\.gz$//;
352 $d =~ s/\.gip$//;
353 $d =~ s/[\d\-_\.]+$//;
354 return $d;
355}
356
357# process --crosscheck action:
358# ie list all distributions whose CPAN versions differ from that listed in
359# Maintainers.pl
360
361sub do_crosscheck {
10be9a51 362 my ($outfh, $cache_dir, $mirror_url, $force, $modules) = @_;
cb097e7a
DM
363
364 my $file = '02packages.details.txt';
365 my $download_dir = $cache_dir || File::Temp::tempdir( CLEANUP => 1 );
c4940a93 366 my $path = catfile($download_dir, $file);
cb097e7a
DM
367 my $gzfile = "$path.gz";
368
369 # grab 02packages.details.txt
370
10be9a51 371 my $url = cpan_url($mirror_url, "modules/02packages.details.txt.gz");
cb097e7a
DM
372
373 if (! -f $gzfile or $force) {
374 unlink $gzfile;
375 my_getstore($url, $gzfile);
376 }
377 unlink $path;
378 IO::Uncompress::Gunzip::gunzip($gzfile, $path)
379 or die "ERROR: failed to ungzip $gzfile: $IO::Uncompress::Gunzip::GunzipError\n";
380
381 # suck in the data from it
e66db76d 382
cb097e7a
DM
383 open my $fh, '<', $path
384 or die "ERROR: open: $file: $!\n";
385
386 my %distros;
387 my %modules;
388
389 while (<$fh>) {
390 next if 1../^$/;
391 chomp;
392 my @f = split ' ', $_;
393 if (@f != 3) {
394 warn "WARNING: $file:$.: line doesn't have three fields (skipping)\n";
395 next;
396 }
cb097e7a 397 my $distro = $f[2];
f0ce33d7
DM
398 $distro =~ s{^[A-Z]/[A-Z]{2}/}{}; # strip leading A/AB/
399 $modules{$f[0]} = $distro;
400
401 (my $short_distro = $distro) =~ s{^.*/}{};
cb097e7a 402
f0ce33d7 403 $distros{distro_base($short_distro)}{$distro} = 1;
cb097e7a
DM
404 }
405
406 for my $module (@$modules) {
e66db76d 407 my $m = $Maintainers::Modules{$module}
cb097e7a
DM
408 or die "ERROR: No such module in Maintainers.pl: '$module'\n";
409
410 unless ($m->{CPAN}) {
411 print $outfh "\nWARNING: $module is not dual-life; skipping\n";
412 next;
413 }
414
f0ce33d7 415 # given an entry like
cb097e7a
DM
416 # Foo::Bar 1.23 foo-bar-1.23.tar.gz,
417 # first compare the module name against Foo::Bar, and failing that,
418 # against foo-bar
419
420 my $pdist = $m->{DISTRIBUTION};
421 die "ERROR: $module has no DISTRIBUTION entry\n" unless defined $pdist;
cb097e7a
DM
422
423 my $cdist = $modules{$module};
f0ce33d7 424 (my $short_pdist = $pdist) =~ s{^.*/}{};
cb097e7a 425
f0ce33d7
DM
426 unless (defined $cdist) {
427 my $d = $distros{distro_base($short_pdist)};
cb097e7a
DM
428 unless (defined $d) {
429 print $outfh "\n$module: Can't determine current CPAN entry\n";
430 next;
431 }
432 if (keys %$d > 1) {
433 print $outfh "\n$module: (found more than one CPAN candidate):\n";
434 print $outfh " perl: $pdist\n";
435 print $outfh " CPAN: $_\n" for sort keys %$d;
436 next;
437 }
438 $cdist = (keys %$d)[0];
439 }
440
441 if ($cdist ne $pdist) {
442 print $outfh "\n$module:\n Perl: $pdist\n CPAN: $cdist\n";
443 }
444 }
445}
446
447
448
449# get the EXCLUDED and MAP entries for this module, or
450# make up defauts if they don't exist
451
452sub get_map {
453 my ($m, $module_name, $perl_files) = @_;
454
455 my ($excluded, $map) = @$m{qw(EXCLUDED MAP)};
456
457 $excluded ||= [];
458
459 return $excluded, $map if $map;
460
461 # all files under ext/foo-bar (plus maybe some under t/lib)???
462
463 my $ext;
464 for (@$perl_files) {
a193a2db 465 if (m{^((?:ext|dist|cpan)/[^/]+/)}) {
cb097e7a
DM
466 if (defined $ext and $ext ne $1) {
467 # more than one ext/$ext/
468 undef $ext;
469 last;
470 }
471 $ext = $1;
472 }
473 elsif (m{^t/lib/}) {
474 next;
475 }
476 else {
477 undef $ext;
478 last;
479 }
480 }
e66db76d 481
cb097e7a
DM
482 if (defined $ext) {
483 $map = { '' => $ext },
484 }
485 else {
486 (my $base = $module_name) =~ s{::}{/}g;
487 $base ="lib/$base";
488 $map = {
489 'lib/' => 'lib/',
490 '' => "$base/",
491 };
492 }
493 return $excluded, $map;
494}
495
496
497# Given an exclude list and a mapping hash, convert a CPAN filename
498# (eg 't/bar.t') to the equivalent perl filename (eg 'lib/Foo/Bar/t/bar.t').
499# Returns an empty list for an excluded file
500
501sub cpan_to_perl {
502 my ($excluded, $map, $cpan_file) = @_;
503
504 for my $exclude (@$excluded) {
505 # may be a simple string to match exactly, or a pattern
506 if (ref $exclude) {
507 return if $cpan_file =~ $exclude;
508 }
509 else {
510 return if $cpan_file eq $exclude;
511 }
512 }
513
514 my $perl_file = $cpan_file;
515
516 # try longest prefix first, then alphabetically on tie-break
517 for my $prefix (sort { length($b) <=> length($a) || $a cmp $b } keys %$map)
518 {
519 last if $perl_file =~ s/^\Q$prefix/$map->{$prefix}/;
520 }
521 return $perl_file;
522}
523
524
525
526# do LWP::Simple::getstore, possibly without LWP::Simple being available
527
528my $lwp_simple_available;
529
530sub my_getstore {
531 my ($url, $file) = @_;
a08d2aad 532 File::Path::mkpath(File::Basename::dirname($file));
cb097e7a
DM
533 unless (defined $lwp_simple_available) {
534 eval { require LWP::Simple };
535 $lwp_simple_available = $@ eq '';
536 }
537 if ($lwp_simple_available) {
538 return LWP::Simple::is_success(LWP::Simple::getstore($url, $file));
539 }
a08d2aad
DG
540 elsif ( $url =~ qr{\Afile://(?:localhost)?/} ) {
541 (my $local_path = $url) =~ s{\Afile://(?:localhost)?}{};
542 File::Copy::copy( $local_path, $file );
543 }
cb097e7a
DM
544 else {
545 return system(WGET_CMD, "-O", $file, $url) == 0;
546 }
547}
548
549
550# download and unpack a distribution
551# Returns the full pathname of the extracted directory
552# (eg '/tmp/XYZ/Foo_bar-1.23')
553
10be9a51
DG
554# cache_dir: where to download the .tar.gz file to
555# mirror_url: CPAN mirror to download from
e66db76d 556# untar_dir: where to untar or unzup the file
10be9a51
DG
557# module: name of module
558# dist: name of the distribution
cb097e7a
DM
559
560sub get_distribution {
c4940a93 561 my ($src_dir, $mirror_url, $untar_dir, $module, $dist) = @_;
cb097e7a
DM
562
563 $dist =~ m{.+/([^/]+)$}
8c814d1a 564 or die "ERROR: $module: invalid DISTRIBUTION name (no AUTHOR/ prefix): $dist\n";
cb097e7a
DM
565 my $filename = $1;
566
c4940a93 567 my $download_file = catfile($src_dir, $filename);
cb097e7a
DM
568
569 # download distribution
570
571 if (-f $download_file and ! -s $download_file ) {
572 # wget can leave a zero-length file on failed download
573 unlink $download_file;
574 }
575
576 unless (-f $download_file) {
577 # not cached
578 $dist =~ /^([A-Z])([A-Z])/
8c814d1a 579 or die "ERROR: $module: invalid DISTRIBUTION name (not /^[A-Z]{2}/): $dist\n";
cb097e7a 580
10be9a51 581 my $url = cpan_url($mirror_url, "modules/by-authors/id/$1/$1$2/$dist");
cb097e7a 582 my_getstore($url, $download_file)
8c814d1a 583 or die "ERROR: Could not fetch '$url'\n";
cb097e7a
DM
584 }
585
c4940a93 586 # get the expected name of the extracted distribution dir
cb097e7a 587
c4940a93 588 my $path = catfile($untar_dir, $filename);
cb097e7a
DM
589
590 $path =~ s/\.tar\.gz$// or
591 $path =~ s/\.zip$// or
592 die "ERROR: downloaded file does not have a recognised suffix: $path\n";
593
c4940a93
DG
594 # extract it unless we already have it cached or tarball is newer
595 if ( ! -d $path || ( -M $download_file < -M $path ) ) {
596 my $ae = Archive::Extract->new( archive => $download_file);
597 $ae->extract( to => $untar_dir )
598 or die "ERROR: failed to extract distribution '$download_file to temp. dir: " . $ae->error() . "\n";
599 }
600
cb097e7a
DM
601 die "ERROR: Extracted tarball does not appear as $path\n" unless -d $path;
602
603 return $path;
604}
605
606
607# produce the diff of a single file
608sub file_diff {
609 my $outfh = shift;
610 my $cpan_file = shift;
611 my $perl_file = shift;
612 my $reverse = shift;
613 my $diff_opts = shift;
614
615
616 my @cmd = (DIFF_CMD, split ' ', $diff_opts);
617 if ($reverse) {
618 push @cmd, $perl_file, $cpan_file;
619 }
620 else {
621 push @cmd, $cpan_file, $perl_file;
622 }
623 my $result = `@cmd`;
624
625 $result =~ s{^(--- |\+\+\+ )/tmp/[^/]+/}{$1}gm;
626
627 print $outfh $result;
628}
629
630
631run();
632