This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
core-cpan-diff: fix handling of file:/// urls
[perl5.git] / Porting / core-cpan-diff
CommitLineData
cb097e7a
DM
1#!/usr/bin/env perl
2
3# core-cpan-diff: Compare CPAN modules with their equivalent in core
4
5# Originally based on App::DualLivedDiff by Steffen Mueller.
e66db76d 6
cb097e7a
DM
7use strict;
8use warnings;
9
10use 5.010;
11
12use Getopt::Long;
a08d2aad
DG
13use File::Basename ();
14use File::Copy ();
cb097e7a
DM
15use File::Temp ();
16use File::Path ();
17use File::Spec;
10be9a51 18use File::Spec::Unix ();
cb097e7a
DM
19use Archive::Extract;
20use IO::Uncompress::Gunzip ();
21use File::Compare ();
22use ExtUtils::Manifest;
23
24BEGIN { die "Must be run from root of perl source tree\n" unless -d 'Porting' }
25use lib 'Porting';
26use Maintainers ();
27
28# if running from blead, we may be doing -Ilib, which means when we
29# 'chdir /tmp/foo', Archive::Extract may not find Archive::Tar etc.
30# So preload the things we need, and tell it to check %INC first:
31
32use Archive::Tar;
33use IPC::Open3;
34use IO::Select;
35$Module::Load::Conditional::CHECK_INC_HASH = 1;
36# stop Archive::Extract whinging about lack of Archive::Zip
37$Archive::Extract::WARN = 0;
38
39
40# Files, which if they exist in CPAN but not in perl, will not generate
41# an 'Only in CPAN' listing
42#
43our %IGNORABLE = map { ($_ => 1) }
44 qw(.cvsignore .dualLivedDiffConfig .gitignore
45 ANNOUNCE Announce Artistic AUTHORS BENCHMARK BUGS Build.PL
46 CHANGELOG ChangeLog CHANGES Changes COPYING Copying CREDITS
47 GOALS HISTORY INSTALL INSTALL.SKIP LICENSE Makefile.PL
48 MANIFEST MANIFEST.SKIP META.yml NEW NOTES ppport.h README
49 SIGNATURE THANKS TODO Todo VERSION WHATSNEW);
50
51# where, under the cache dir, to untar stuff to
52
53use constant UNTAR_DIR => 'untarred';
54
55use constant DIFF_CMD => 'diff';
56use constant WGET_CMD => 'wget';
57
58sub usage {
59 print STDERR "\n@_\n\n" if @_;
60 print STDERR <<HERE;
61Usage: $0 [opts] [ -d | -v | -x ] [ -a | module ... ]
62
63-a/--all Scan all dual-life modules.
64
65-c/--cachedir Where to save downloaded CPAN tarball files
66 (defaults to /tmp/something/ with deletion after each run).
67
68-d/--diff Display file differences using diff(1), rather than just
69 listing which files have changed.
70 The diff(1) command is assumed to be in your PATH.
71
72--diffopts Options to pass to the diff command. Defaults to '-u'.
73
74-f|force Force download from CPAN of new 02packages.details.txt file
75 (with --crosscheck only).
76
10be9a51
DG
77-m|mirror Preferred CPAN mirror URI (http:// or file:///)
78 (Local mirror must be a complete mirror, not minicpan)
79
cb097e7a
DM
80-o/--output File name to write output to (defaults to STDOUT).
81
82-r/--reverse Reverses the diff (perl to CPAN).
83
2908b263
RS
84-u/--upstream only print modules with the given upstream (defaults to all)
85
cb097e7a
DM
86-v/--verbose List the fate of *all* files in the tarball, not just those
87 that differ or are missing.
88
89-x|crosscheck List the distributions whose current CPAN version differs from
90 that in blead (i.e. the DISTRIBUTION field in Maintainers.pl).
91
92By default (i.e. without the --crosscheck option), for each listed module
93(or with -a, all CPAN modules listed in Maintainers.pl), grab the tarball
94from CPAN associated with that module, and compare the files in it with
95those in the perl source tree.
96
97Must be run from the root of the perl source tree.
98Module names must match the keys of %Modules in Maintainers.pl.
99HERE
100 exit(1);
101}
102
103
104sub run {
105 my $scan_all;
106 my $diff_opts;
107 my $reverse = 0;
2908b263 108 my @wanted_upstreams;
cb097e7a 109 my $cache_dir;
10be9a51 110 my $mirror_url = "http://www.cpan.org/";
cb097e7a
DM
111 my $use_diff;
112 my $output_file;
113 my $verbose;
114 my $force;
115 my $do_crosscheck;
116
117 GetOptions(
118 'a|all' => \$scan_all,
119 'c|cachedir=s' => \$cache_dir,
120 'd|diff' => \$use_diff,
121 'diffopts:s' => \$diff_opts,
122 'f|force' => \$force,
123 'h|help' => \&usage,
10be9a51 124 'm|mirror=s' => \$mirror_url,
cb097e7a
DM
125 'o|output=s' => \$output_file,
126 'r|reverse' => \$reverse,
2908b263 127 'u|upstream=s@'=> \@wanted_upstreams,
cb097e7a
DM
128 'v|verbose' => \$verbose,
129 'x|crosscheck' => \$do_crosscheck,
130 ) or usage;
131
132
133 my @modules;
134
135 usage("Cannot mix -a with module list") if $scan_all && @ARGV;
136
137 if ($do_crosscheck) {
138 usage("can't use -r, -d, --diffopts, -v with --crosscheck")
139 if ($reverse || $use_diff || $diff_opts || $verbose);
140 }
141 else {
142 $diff_opts = '-u' unless defined $diff_opts;
143 usage("can't use -f without --crosscheck") if $force;
144 }
145
146 @modules = $scan_all
147 ? grep $Maintainers::Modules{$_}{CPAN},
148 (sort {lc $a cmp lc $b } keys %Maintainers::Modules)
149 : @ARGV;
150 usage("No modules specified") unless @modules;
151
152
153 my $outfh;
154 if (defined $output_file) {
155 open $outfh, '>', $output_file
8c814d1a 156 or die "ERROR: could not open file '$output_file' for writing: $!\n";
cb097e7a
DM
157 }
158 else {
159 open $outfh, ">&STDOUT"
8c814d1a 160 or die "ERROR: can't dup STDOUT: $!\n";
cb097e7a
DM
161 }
162
163 if (defined $cache_dir) {
164 die "ERROR: no such directory: '$cache_dir'\n" unless -d $cache_dir;
165 }
166
10be9a51
DG
167 $mirror_url .= "/" unless substr($mirror_url,-1) eq "/";
168 my $test_file = "modules/07mirror.yml";
169 my_getstore(cpan_url($mirror_url, $test_file), local_path($cache_dir, $test_file))
170 or die "ERROR: not a CPAN mirror '$mirror_url'\n";
171
cb097e7a 172 if ($do_crosscheck) {
10be9a51 173 do_crosscheck($outfh, $cache_dir, $mirror_url, $force, \@modules);
cb097e7a
DM
174 }
175 else {
10be9a51 176 do_compare(\@modules, $outfh, $output_file, $cache_dir, $mirror_url, $verbose, $use_diff,
2908b263 177 $reverse, $diff_opts, \@wanted_upstreams);
cb097e7a
DM
178 }
179}
180
10be9a51
DG
181# construct a local path either in cache dir or tempdir
182
183sub local_path {
184 my ($cache_dir, @path) = @_;
185 my $local_dir = $cache_dir || File::Temp::tempdir( CLEANUP => 1 );
186 return File::Spec->catfile($local_dir, @path);
187}
188
189# construct a CPAN url
cb097e7a 190
10be9a51
DG
191sub cpan_url {
192 my ($mirror_url, @path) = @_;
193 return $mirror_url unless @path;
194 my $cpan_path = join( "/", map { split "/", $_ } @path );
195 $cpan_path =~ s{\A/}{}; # remove leading slash since url has one trailing
196 return $mirror_url . $cpan_path;
197}
cb097e7a
DM
198
199# compare a list of modules against their CPAN equivalents
200
201sub do_compare {
10be9a51 202 my ($modules, $outfh, $output_file, $cache_dir, $mirror_url, $verbose,
2908b263 203 $use_diff, $reverse, $diff_opts, $wanted_upstreams) = @_;
cb097e7a
DM
204
205
206 # first, make sure we have a directory where they can all be untarred,
207 # and if its a permanent directory, clear any previous content
208 my $untar_dir;
209 if ($cache_dir) {
e66db76d 210 $untar_dir = File::Spec->catdir($cache_dir, UNTAR_DIR);
cb097e7a
DM
211 if (-d $untar_dir) {
212 File::Path::rmtree($untar_dir)
213 or die "failed to remove $untar_dir\n";
214 }
215 mkdir $untar_dir
216 or die "mkdir $untar_dir: $!\n";
217 }
218 else {
219 $untar_dir = File::Temp::tempdir( CLEANUP => 1 );
220 }
221
222 my %ignorable = map { ($_ => 1) } @Maintainers::IGNORABLE;
223
224 my %seen_dist;
225 for my $module (@$modules) {
f0ce33d7 226 warn "Processing $module ...\n" if defined $output_file;
cb097e7a 227
e66db76d 228 my $m = $Maintainers::Modules{$module}
cb097e7a
DM
229 or die "ERROR: No such module in Maintainers.pl: '$module'\n";
230
231 unless ($m->{CPAN}) {
232 print $outfh "WARNING: $module is not dual-life; skipping\n";
233 next;
234 }
235
236 my $dist = $m->{DISTRIBUTION};
237 die "ERROR: $module has no DISTRIBUTION entry\n" unless defined $dist;
238
239 if ($seen_dist{$dist}) {
240 warn "WARNING: duplicate entry for $dist in $module\n"
241 }
d55832d0 242
2908b263
RS
243 my $upstream = $m->{UPSTREAM} || 'UNKNOWN';
244 next if @$wanted_upstreams and ! ($upstream ~~ $wanted_upstreams);
9546b88d 245 print $outfh "\n$module - ".$Maintainers::Modules{$module}->{DISTRIBUTION}."\n" unless $use_diff;
d55832d0
JV
246 print $outfh " upstream is: ".($m->{UPSTREAM} || 'UNKNOWN!')."\n";
247
cb097e7a
DM
248 $seen_dist{$dist}++;
249
8c814d1a
DM
250 my $cpan_dir;
251 eval {
10be9a51 252 $cpan_dir = get_distribution($cache_dir, $mirror_url, $untar_dir, $module, $dist)
8c814d1a
DM
253 };
254 if ($@) {
255 print $outfh " ", $@;
256 print $outfh " (skipping)\n";
257 next;
258 }
cb097e7a
DM
259
260 my @perl_files = Maintainers::get_module_files($module);
261
262 my $manifest = File::Spec->catfile($cpan_dir, 'MANIFEST');
263 die "ERROR: no such file: $manifest\n" unless -f $manifest;
264
265 my $cpan_files = ExtUtils::Manifest::maniread($manifest);
266 my @cpan_files = sort keys %$cpan_files;
267
268 my ($excluded, $map) = get_map($m, $module, \@perl_files);
269
270 my %perl_unseen;
271 @perl_unseen{@perl_files} = ();
272 my %perl_files = %perl_unseen;
273
274 foreach my $cpan_file (@cpan_files) {
275 my $mapped_file = cpan_to_perl($excluded, $map, $cpan_file);
276 unless (defined $mapped_file) {
277 print $outfh " Excluded: $cpan_file\n" if $verbose;
278 next;
279 }
280
281 if (exists $perl_files{$mapped_file}) {
282 delete $perl_unseen{$mapped_file};
283 }
284 else {
285 # some CPAN files foo are stored in core as foo.packed,
286 # which are then unpacked by 'make test_prep'
287 my $packed_file = "$mapped_file.packed";
288 if (exists $perl_files{$packed_file} ) {
289 if (! -f $mapped_file and -f $packed_file) {
290 print $outfh <<EOF;
291WARNING: $mapped_file not found, but .packed variant exists.
292Perhaps you need to run 'make test_prep'?
293EOF
294 next;
295 }
296 delete $perl_unseen{$packed_file};
297 }
298 else {
299 if ($ignorable{$cpan_file}) {
300 print $outfh " Ignored: $cpan_file\n" if $verbose;
301 next;
302 }
303
304 unless ($use_diff) {
305 print $outfh " CPAN only: $cpan_file",
306 ($cpan_file eq $mapped_file) ? "\n"
307 : " (expected $mapped_file)\n";
308 }
309 next;
310 }
311 }
312
313
314 my $abs_cpan_file = File::Spec->catfile($cpan_dir, $cpan_file);
315
316 # should never happen
317 die "ERROR: can't find file $abs_cpan_file\n" unless -f $abs_cpan_file;
318
319 # might happen if the FILES entry in Maintainers.pl is wrong
320 unless (-f $mapped_file) {
321 print $outfh "WARNING: perl file not found: $mapped_file\n";
322 next;
323 }
324
9546b88d
JV
325 my $relative_mapped_file = $mapped_file;
326 $relative_mapped_file =~ s/^(cpan|dist|ext)\/.*?\///;
cb097e7a
DM
327
328 if (File::Compare::compare($abs_cpan_file, $mapped_file)) {
9546b88d
JV
329
330
331 if ($use_diff) {
cb097e7a
DM
332 file_diff($outfh, $abs_cpan_file, $mapped_file,
333 $reverse, $diff_opts);
334 }
335 else {
9546b88d
JV
336 if ($cpan_file eq $relative_mapped_file) {
337 print $outfh " Modified: $relative_mapped_file\n";
cb097e7a
DM
338 }
339 else {
9546b88d 340 print $outfh " Modified: $cpan_file $relative_mapped_file\n";
cb097e7a
DM
341 }
342 }
343 }
344 elsif ($verbose) {
9546b88d 345 if ($cpan_file eq $relative_mapped_file) {
cb097e7a
DM
346 print $outfh " Unchanged: $cpan_file\n";
347 }
348 else {
9546b88d 349 print $outfh " Unchanged: $cpan_file $relative_mapped_file\n";
cb097e7a
DM
350 }
351 }
352 }
353 for (sort keys %perl_unseen) {
354 print $outfh " Perl only: $_\n" unless $use_diff;
355 }
356 }
357}
358
359# given FooBar-1.23_45.tar.gz, return FooBar
360
361sub distro_base {
362 my $d = shift;
363 $d =~ s/\.tar\.gz$//;
364 $d =~ s/\.gip$//;
365 $d =~ s/[\d\-_\.]+$//;
366 return $d;
367}
368
369# process --crosscheck action:
370# ie list all distributions whose CPAN versions differ from that listed in
371# Maintainers.pl
372
373sub do_crosscheck {
10be9a51 374 my ($outfh, $cache_dir, $mirror_url, $force, $modules) = @_;
cb097e7a
DM
375
376 my $file = '02packages.details.txt';
377 my $download_dir = $cache_dir || File::Temp::tempdir( CLEANUP => 1 );
378 my $path = File::Spec->catfile($download_dir, $file);
379 my $gzfile = "$path.gz";
380
381 # grab 02packages.details.txt
382
10be9a51 383 my $url = cpan_url($mirror_url, "modules/02packages.details.txt.gz");
cb097e7a
DM
384
385 if (! -f $gzfile or $force) {
386 unlink $gzfile;
387 my_getstore($url, $gzfile);
388 }
389 unlink $path;
390 IO::Uncompress::Gunzip::gunzip($gzfile, $path)
391 or die "ERROR: failed to ungzip $gzfile: $IO::Uncompress::Gunzip::GunzipError\n";
392
393 # suck in the data from it
e66db76d 394
cb097e7a
DM
395 open my $fh, '<', $path
396 or die "ERROR: open: $file: $!\n";
397
398 my %distros;
399 my %modules;
400
401 while (<$fh>) {
402 next if 1../^$/;
403 chomp;
404 my @f = split ' ', $_;
405 if (@f != 3) {
406 warn "WARNING: $file:$.: line doesn't have three fields (skipping)\n";
407 next;
408 }
cb097e7a 409 my $distro = $f[2];
f0ce33d7
DM
410 $distro =~ s{^[A-Z]/[A-Z]{2}/}{}; # strip leading A/AB/
411 $modules{$f[0]} = $distro;
412
413 (my $short_distro = $distro) =~ s{^.*/}{};
cb097e7a 414
f0ce33d7 415 $distros{distro_base($short_distro)}{$distro} = 1;
cb097e7a
DM
416 }
417
418 for my $module (@$modules) {
e66db76d 419 my $m = $Maintainers::Modules{$module}
cb097e7a
DM
420 or die "ERROR: No such module in Maintainers.pl: '$module'\n";
421
422 unless ($m->{CPAN}) {
423 print $outfh "\nWARNING: $module is not dual-life; skipping\n";
424 next;
425 }
426
f0ce33d7 427 # given an entry like
cb097e7a
DM
428 # Foo::Bar 1.23 foo-bar-1.23.tar.gz,
429 # first compare the module name against Foo::Bar, and failing that,
430 # against foo-bar
431
432 my $pdist = $m->{DISTRIBUTION};
433 die "ERROR: $module has no DISTRIBUTION entry\n" unless defined $pdist;
cb097e7a
DM
434
435 my $cdist = $modules{$module};
f0ce33d7 436 (my $short_pdist = $pdist) =~ s{^.*/}{};
cb097e7a 437
f0ce33d7
DM
438 unless (defined $cdist) {
439 my $d = $distros{distro_base($short_pdist)};
cb097e7a
DM
440 unless (defined $d) {
441 print $outfh "\n$module: Can't determine current CPAN entry\n";
442 next;
443 }
444 if (keys %$d > 1) {
445 print $outfh "\n$module: (found more than one CPAN candidate):\n";
446 print $outfh " perl: $pdist\n";
447 print $outfh " CPAN: $_\n" for sort keys %$d;
448 next;
449 }
450 $cdist = (keys %$d)[0];
451 }
452
453 if ($cdist ne $pdist) {
454 print $outfh "\n$module:\n Perl: $pdist\n CPAN: $cdist\n";
455 }
456 }
457}
458
459
460
461# get the EXCLUDED and MAP entries for this module, or
462# make up defauts if they don't exist
463
464sub get_map {
465 my ($m, $module_name, $perl_files) = @_;
466
467 my ($excluded, $map) = @$m{qw(EXCLUDED MAP)};
468
469 $excluded ||= [];
470
471 return $excluded, $map if $map;
472
473 # all files under ext/foo-bar (plus maybe some under t/lib)???
474
475 my $ext;
476 for (@$perl_files) {
a193a2db 477 if (m{^((?:ext|dist|cpan)/[^/]+/)}) {
cb097e7a
DM
478 if (defined $ext and $ext ne $1) {
479 # more than one ext/$ext/
480 undef $ext;
481 last;
482 }
483 $ext = $1;
484 }
485 elsif (m{^t/lib/}) {
486 next;
487 }
488 else {
489 undef $ext;
490 last;
491 }
492 }
e66db76d 493
cb097e7a
DM
494 if (defined $ext) {
495 $map = { '' => $ext },
496 }
497 else {
498 (my $base = $module_name) =~ s{::}{/}g;
499 $base ="lib/$base";
500 $map = {
501 'lib/' => 'lib/',
502 '' => "$base/",
503 };
504 }
505 return $excluded, $map;
506}
507
508
509# Given an exclude list and a mapping hash, convert a CPAN filename
510# (eg 't/bar.t') to the equivalent perl filename (eg 'lib/Foo/Bar/t/bar.t').
511# Returns an empty list for an excluded file
512
513sub cpan_to_perl {
514 my ($excluded, $map, $cpan_file) = @_;
515
516 for my $exclude (@$excluded) {
517 # may be a simple string to match exactly, or a pattern
518 if (ref $exclude) {
519 return if $cpan_file =~ $exclude;
520 }
521 else {
522 return if $cpan_file eq $exclude;
523 }
524 }
525
526 my $perl_file = $cpan_file;
527
528 # try longest prefix first, then alphabetically on tie-break
529 for my $prefix (sort { length($b) <=> length($a) || $a cmp $b } keys %$map)
530 {
531 last if $perl_file =~ s/^\Q$prefix/$map->{$prefix}/;
532 }
533 return $perl_file;
534}
535
536
537
538# do LWP::Simple::getstore, possibly without LWP::Simple being available
539
540my $lwp_simple_available;
541
542sub my_getstore {
543 my ($url, $file) = @_;
a08d2aad 544 File::Path::mkpath(File::Basename::dirname($file));
cb097e7a
DM
545 unless (defined $lwp_simple_available) {
546 eval { require LWP::Simple };
547 $lwp_simple_available = $@ eq '';
548 }
549 if ($lwp_simple_available) {
550 return LWP::Simple::is_success(LWP::Simple::getstore($url, $file));
551 }
a08d2aad
DG
552 elsif ( $url =~ qr{\Afile://(?:localhost)?/} ) {
553 (my $local_path = $url) =~ s{\Afile://(?:localhost)?}{};
554 File::Copy::copy( $local_path, $file );
555 }
cb097e7a
DM
556 else {
557 return system(WGET_CMD, "-O", $file, $url) == 0;
558 }
559}
560
561
562# download and unpack a distribution
563# Returns the full pathname of the extracted directory
564# (eg '/tmp/XYZ/Foo_bar-1.23')
565
10be9a51
DG
566# cache_dir: where to download the .tar.gz file to
567# mirror_url: CPAN mirror to download from
e66db76d 568# untar_dir: where to untar or unzup the file
10be9a51
DG
569# module: name of module
570# dist: name of the distribution
cb097e7a
DM
571
572sub get_distribution {
10be9a51 573 my ($cache_dir, $mirror_url, $untar_dir, $module, $dist) = @_;
cb097e7a
DM
574
575 $dist =~ m{.+/([^/]+)$}
8c814d1a 576 or die "ERROR: $module: invalid DISTRIBUTION name (no AUTHOR/ prefix): $dist\n";
cb097e7a
DM
577 my $filename = $1;
578
579 my $download_dir = $cache_dir || File::Temp::tempdir( CLEANUP => 1 );
580 my $download_file = File::Spec->catfile($download_dir, $filename);
581
582 # download distribution
583
584 if (-f $download_file and ! -s $download_file ) {
585 # wget can leave a zero-length file on failed download
586 unlink $download_file;
587 }
588
589 unless (-f $download_file) {
590 # not cached
591 $dist =~ /^([A-Z])([A-Z])/
8c814d1a 592 or die "ERROR: $module: invalid DISTRIBUTION name (not /^[A-Z]{2}/): $dist\n";
cb097e7a 593
10be9a51 594 my $url = cpan_url($mirror_url, "modules/by-authors/id/$1/$1$2/$dist");
cb097e7a 595 my_getstore($url, $download_file)
8c814d1a 596 or die "ERROR: Could not fetch '$url'\n";
cb097e7a
DM
597 }
598
599 # extract distribution
600
601 my $ae = Archive::Extract->new( archive => $download_file);
602 $ae->extract( to => $untar_dir )
8c814d1a 603 or die "ERROR: failed to extract distribution '$download_file to temp. dir: " . $ae->error() . "\n";
cb097e7a
DM
604
605 # get the name of the extracted distribution dir
606
607 my $path = File::Spec->catfile($untar_dir, $filename);
608
609 $path =~ s/\.tar\.gz$// or
610 $path =~ s/\.zip$// or
611 die "ERROR: downloaded file does not have a recognised suffix: $path\n";
612
613 die "ERROR: Extracted tarball does not appear as $path\n" unless -d $path;
614
615 return $path;
616}
617
618
619# produce the diff of a single file
620sub file_diff {
621 my $outfh = shift;
622 my $cpan_file = shift;
623 my $perl_file = shift;
624 my $reverse = shift;
625 my $diff_opts = shift;
626
627
628 my @cmd = (DIFF_CMD, split ' ', $diff_opts);
629 if ($reverse) {
630 push @cmd, $perl_file, $cpan_file;
631 }
632 else {
633 push @cmd, $cpan_file, $perl_file;
634 }
635 my $result = `@cmd`;
636
637 $result =~ s{^(--- |\+\+\+ )/tmp/[^/]+/}{$1}gm;
638
639 print $outfh $result;
640}
641
642
643run();
644