This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
For miniperl, disable PERL_UNICODE and -C.
[perl5.git] / Porting / core-cpan-diff
CommitLineData
cb097e7a
DM
1#!/usr/bin/env perl
2
3# core-cpan-diff: Compare CPAN modules with their equivalent in core
4
5# Originally based on App::DualLivedDiff by Steffen Mueller.
6
7use strict;
8use warnings;
9
10use 5.010;
11
12use Getopt::Long;
13use File::Temp ();
14use File::Path ();
15use File::Spec;
16use Archive::Extract;
17use IO::Uncompress::Gunzip ();
18use File::Compare ();
19use ExtUtils::Manifest;
20
21BEGIN { die "Must be run from root of perl source tree\n" unless -d 'Porting' }
22use lib 'Porting';
23use Maintainers ();
24
25# if running from blead, we may be doing -Ilib, which means when we
26# 'chdir /tmp/foo', Archive::Extract may not find Archive::Tar etc.
27# So preload the things we need, and tell it to check %INC first:
28
29use Archive::Tar;
30use IPC::Open3;
31use IO::Select;
32$Module::Load::Conditional::CHECK_INC_HASH = 1;
33# stop Archive::Extract whinging about lack of Archive::Zip
34$Archive::Extract::WARN = 0;
35
36
37# Files, which if they exist in CPAN but not in perl, will not generate
38# an 'Only in CPAN' listing
39#
40our %IGNORABLE = map { ($_ => 1) }
41 qw(.cvsignore .dualLivedDiffConfig .gitignore
42 ANNOUNCE Announce Artistic AUTHORS BENCHMARK BUGS Build.PL
43 CHANGELOG ChangeLog CHANGES Changes COPYING Copying CREDITS
44 GOALS HISTORY INSTALL INSTALL.SKIP LICENSE Makefile.PL
45 MANIFEST MANIFEST.SKIP META.yml NEW NOTES ppport.h README
46 SIGNATURE THANKS TODO Todo VERSION WHATSNEW);
47
48# where, under the cache dir, to untar stuff to
49
50use constant UNTAR_DIR => 'untarred';
51
52use constant DIFF_CMD => 'diff';
53use constant WGET_CMD => 'wget';
54
55sub usage {
56 print STDERR "\n@_\n\n" if @_;
57 print STDERR <<HERE;
58Usage: $0 [opts] [ -d | -v | -x ] [ -a | module ... ]
59
60-a/--all Scan all dual-life modules.
61
62-c/--cachedir Where to save downloaded CPAN tarball files
63 (defaults to /tmp/something/ with deletion after each run).
64
65-d/--diff Display file differences using diff(1), rather than just
66 listing which files have changed.
67 The diff(1) command is assumed to be in your PATH.
68
69--diffopts Options to pass to the diff command. Defaults to '-u'.
70
71-f|force Force download from CPAN of new 02packages.details.txt file
72 (with --crosscheck only).
73
74-o/--output File name to write output to (defaults to STDOUT).
75
76-r/--reverse Reverses the diff (perl to CPAN).
77
78-v/--verbose List the fate of *all* files in the tarball, not just those
79 that differ or are missing.
80
81-x|crosscheck List the distributions whose current CPAN version differs from
82 that in blead (i.e. the DISTRIBUTION field in Maintainers.pl).
83
84By default (i.e. without the --crosscheck option), for each listed module
85(or with -a, all CPAN modules listed in Maintainers.pl), grab the tarball
86from CPAN associated with that module, and compare the files in it with
87those in the perl source tree.
88
89Must be run from the root of the perl source tree.
90Module names must match the keys of %Modules in Maintainers.pl.
91HERE
92 exit(1);
93}
94
95
96sub run {
97 my $scan_all;
98 my $diff_opts;
99 my $reverse = 0;
100 my $cache_dir;
101 my $use_diff;
102 my $output_file;
103 my $verbose;
104 my $force;
105 my $do_crosscheck;
106
107 GetOptions(
108 'a|all' => \$scan_all,
109 'c|cachedir=s' => \$cache_dir,
110 'd|diff' => \$use_diff,
111 'diffopts:s' => \$diff_opts,
112 'f|force' => \$force,
113 'h|help' => \&usage,
114 'o|output=s' => \$output_file,
115 'r|reverse' => \$reverse,
116 'v|verbose' => \$verbose,
117 'x|crosscheck' => \$do_crosscheck,
118 ) or usage;
119
120
121 my @modules;
122
123 usage("Cannot mix -a with module list") if $scan_all && @ARGV;
124
125 if ($do_crosscheck) {
126 usage("can't use -r, -d, --diffopts, -v with --crosscheck")
127 if ($reverse || $use_diff || $diff_opts || $verbose);
128 }
129 else {
130 $diff_opts = '-u' unless defined $diff_opts;
131 usage("can't use -f without --crosscheck") if $force;
132 }
133
134 @modules = $scan_all
135 ? grep $Maintainers::Modules{$_}{CPAN},
136 (sort {lc $a cmp lc $b } keys %Maintainers::Modules)
137 : @ARGV;
138 usage("No modules specified") unless @modules;
139
140
141 my $outfh;
142 if (defined $output_file) {
143 open $outfh, '>', $output_file
8c814d1a 144 or die "ERROR: could not open file '$output_file' for writing: $!\n";
cb097e7a
DM
145 }
146 else {
147 open $outfh, ">&STDOUT"
8c814d1a 148 or die "ERROR: can't dup STDOUT: $!\n";
cb097e7a
DM
149 }
150
151 if (defined $cache_dir) {
152 die "ERROR: no such directory: '$cache_dir'\n" unless -d $cache_dir;
153 }
154
155 if ($do_crosscheck) {
156 do_crosscheck($outfh, $cache_dir, $force, \@modules);
157 }
158 else {
f0ce33d7 159 do_compare(\@modules, $outfh, $output_file, $cache_dir, $verbose, $use_diff,
cb097e7a
DM
160 $reverse, $diff_opts);
161 }
162}
163
164
165
166# compare a list of modules against their CPAN equivalents
167
168sub do_compare {
f0ce33d7 169 my ($modules, $outfh, $output_file, $cache_dir, $verbose,
cb097e7a
DM
170 $use_diff, $reverse, $diff_opts) = @_;
171
172
173 # first, make sure we have a directory where they can all be untarred,
174 # and if its a permanent directory, clear any previous content
175 my $untar_dir;
176 if ($cache_dir) {
177 $untar_dir = File::Spec->catdir($cache_dir, UNTAR_DIR);
178 if (-d $untar_dir) {
179 File::Path::rmtree($untar_dir)
180 or die "failed to remove $untar_dir\n";
181 }
182 mkdir $untar_dir
183 or die "mkdir $untar_dir: $!\n";
184 }
185 else {
186 $untar_dir = File::Temp::tempdir( CLEANUP => 1 );
187 }
188
189 my %ignorable = map { ($_ => 1) } @Maintainers::IGNORABLE;
190
191 my %seen_dist;
192 for my $module (@$modules) {
f0ce33d7 193 warn "Processing $module ...\n" if defined $output_file;
cb097e7a
DM
194 print $outfh "\n$module\n" unless $use_diff;
195
196 my $m = $Maintainers::Modules{$module}
197 or die "ERROR: No such module in Maintainers.pl: '$module'\n";
198
199 unless ($m->{CPAN}) {
200 print $outfh "WARNING: $module is not dual-life; skipping\n";
201 next;
202 }
203
204 my $dist = $m->{DISTRIBUTION};
205 die "ERROR: $module has no DISTRIBUTION entry\n" unless defined $dist;
206
207 if ($seen_dist{$dist}) {
208 warn "WARNING: duplicate entry for $dist in $module\n"
209 }
210 $seen_dist{$dist}++;
211
8c814d1a
DM
212 my $cpan_dir;
213 eval {
214 $cpan_dir = get_distribution($cache_dir, $untar_dir, $module, $dist)
215 };
216 if ($@) {
217 print $outfh " ", $@;
218 print $outfh " (skipping)\n";
219 next;
220 }
cb097e7a
DM
221
222 my @perl_files = Maintainers::get_module_files($module);
223
224 my $manifest = File::Spec->catfile($cpan_dir, 'MANIFEST');
225 die "ERROR: no such file: $manifest\n" unless -f $manifest;
226
227 my $cpan_files = ExtUtils::Manifest::maniread($manifest);
228 my @cpan_files = sort keys %$cpan_files;
229
230 my ($excluded, $map) = get_map($m, $module, \@perl_files);
231
232 my %perl_unseen;
233 @perl_unseen{@perl_files} = ();
234 my %perl_files = %perl_unseen;
235
236 foreach my $cpan_file (@cpan_files) {
237 my $mapped_file = cpan_to_perl($excluded, $map, $cpan_file);
238 unless (defined $mapped_file) {
239 print $outfh " Excluded: $cpan_file\n" if $verbose;
240 next;
241 }
242
243 if (exists $perl_files{$mapped_file}) {
244 delete $perl_unseen{$mapped_file};
245 }
246 else {
247 # some CPAN files foo are stored in core as foo.packed,
248 # which are then unpacked by 'make test_prep'
249 my $packed_file = "$mapped_file.packed";
250 if (exists $perl_files{$packed_file} ) {
251 if (! -f $mapped_file and -f $packed_file) {
252 print $outfh <<EOF;
253WARNING: $mapped_file not found, but .packed variant exists.
254Perhaps you need to run 'make test_prep'?
255EOF
256 next;
257 }
258 delete $perl_unseen{$packed_file};
259 }
260 else {
261 if ($ignorable{$cpan_file}) {
262 print $outfh " Ignored: $cpan_file\n" if $verbose;
263 next;
264 }
265
266 unless ($use_diff) {
267 print $outfh " CPAN only: $cpan_file",
268 ($cpan_file eq $mapped_file) ? "\n"
269 : " (expected $mapped_file)\n";
270 }
271 next;
272 }
273 }
274
275
276 my $abs_cpan_file = File::Spec->catfile($cpan_dir, $cpan_file);
277
278 # should never happen
279 die "ERROR: can't find file $abs_cpan_file\n" unless -f $abs_cpan_file;
280
281 # might happen if the FILES entry in Maintainers.pl is wrong
282 unless (-f $mapped_file) {
283 print $outfh "WARNING: perl file not found: $mapped_file\n";
284 next;
285 }
286
287
288 if (File::Compare::compare($abs_cpan_file, $mapped_file)) {
289 if ($use_diff) {
290 file_diff($outfh, $abs_cpan_file, $mapped_file,
291 $reverse, $diff_opts);
292 }
293 else {
294 if ($cpan_file eq $mapped_file) {
295 print $outfh " Modified: $cpan_file\n";
296 }
297 else {
298 print $outfh " Modified: $cpan_file $mapped_file\n";
299 }
300 }
301 }
302 elsif ($verbose) {
303 if ($cpan_file eq $mapped_file) {
304 print $outfh " Unchanged: $cpan_file\n";
305 }
306 else {
307 print $outfh " Unchanged: $cpan_file $mapped_file\n";
308 }
309 }
310 }
311 for (sort keys %perl_unseen) {
312 print $outfh " Perl only: $_\n" unless $use_diff;
313 }
314 }
315}
316
317# given FooBar-1.23_45.tar.gz, return FooBar
318
319sub distro_base {
320 my $d = shift;
321 $d =~ s/\.tar\.gz$//;
322 $d =~ s/\.gip$//;
323 $d =~ s/[\d\-_\.]+$//;
324 return $d;
325}
326
327# process --crosscheck action:
328# ie list all distributions whose CPAN versions differ from that listed in
329# Maintainers.pl
330
331sub do_crosscheck {
332 my ($outfh, $cache_dir, $force, $modules) = @_;
333
334 my $file = '02packages.details.txt';
335 my $download_dir = $cache_dir || File::Temp::tempdir( CLEANUP => 1 );
336 my $path = File::Spec->catfile($download_dir, $file);
337 my $gzfile = "$path.gz";
338
339 # grab 02packages.details.txt
340
341 my $url = 'http://www.cpan.org/modules/02packages.details.txt.gz';
342
343 if (! -f $gzfile or $force) {
344 unlink $gzfile;
345 my_getstore($url, $gzfile);
346 }
347 unlink $path;
348 IO::Uncompress::Gunzip::gunzip($gzfile, $path)
349 or die "ERROR: failed to ungzip $gzfile: $IO::Uncompress::Gunzip::GunzipError\n";
350
351 # suck in the data from it
352
353 open my $fh, '<', $path
354 or die "ERROR: open: $file: $!\n";
355
356 my %distros;
357 my %modules;
358
359 while (<$fh>) {
360 next if 1../^$/;
361 chomp;
362 my @f = split ' ', $_;
363 if (@f != 3) {
364 warn "WARNING: $file:$.: line doesn't have three fields (skipping)\n";
365 next;
366 }
cb097e7a 367 my $distro = $f[2];
f0ce33d7
DM
368 $distro =~ s{^[A-Z]/[A-Z]{2}/}{}; # strip leading A/AB/
369 $modules{$f[0]} = $distro;
370
371 (my $short_distro = $distro) =~ s{^.*/}{};
cb097e7a 372
f0ce33d7 373 $distros{distro_base($short_distro)}{$distro} = 1;
cb097e7a
DM
374 }
375
376 for my $module (@$modules) {
377 my $m = $Maintainers::Modules{$module}
378 or die "ERROR: No such module in Maintainers.pl: '$module'\n";
379
380 unless ($m->{CPAN}) {
381 print $outfh "\nWARNING: $module is not dual-life; skipping\n";
382 next;
383 }
384
f0ce33d7 385 # given an entry like
cb097e7a
DM
386 # Foo::Bar 1.23 foo-bar-1.23.tar.gz,
387 # first compare the module name against Foo::Bar, and failing that,
388 # against foo-bar
389
390 my $pdist = $m->{DISTRIBUTION};
391 die "ERROR: $module has no DISTRIBUTION entry\n" unless defined $pdist;
cb097e7a
DM
392
393 my $cdist = $modules{$module};
f0ce33d7 394 (my $short_pdist = $pdist) =~ s{^.*/}{};
cb097e7a 395
f0ce33d7
DM
396 unless (defined $cdist) {
397 my $d = $distros{distro_base($short_pdist)};
cb097e7a
DM
398 unless (defined $d) {
399 print $outfh "\n$module: Can't determine current CPAN entry\n";
400 next;
401 }
402 if (keys %$d > 1) {
403 print $outfh "\n$module: (found more than one CPAN candidate):\n";
404 print $outfh " perl: $pdist\n";
405 print $outfh " CPAN: $_\n" for sort keys %$d;
406 next;
407 }
408 $cdist = (keys %$d)[0];
409 }
410
411 if ($cdist ne $pdist) {
412 print $outfh "\n$module:\n Perl: $pdist\n CPAN: $cdist\n";
413 }
414 }
415}
416
417
418
419# get the EXCLUDED and MAP entries for this module, or
420# make up defauts if they don't exist
421
422sub get_map {
423 my ($m, $module_name, $perl_files) = @_;
424
425 my ($excluded, $map) = @$m{qw(EXCLUDED MAP)};
426
427 $excluded ||= [];
428
429 return $excluded, $map if $map;
430
431 # all files under ext/foo-bar (plus maybe some under t/lib)???
432
433 my $ext;
434 for (@$perl_files) {
435 if (m{^(ext/[^/]+/)}) {
436 if (defined $ext and $ext ne $1) {
437 # more than one ext/$ext/
438 undef $ext;
439 last;
440 }
441 $ext = $1;
442 }
443 elsif (m{^t/lib/}) {
444 next;
445 }
446 else {
447 undef $ext;
448 last;
449 }
450 }
451
452 if (defined $ext) {
453 $map = { '' => $ext },
454 }
455 else {
456 (my $base = $module_name) =~ s{::}{/}g;
457 $base ="lib/$base";
458 $map = {
459 'lib/' => 'lib/',
460 '' => "$base/",
461 };
462 }
463 return $excluded, $map;
464}
465
466
467# Given an exclude list and a mapping hash, convert a CPAN filename
468# (eg 't/bar.t') to the equivalent perl filename (eg 'lib/Foo/Bar/t/bar.t').
469# Returns an empty list for an excluded file
470
471sub cpan_to_perl {
472 my ($excluded, $map, $cpan_file) = @_;
473
474 for my $exclude (@$excluded) {
475 # may be a simple string to match exactly, or a pattern
476 if (ref $exclude) {
477 return if $cpan_file =~ $exclude;
478 }
479 else {
480 return if $cpan_file eq $exclude;
481 }
482 }
483
484 my $perl_file = $cpan_file;
485
486 # try longest prefix first, then alphabetically on tie-break
487 for my $prefix (sort { length($b) <=> length($a) || $a cmp $b } keys %$map)
488 {
489 last if $perl_file =~ s/^\Q$prefix/$map->{$prefix}/;
490 }
491 return $perl_file;
492}
493
494
495
496# do LWP::Simple::getstore, possibly without LWP::Simple being available
497
498my $lwp_simple_available;
499
500sub my_getstore {
501 my ($url, $file) = @_;
502 unless (defined $lwp_simple_available) {
503 eval { require LWP::Simple };
504 $lwp_simple_available = $@ eq '';
505 }
506 if ($lwp_simple_available) {
507 return LWP::Simple::is_success(LWP::Simple::getstore($url, $file));
508 }
509 else {
510 return system(WGET_CMD, "-O", $file, $url) == 0;
511 }
512}
513
514
515# download and unpack a distribution
516# Returns the full pathname of the extracted directory
517# (eg '/tmp/XYZ/Foo_bar-1.23')
518
519# cache_dir: where to dowenload the .tar.gz file to
520# untar_dir: where to untar or unzup the file
521# module: name of module
522# dist: name of the distribution
523
524sub get_distribution {
525 my ($cache_dir, $untar_dir, $module, $dist) = @_;
526
527 $dist =~ m{.+/([^/]+)$}
8c814d1a 528 or die "ERROR: $module: invalid DISTRIBUTION name (no AUTHOR/ prefix): $dist\n";
cb097e7a
DM
529 my $filename = $1;
530
531 my $download_dir = $cache_dir || File::Temp::tempdir( CLEANUP => 1 );
532 my $download_file = File::Spec->catfile($download_dir, $filename);
533
534 # download distribution
535
536 if (-f $download_file and ! -s $download_file ) {
537 # wget can leave a zero-length file on failed download
538 unlink $download_file;
539 }
540
541 unless (-f $download_file) {
542 # not cached
543 $dist =~ /^([A-Z])([A-Z])/
8c814d1a 544 or die "ERROR: $module: invalid DISTRIBUTION name (not /^[A-Z]{2}/): $dist\n";
cb097e7a
DM
545
546 my $url = "http://www.cpan.org/modules/by-authors/id/$1/$1$2/$dist";
547 my_getstore($url, $download_file)
8c814d1a 548 or die "ERROR: Could not fetch '$url'\n";
cb097e7a
DM
549 }
550
551 # extract distribution
552
553 my $ae = Archive::Extract->new( archive => $download_file);
554 $ae->extract( to => $untar_dir )
8c814d1a 555 or die "ERROR: failed to extract distribution '$download_file to temp. dir: " . $ae->error() . "\n";
cb097e7a
DM
556
557 # get the name of the extracted distribution dir
558
559 my $path = File::Spec->catfile($untar_dir, $filename);
560
561 $path =~ s/\.tar\.gz$// or
562 $path =~ s/\.zip$// or
563 die "ERROR: downloaded file does not have a recognised suffix: $path\n";
564
565 die "ERROR: Extracted tarball does not appear as $path\n" unless -d $path;
566
567 return $path;
568}
569
570
571# produce the diff of a single file
572sub file_diff {
573 my $outfh = shift;
574 my $cpan_file = shift;
575 my $perl_file = shift;
576 my $reverse = shift;
577 my $diff_opts = shift;
578
579
580 my @cmd = (DIFF_CMD, split ' ', $diff_opts);
581 if ($reverse) {
582 push @cmd, $perl_file, $cpan_file;
583 }
584 else {
585 push @cmd, $cpan_file, $perl_file;
586 }
587 my $result = `@cmd`;
588
589 $result =~ s{^(--- |\+\+\+ )/tmp/[^/]+/}{$1}gm;
590
591 print $outfh $result;
592}
593
594
595run();
596