This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Implement facility to plug in syntax triggered by keywords
[perl5.git] / Porting / core-cpan-diff
CommitLineData
cb097e7a
DM
1#!/usr/bin/env perl
2
3# core-cpan-diff: Compare CPAN modules with their equivalent in core
4
5# Originally based on App::DualLivedDiff by Steffen Mueller.
6
7use strict;
8use warnings;
9
10use 5.010;
11
12use Getopt::Long;
13use File::Temp ();
14use File::Path ();
15use File::Spec;
16use Archive::Extract;
17use IO::Uncompress::Gunzip ();
18use File::Compare ();
19use ExtUtils::Manifest;
20
21BEGIN { die "Must be run from root of perl source tree\n" unless -d 'Porting' }
22use lib 'Porting';
23use Maintainers ();
24
25# if running from blead, we may be doing -Ilib, which means when we
26# 'chdir /tmp/foo', Archive::Extract may not find Archive::Tar etc.
27# So preload the things we need, and tell it to check %INC first:
28
29use Archive::Tar;
30use IPC::Open3;
31use IO::Select;
32$Module::Load::Conditional::CHECK_INC_HASH = 1;
33# stop Archive::Extract whinging about lack of Archive::Zip
34$Archive::Extract::WARN = 0;
35
36
37# Files, which if they exist in CPAN but not in perl, will not generate
38# an 'Only in CPAN' listing
39#
40our %IGNORABLE = map { ($_ => 1) }
41 qw(.cvsignore .dualLivedDiffConfig .gitignore
42 ANNOUNCE Announce Artistic AUTHORS BENCHMARK BUGS Build.PL
43 CHANGELOG ChangeLog CHANGES Changes COPYING Copying CREDITS
44 GOALS HISTORY INSTALL INSTALL.SKIP LICENSE Makefile.PL
45 MANIFEST MANIFEST.SKIP META.yml NEW NOTES ppport.h README
46 SIGNATURE THANKS TODO Todo VERSION WHATSNEW);
47
48# where, under the cache dir, to untar stuff to
49
50use constant UNTAR_DIR => 'untarred';
51
52use constant DIFF_CMD => 'diff';
53use constant WGET_CMD => 'wget';
54
55sub usage {
56 print STDERR "\n@_\n\n" if @_;
57 print STDERR <<HERE;
58Usage: $0 [opts] [ -d | -v | -x ] [ -a | module ... ]
59
60-a/--all Scan all dual-life modules.
61
62-c/--cachedir Where to save downloaded CPAN tarball files
63 (defaults to /tmp/something/ with deletion after each run).
64
65-d/--diff Display file differences using diff(1), rather than just
66 listing which files have changed.
67 The diff(1) command is assumed to be in your PATH.
68
69--diffopts Options to pass to the diff command. Defaults to '-u'.
70
71-f|force Force download from CPAN of new 02packages.details.txt file
72 (with --crosscheck only).
73
74-o/--output File name to write output to (defaults to STDOUT).
75
76-r/--reverse Reverses the diff (perl to CPAN).
77
78-v/--verbose List the fate of *all* files in the tarball, not just those
79 that differ or are missing.
80
81-x|crosscheck List the distributions whose current CPAN version differs from
82 that in blead (i.e. the DISTRIBUTION field in Maintainers.pl).
83
84By default (i.e. without the --crosscheck option), for each listed module
85(or with -a, all CPAN modules listed in Maintainers.pl), grab the tarball
86from CPAN associated with that module, and compare the files in it with
87those in the perl source tree.
88
89Must be run from the root of the perl source tree.
90Module names must match the keys of %Modules in Maintainers.pl.
91HERE
92 exit(1);
93}
94
95
96sub run {
97 my $scan_all;
98 my $diff_opts;
99 my $reverse = 0;
100 my $cache_dir;
101 my $use_diff;
102 my $output_file;
103 my $verbose;
104 my $force;
105 my $do_crosscheck;
106
107 GetOptions(
108 'a|all' => \$scan_all,
109 'c|cachedir=s' => \$cache_dir,
110 'd|diff' => \$use_diff,
111 'diffopts:s' => \$diff_opts,
112 'f|force' => \$force,
113 'h|help' => \&usage,
114 'o|output=s' => \$output_file,
115 'r|reverse' => \$reverse,
116 'v|verbose' => \$verbose,
117 'x|crosscheck' => \$do_crosscheck,
118 ) or usage;
119
120
121 my @modules;
122
123 usage("Cannot mix -a with module list") if $scan_all && @ARGV;
124
125 if ($do_crosscheck) {
126 usage("can't use -r, -d, --diffopts, -v with --crosscheck")
127 if ($reverse || $use_diff || $diff_opts || $verbose);
128 }
129 else {
130 $diff_opts = '-u' unless defined $diff_opts;
131 usage("can't use -f without --crosscheck") if $force;
132 }
133
134 @modules = $scan_all
135 ? grep $Maintainers::Modules{$_}{CPAN},
136 (sort {lc $a cmp lc $b } keys %Maintainers::Modules)
137 : @ARGV;
138 usage("No modules specified") unless @modules;
139
140
141 my $outfh;
142 if (defined $output_file) {
143 open $outfh, '>', $output_file
8c814d1a 144 or die "ERROR: could not open file '$output_file' for writing: $!\n";
cb097e7a
DM
145 }
146 else {
147 open $outfh, ">&STDOUT"
8c814d1a 148 or die "ERROR: can't dup STDOUT: $!\n";
cb097e7a
DM
149 }
150
151 if (defined $cache_dir) {
152 die "ERROR: no such directory: '$cache_dir'\n" unless -d $cache_dir;
153 }
154
155 if ($do_crosscheck) {
156 do_crosscheck($outfh, $cache_dir, $force, \@modules);
157 }
158 else {
f0ce33d7 159 do_compare(\@modules, $outfh, $output_file, $cache_dir, $verbose, $use_diff,
cb097e7a
DM
160 $reverse, $diff_opts);
161 }
162}
163
164
165
166# compare a list of modules against their CPAN equivalents
167
168sub do_compare {
f0ce33d7 169 my ($modules, $outfh, $output_file, $cache_dir, $verbose,
cb097e7a
DM
170 $use_diff, $reverse, $diff_opts) = @_;
171
172
173 # first, make sure we have a directory where they can all be untarred,
174 # and if its a permanent directory, clear any previous content
175 my $untar_dir;
176 if ($cache_dir) {
177 $untar_dir = File::Spec->catdir($cache_dir, UNTAR_DIR);
178 if (-d $untar_dir) {
179 File::Path::rmtree($untar_dir)
180 or die "failed to remove $untar_dir\n";
181 }
182 mkdir $untar_dir
183 or die "mkdir $untar_dir: $!\n";
184 }
185 else {
186 $untar_dir = File::Temp::tempdir( CLEANUP => 1 );
187 }
188
189 my %ignorable = map { ($_ => 1) } @Maintainers::IGNORABLE;
190
191 my %seen_dist;
192 for my $module (@$modules) {
f0ce33d7 193 warn "Processing $module ...\n" if defined $output_file;
cb097e7a
DM
194
195 my $m = $Maintainers::Modules{$module}
196 or die "ERROR: No such module in Maintainers.pl: '$module'\n";
197
198 unless ($m->{CPAN}) {
199 print $outfh "WARNING: $module is not dual-life; skipping\n";
200 next;
201 }
202
203 my $dist = $m->{DISTRIBUTION};
204 die "ERROR: $module has no DISTRIBUTION entry\n" unless defined $dist;
205
206 if ($seen_dist{$dist}) {
207 warn "WARNING: duplicate entry for $dist in $module\n"
208 }
d55832d0 209
9546b88d 210 print $outfh "\n$module - ".$Maintainers::Modules{$module}->{DISTRIBUTION}."\n" unless $use_diff;
d55832d0
JV
211 print $outfh " upstream is: ".($m->{UPSTREAM} || 'UNKNOWN!')."\n";
212
cb097e7a
DM
213 $seen_dist{$dist}++;
214
8c814d1a
DM
215 my $cpan_dir;
216 eval {
217 $cpan_dir = get_distribution($cache_dir, $untar_dir, $module, $dist)
218 };
219 if ($@) {
220 print $outfh " ", $@;
221 print $outfh " (skipping)\n";
222 next;
223 }
cb097e7a
DM
224
225 my @perl_files = Maintainers::get_module_files($module);
226
227 my $manifest = File::Spec->catfile($cpan_dir, 'MANIFEST');
228 die "ERROR: no such file: $manifest\n" unless -f $manifest;
229
230 my $cpan_files = ExtUtils::Manifest::maniread($manifest);
231 my @cpan_files = sort keys %$cpan_files;
232
233 my ($excluded, $map) = get_map($m, $module, \@perl_files);
234
235 my %perl_unseen;
236 @perl_unseen{@perl_files} = ();
237 my %perl_files = %perl_unseen;
238
239 foreach my $cpan_file (@cpan_files) {
240 my $mapped_file = cpan_to_perl($excluded, $map, $cpan_file);
241 unless (defined $mapped_file) {
242 print $outfh " Excluded: $cpan_file\n" if $verbose;
243 next;
244 }
245
246 if (exists $perl_files{$mapped_file}) {
247 delete $perl_unseen{$mapped_file};
248 }
249 else {
250 # some CPAN files foo are stored in core as foo.packed,
251 # which are then unpacked by 'make test_prep'
252 my $packed_file = "$mapped_file.packed";
253 if (exists $perl_files{$packed_file} ) {
254 if (! -f $mapped_file and -f $packed_file) {
255 print $outfh <<EOF;
256WARNING: $mapped_file not found, but .packed variant exists.
257Perhaps you need to run 'make test_prep'?
258EOF
259 next;
260 }
261 delete $perl_unseen{$packed_file};
262 }
263 else {
264 if ($ignorable{$cpan_file}) {
265 print $outfh " Ignored: $cpan_file\n" if $verbose;
266 next;
267 }
268
269 unless ($use_diff) {
270 print $outfh " CPAN only: $cpan_file",
271 ($cpan_file eq $mapped_file) ? "\n"
272 : " (expected $mapped_file)\n";
273 }
274 next;
275 }
276 }
277
278
279 my $abs_cpan_file = File::Spec->catfile($cpan_dir, $cpan_file);
280
281 # should never happen
282 die "ERROR: can't find file $abs_cpan_file\n" unless -f $abs_cpan_file;
283
284 # might happen if the FILES entry in Maintainers.pl is wrong
285 unless (-f $mapped_file) {
286 print $outfh "WARNING: perl file not found: $mapped_file\n";
287 next;
288 }
289
9546b88d
JV
290 my $relative_mapped_file = $mapped_file;
291 $relative_mapped_file =~ s/^(cpan|dist|ext)\/.*?\///;
cb097e7a
DM
292
293 if (File::Compare::compare($abs_cpan_file, $mapped_file)) {
9546b88d
JV
294
295
296 if ($use_diff) {
cb097e7a
DM
297 file_diff($outfh, $abs_cpan_file, $mapped_file,
298 $reverse, $diff_opts);
299 }
300 else {
9546b88d
JV
301 if ($cpan_file eq $relative_mapped_file) {
302 print $outfh " Modified: $relative_mapped_file\n";
cb097e7a
DM
303 }
304 else {
9546b88d 305 print $outfh " Modified: $cpan_file $relative_mapped_file\n";
cb097e7a
DM
306 }
307 }
308 }
309 elsif ($verbose) {
9546b88d 310 if ($cpan_file eq $relative_mapped_file) {
cb097e7a
DM
311 print $outfh " Unchanged: $cpan_file\n";
312 }
313 else {
9546b88d 314 print $outfh " Unchanged: $cpan_file $relative_mapped_file\n";
cb097e7a
DM
315 }
316 }
317 }
318 for (sort keys %perl_unseen) {
319 print $outfh " Perl only: $_\n" unless $use_diff;
320 }
321 }
322}
323
324# given FooBar-1.23_45.tar.gz, return FooBar
325
326sub distro_base {
327 my $d = shift;
328 $d =~ s/\.tar\.gz$//;
329 $d =~ s/\.gip$//;
330 $d =~ s/[\d\-_\.]+$//;
331 return $d;
332}
333
334# process --crosscheck action:
335# ie list all distributions whose CPAN versions differ from that listed in
336# Maintainers.pl
337
338sub do_crosscheck {
339 my ($outfh, $cache_dir, $force, $modules) = @_;
340
341 my $file = '02packages.details.txt';
342 my $download_dir = $cache_dir || File::Temp::tempdir( CLEANUP => 1 );
343 my $path = File::Spec->catfile($download_dir, $file);
344 my $gzfile = "$path.gz";
345
346 # grab 02packages.details.txt
347
348 my $url = 'http://www.cpan.org/modules/02packages.details.txt.gz';
349
350 if (! -f $gzfile or $force) {
351 unlink $gzfile;
352 my_getstore($url, $gzfile);
353 }
354 unlink $path;
355 IO::Uncompress::Gunzip::gunzip($gzfile, $path)
356 or die "ERROR: failed to ungzip $gzfile: $IO::Uncompress::Gunzip::GunzipError\n";
357
358 # suck in the data from it
359
360 open my $fh, '<', $path
361 or die "ERROR: open: $file: $!\n";
362
363 my %distros;
364 my %modules;
365
366 while (<$fh>) {
367 next if 1../^$/;
368 chomp;
369 my @f = split ' ', $_;
370 if (@f != 3) {
371 warn "WARNING: $file:$.: line doesn't have three fields (skipping)\n";
372 next;
373 }
cb097e7a 374 my $distro = $f[2];
f0ce33d7
DM
375 $distro =~ s{^[A-Z]/[A-Z]{2}/}{}; # strip leading A/AB/
376 $modules{$f[0]} = $distro;
377
378 (my $short_distro = $distro) =~ s{^.*/}{};
cb097e7a 379
f0ce33d7 380 $distros{distro_base($short_distro)}{$distro} = 1;
cb097e7a
DM
381 }
382
383 for my $module (@$modules) {
384 my $m = $Maintainers::Modules{$module}
385 or die "ERROR: No such module in Maintainers.pl: '$module'\n";
386
387 unless ($m->{CPAN}) {
388 print $outfh "\nWARNING: $module is not dual-life; skipping\n";
389 next;
390 }
391
f0ce33d7 392 # given an entry like
cb097e7a
DM
393 # Foo::Bar 1.23 foo-bar-1.23.tar.gz,
394 # first compare the module name against Foo::Bar, and failing that,
395 # against foo-bar
396
397 my $pdist = $m->{DISTRIBUTION};
398 die "ERROR: $module has no DISTRIBUTION entry\n" unless defined $pdist;
cb097e7a
DM
399
400 my $cdist = $modules{$module};
f0ce33d7 401 (my $short_pdist = $pdist) =~ s{^.*/}{};
cb097e7a 402
f0ce33d7
DM
403 unless (defined $cdist) {
404 my $d = $distros{distro_base($short_pdist)};
cb097e7a
DM
405 unless (defined $d) {
406 print $outfh "\n$module: Can't determine current CPAN entry\n";
407 next;
408 }
409 if (keys %$d > 1) {
410 print $outfh "\n$module: (found more than one CPAN candidate):\n";
411 print $outfh " perl: $pdist\n";
412 print $outfh " CPAN: $_\n" for sort keys %$d;
413 next;
414 }
415 $cdist = (keys %$d)[0];
416 }
417
418 if ($cdist ne $pdist) {
419 print $outfh "\n$module:\n Perl: $pdist\n CPAN: $cdist\n";
420 }
421 }
422}
423
424
425
426# get the EXCLUDED and MAP entries for this module, or
427# make up defauts if they don't exist
428
429sub get_map {
430 my ($m, $module_name, $perl_files) = @_;
431
432 my ($excluded, $map) = @$m{qw(EXCLUDED MAP)};
433
434 $excluded ||= [];
435
436 return $excluded, $map if $map;
437
438 # all files under ext/foo-bar (plus maybe some under t/lib)???
439
440 my $ext;
441 for (@$perl_files) {
a193a2db 442 if (m{^((?:ext|dist|cpan)/[^/]+/)}) {
cb097e7a
DM
443 if (defined $ext and $ext ne $1) {
444 # more than one ext/$ext/
445 undef $ext;
446 last;
447 }
448 $ext = $1;
449 }
450 elsif (m{^t/lib/}) {
451 next;
452 }
453 else {
454 undef $ext;
455 last;
456 }
457 }
458
459 if (defined $ext) {
460 $map = { '' => $ext },
461 }
462 else {
463 (my $base = $module_name) =~ s{::}{/}g;
464 $base ="lib/$base";
465 $map = {
466 'lib/' => 'lib/',
467 '' => "$base/",
468 };
469 }
470 return $excluded, $map;
471}
472
473
474# Given an exclude list and a mapping hash, convert a CPAN filename
475# (eg 't/bar.t') to the equivalent perl filename (eg 'lib/Foo/Bar/t/bar.t').
476# Returns an empty list for an excluded file
477
478sub cpan_to_perl {
479 my ($excluded, $map, $cpan_file) = @_;
480
481 for my $exclude (@$excluded) {
482 # may be a simple string to match exactly, or a pattern
483 if (ref $exclude) {
484 return if $cpan_file =~ $exclude;
485 }
486 else {
487 return if $cpan_file eq $exclude;
488 }
489 }
490
491 my $perl_file = $cpan_file;
492
493 # try longest prefix first, then alphabetically on tie-break
494 for my $prefix (sort { length($b) <=> length($a) || $a cmp $b } keys %$map)
495 {
496 last if $perl_file =~ s/^\Q$prefix/$map->{$prefix}/;
497 }
498 return $perl_file;
499}
500
501
502
503# do LWP::Simple::getstore, possibly without LWP::Simple being available
504
505my $lwp_simple_available;
506
507sub my_getstore {
508 my ($url, $file) = @_;
509 unless (defined $lwp_simple_available) {
510 eval { require LWP::Simple };
511 $lwp_simple_available = $@ eq '';
512 }
513 if ($lwp_simple_available) {
514 return LWP::Simple::is_success(LWP::Simple::getstore($url, $file));
515 }
516 else {
517 return system(WGET_CMD, "-O", $file, $url) == 0;
518 }
519}
520
521
522# download and unpack a distribution
523# Returns the full pathname of the extracted directory
524# (eg '/tmp/XYZ/Foo_bar-1.23')
525
526# cache_dir: where to dowenload the .tar.gz file to
527# untar_dir: where to untar or unzup the file
528# module: name of module
529# dist: name of the distribution
530
531sub get_distribution {
532 my ($cache_dir, $untar_dir, $module, $dist) = @_;
533
534 $dist =~ m{.+/([^/]+)$}
8c814d1a 535 or die "ERROR: $module: invalid DISTRIBUTION name (no AUTHOR/ prefix): $dist\n";
cb097e7a
DM
536 my $filename = $1;
537
538 my $download_dir = $cache_dir || File::Temp::tempdir( CLEANUP => 1 );
539 my $download_file = File::Spec->catfile($download_dir, $filename);
540
541 # download distribution
542
543 if (-f $download_file and ! -s $download_file ) {
544 # wget can leave a zero-length file on failed download
545 unlink $download_file;
546 }
547
548 unless (-f $download_file) {
549 # not cached
550 $dist =~ /^([A-Z])([A-Z])/
8c814d1a 551 or die "ERROR: $module: invalid DISTRIBUTION name (not /^[A-Z]{2}/): $dist\n";
cb097e7a
DM
552
553 my $url = "http://www.cpan.org/modules/by-authors/id/$1/$1$2/$dist";
554 my_getstore($url, $download_file)
8c814d1a 555 or die "ERROR: Could not fetch '$url'\n";
cb097e7a
DM
556 }
557
558 # extract distribution
559
560 my $ae = Archive::Extract->new( archive => $download_file);
561 $ae->extract( to => $untar_dir )
8c814d1a 562 or die "ERROR: failed to extract distribution '$download_file to temp. dir: " . $ae->error() . "\n";
cb097e7a
DM
563
564 # get the name of the extracted distribution dir
565
566 my $path = File::Spec->catfile($untar_dir, $filename);
567
568 $path =~ s/\.tar\.gz$// or
569 $path =~ s/\.zip$// or
570 die "ERROR: downloaded file does not have a recognised suffix: $path\n";
571
572 die "ERROR: Extracted tarball does not appear as $path\n" unless -d $path;
573
574 return $path;
575}
576
577
578# produce the diff of a single file
579sub file_diff {
580 my $outfh = shift;
581 my $cpan_file = shift;
582 my $perl_file = shift;
583 my $reverse = shift;
584 my $diff_opts = shift;
585
586
587 my @cmd = (DIFF_CMD, split ' ', $diff_opts);
588 if ($reverse) {
589 push @cmd, $perl_file, $cpan_file;
590 }
591 else {
592 push @cmd, $cpan_file, $perl_file;
593 }
594 my $result = `@cmd`;
595
596 $result =~ s{^(--- |\+\+\+ )/tmp/[^/]+/}{$1}gm;
597
598 print $outfh $result;
599}
600
601
602run();
603