This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
perl5133delta.pod: Notes on CvGV reference counting
[perl5.git] / Porting / core-cpan-diff
CommitLineData
cb097e7a
DM
1#!/usr/bin/env perl
2
3# core-cpan-diff: Compare CPAN modules with their equivalent in core
4
5# Originally based on App::DualLivedDiff by Steffen Mueller.
6
7use strict;
8use warnings;
9
10use 5.010;
11
12use Getopt::Long;
13use File::Temp ();
14use File::Path ();
15use File::Spec;
16use Archive::Extract;
17use IO::Uncompress::Gunzip ();
18use File::Compare ();
19use ExtUtils::Manifest;
20
21BEGIN { die "Must be run from root of perl source tree\n" unless -d 'Porting' }
22use lib 'Porting';
23use Maintainers ();
24
25# if running from blead, we may be doing -Ilib, which means when we
26# 'chdir /tmp/foo', Archive::Extract may not find Archive::Tar etc.
27# So preload the things we need, and tell it to check %INC first:
28
29use Archive::Tar;
30use IPC::Open3;
31use IO::Select;
32$Module::Load::Conditional::CHECK_INC_HASH = 1;
33# stop Archive::Extract whinging about lack of Archive::Zip
34$Archive::Extract::WARN = 0;
35
36
37# Files, which if they exist in CPAN but not in perl, will not generate
38# an 'Only in CPAN' listing
39#
40our %IGNORABLE = map { ($_ => 1) }
41 qw(.cvsignore .dualLivedDiffConfig .gitignore
42 ANNOUNCE Announce Artistic AUTHORS BENCHMARK BUGS Build.PL
43 CHANGELOG ChangeLog CHANGES Changes COPYING Copying CREDITS
44 GOALS HISTORY INSTALL INSTALL.SKIP LICENSE Makefile.PL
45 MANIFEST MANIFEST.SKIP META.yml NEW NOTES ppport.h README
46 SIGNATURE THANKS TODO Todo VERSION WHATSNEW);
47
48# where, under the cache dir, to untar stuff to
49
50use constant UNTAR_DIR => 'untarred';
51
52use constant DIFF_CMD => 'diff';
53use constant WGET_CMD => 'wget';
54
55sub usage {
56 print STDERR "\n@_\n\n" if @_;
57 print STDERR <<HERE;
58Usage: $0 [opts] [ -d | -v | -x ] [ -a | module ... ]
59
60-a/--all Scan all dual-life modules.
61
62-c/--cachedir Where to save downloaded CPAN tarball files
63 (defaults to /tmp/something/ with deletion after each run).
64
65-d/--diff Display file differences using diff(1), rather than just
66 listing which files have changed.
67 The diff(1) command is assumed to be in your PATH.
68
69--diffopts Options to pass to the diff command. Defaults to '-u'.
70
71-f|force Force download from CPAN of new 02packages.details.txt file
72 (with --crosscheck only).
73
74-o/--output File name to write output to (defaults to STDOUT).
75
76-r/--reverse Reverses the diff (perl to CPAN).
77
2908b263
RS
78-u/--upstream only print modules with the given upstream (defaults to all)
79
cb097e7a
DM
80-v/--verbose List the fate of *all* files in the tarball, not just those
81 that differ or are missing.
82
83-x|crosscheck List the distributions whose current CPAN version differs from
84 that in blead (i.e. the DISTRIBUTION field in Maintainers.pl).
85
86By default (i.e. without the --crosscheck option), for each listed module
87(or with -a, all CPAN modules listed in Maintainers.pl), grab the tarball
88from CPAN associated with that module, and compare the files in it with
89those in the perl source tree.
90
91Must be run from the root of the perl source tree.
92Module names must match the keys of %Modules in Maintainers.pl.
93HERE
94 exit(1);
95}
96
97
98sub run {
99 my $scan_all;
100 my $diff_opts;
101 my $reverse = 0;
2908b263 102 my @wanted_upstreams;
cb097e7a
DM
103 my $cache_dir;
104 my $use_diff;
105 my $output_file;
106 my $verbose;
107 my $force;
108 my $do_crosscheck;
109
110 GetOptions(
111 'a|all' => \$scan_all,
112 'c|cachedir=s' => \$cache_dir,
113 'd|diff' => \$use_diff,
114 'diffopts:s' => \$diff_opts,
115 'f|force' => \$force,
116 'h|help' => \&usage,
117 'o|output=s' => \$output_file,
118 'r|reverse' => \$reverse,
2908b263 119 'u|upstream=s@'=> \@wanted_upstreams,
cb097e7a
DM
120 'v|verbose' => \$verbose,
121 'x|crosscheck' => \$do_crosscheck,
122 ) or usage;
123
124
125 my @modules;
126
127 usage("Cannot mix -a with module list") if $scan_all && @ARGV;
128
129 if ($do_crosscheck) {
130 usage("can't use -r, -d, --diffopts, -v with --crosscheck")
131 if ($reverse || $use_diff || $diff_opts || $verbose);
132 }
133 else {
134 $diff_opts = '-u' unless defined $diff_opts;
135 usage("can't use -f without --crosscheck") if $force;
136 }
137
138 @modules = $scan_all
139 ? grep $Maintainers::Modules{$_}{CPAN},
140 (sort {lc $a cmp lc $b } keys %Maintainers::Modules)
141 : @ARGV;
142 usage("No modules specified") unless @modules;
143
144
145 my $outfh;
146 if (defined $output_file) {
147 open $outfh, '>', $output_file
8c814d1a 148 or die "ERROR: could not open file '$output_file' for writing: $!\n";
cb097e7a
DM
149 }
150 else {
151 open $outfh, ">&STDOUT"
8c814d1a 152 or die "ERROR: can't dup STDOUT: $!\n";
cb097e7a
DM
153 }
154
155 if (defined $cache_dir) {
156 die "ERROR: no such directory: '$cache_dir'\n" unless -d $cache_dir;
157 }
158
159 if ($do_crosscheck) {
160 do_crosscheck($outfh, $cache_dir, $force, \@modules);
161 }
162 else {
f0ce33d7 163 do_compare(\@modules, $outfh, $output_file, $cache_dir, $verbose, $use_diff,
2908b263 164 $reverse, $diff_opts, \@wanted_upstreams);
cb097e7a
DM
165 }
166}
167
168
169
170# compare a list of modules against their CPAN equivalents
171
172sub do_compare {
f0ce33d7 173 my ($modules, $outfh, $output_file, $cache_dir, $verbose,
2908b263 174 $use_diff, $reverse, $diff_opts, $wanted_upstreams) = @_;
cb097e7a
DM
175
176
177 # first, make sure we have a directory where they can all be untarred,
178 # and if its a permanent directory, clear any previous content
179 my $untar_dir;
180 if ($cache_dir) {
181 $untar_dir = File::Spec->catdir($cache_dir, UNTAR_DIR);
182 if (-d $untar_dir) {
183 File::Path::rmtree($untar_dir)
184 or die "failed to remove $untar_dir\n";
185 }
186 mkdir $untar_dir
187 or die "mkdir $untar_dir: $!\n";
188 }
189 else {
190 $untar_dir = File::Temp::tempdir( CLEANUP => 1 );
191 }
192
193 my %ignorable = map { ($_ => 1) } @Maintainers::IGNORABLE;
194
195 my %seen_dist;
196 for my $module (@$modules) {
f0ce33d7 197 warn "Processing $module ...\n" if defined $output_file;
cb097e7a
DM
198
199 my $m = $Maintainers::Modules{$module}
200 or die "ERROR: No such module in Maintainers.pl: '$module'\n";
201
202 unless ($m->{CPAN}) {
203 print $outfh "WARNING: $module is not dual-life; skipping\n";
204 next;
205 }
206
207 my $dist = $m->{DISTRIBUTION};
208 die "ERROR: $module has no DISTRIBUTION entry\n" unless defined $dist;
209
210 if ($seen_dist{$dist}) {
211 warn "WARNING: duplicate entry for $dist in $module\n"
212 }
d55832d0 213
2908b263
RS
214 my $upstream = $m->{UPSTREAM} || 'UNKNOWN';
215 next if @$wanted_upstreams and ! ($upstream ~~ $wanted_upstreams);
9546b88d 216 print $outfh "\n$module - ".$Maintainers::Modules{$module}->{DISTRIBUTION}."\n" unless $use_diff;
d55832d0
JV
217 print $outfh " upstream is: ".($m->{UPSTREAM} || 'UNKNOWN!')."\n";
218
cb097e7a
DM
219 $seen_dist{$dist}++;
220
8c814d1a
DM
221 my $cpan_dir;
222 eval {
223 $cpan_dir = get_distribution($cache_dir, $untar_dir, $module, $dist)
224 };
225 if ($@) {
226 print $outfh " ", $@;
227 print $outfh " (skipping)\n";
228 next;
229 }
cb097e7a
DM
230
231 my @perl_files = Maintainers::get_module_files($module);
232
233 my $manifest = File::Spec->catfile($cpan_dir, 'MANIFEST');
234 die "ERROR: no such file: $manifest\n" unless -f $manifest;
235
236 my $cpan_files = ExtUtils::Manifest::maniread($manifest);
237 my @cpan_files = sort keys %$cpan_files;
238
239 my ($excluded, $map) = get_map($m, $module, \@perl_files);
240
241 my %perl_unseen;
242 @perl_unseen{@perl_files} = ();
243 my %perl_files = %perl_unseen;
244
245 foreach my $cpan_file (@cpan_files) {
246 my $mapped_file = cpan_to_perl($excluded, $map, $cpan_file);
247 unless (defined $mapped_file) {
248 print $outfh " Excluded: $cpan_file\n" if $verbose;
249 next;
250 }
251
252 if (exists $perl_files{$mapped_file}) {
253 delete $perl_unseen{$mapped_file};
254 }
255 else {
256 # some CPAN files foo are stored in core as foo.packed,
257 # which are then unpacked by 'make test_prep'
258 my $packed_file = "$mapped_file.packed";
259 if (exists $perl_files{$packed_file} ) {
260 if (! -f $mapped_file and -f $packed_file) {
261 print $outfh <<EOF;
262WARNING: $mapped_file not found, but .packed variant exists.
263Perhaps you need to run 'make test_prep'?
264EOF
265 next;
266 }
267 delete $perl_unseen{$packed_file};
268 }
269 else {
270 if ($ignorable{$cpan_file}) {
271 print $outfh " Ignored: $cpan_file\n" if $verbose;
272 next;
273 }
274
275 unless ($use_diff) {
276 print $outfh " CPAN only: $cpan_file",
277 ($cpan_file eq $mapped_file) ? "\n"
278 : " (expected $mapped_file)\n";
279 }
280 next;
281 }
282 }
283
284
285 my $abs_cpan_file = File::Spec->catfile($cpan_dir, $cpan_file);
286
287 # should never happen
288 die "ERROR: can't find file $abs_cpan_file\n" unless -f $abs_cpan_file;
289
290 # might happen if the FILES entry in Maintainers.pl is wrong
291 unless (-f $mapped_file) {
292 print $outfh "WARNING: perl file not found: $mapped_file\n";
293 next;
294 }
295
9546b88d
JV
296 my $relative_mapped_file = $mapped_file;
297 $relative_mapped_file =~ s/^(cpan|dist|ext)\/.*?\///;
cb097e7a
DM
298
299 if (File::Compare::compare($abs_cpan_file, $mapped_file)) {
9546b88d
JV
300
301
302 if ($use_diff) {
cb097e7a
DM
303 file_diff($outfh, $abs_cpan_file, $mapped_file,
304 $reverse, $diff_opts);
305 }
306 else {
9546b88d
JV
307 if ($cpan_file eq $relative_mapped_file) {
308 print $outfh " Modified: $relative_mapped_file\n";
cb097e7a
DM
309 }
310 else {
9546b88d 311 print $outfh " Modified: $cpan_file $relative_mapped_file\n";
cb097e7a
DM
312 }
313 }
314 }
315 elsif ($verbose) {
9546b88d 316 if ($cpan_file eq $relative_mapped_file) {
cb097e7a
DM
317 print $outfh " Unchanged: $cpan_file\n";
318 }
319 else {
9546b88d 320 print $outfh " Unchanged: $cpan_file $relative_mapped_file\n";
cb097e7a
DM
321 }
322 }
323 }
324 for (sort keys %perl_unseen) {
325 print $outfh " Perl only: $_\n" unless $use_diff;
326 }
327 }
328}
329
330# given FooBar-1.23_45.tar.gz, return FooBar
331
332sub distro_base {
333 my $d = shift;
334 $d =~ s/\.tar\.gz$//;
335 $d =~ s/\.gip$//;
336 $d =~ s/[\d\-_\.]+$//;
337 return $d;
338}
339
340# process --crosscheck action:
341# ie list all distributions whose CPAN versions differ from that listed in
342# Maintainers.pl
343
344sub do_crosscheck {
345 my ($outfh, $cache_dir, $force, $modules) = @_;
346
347 my $file = '02packages.details.txt';
348 my $download_dir = $cache_dir || File::Temp::tempdir( CLEANUP => 1 );
349 my $path = File::Spec->catfile($download_dir, $file);
350 my $gzfile = "$path.gz";
351
352 # grab 02packages.details.txt
353
354 my $url = 'http://www.cpan.org/modules/02packages.details.txt.gz';
355
356 if (! -f $gzfile or $force) {
357 unlink $gzfile;
358 my_getstore($url, $gzfile);
359 }
360 unlink $path;
361 IO::Uncompress::Gunzip::gunzip($gzfile, $path)
362 or die "ERROR: failed to ungzip $gzfile: $IO::Uncompress::Gunzip::GunzipError\n";
363
364 # suck in the data from it
365
366 open my $fh, '<', $path
367 or die "ERROR: open: $file: $!\n";
368
369 my %distros;
370 my %modules;
371
372 while (<$fh>) {
373 next if 1../^$/;
374 chomp;
375 my @f = split ' ', $_;
376 if (@f != 3) {
377 warn "WARNING: $file:$.: line doesn't have three fields (skipping)\n";
378 next;
379 }
cb097e7a 380 my $distro = $f[2];
f0ce33d7
DM
381 $distro =~ s{^[A-Z]/[A-Z]{2}/}{}; # strip leading A/AB/
382 $modules{$f[0]} = $distro;
383
384 (my $short_distro = $distro) =~ s{^.*/}{};
cb097e7a 385
f0ce33d7 386 $distros{distro_base($short_distro)}{$distro} = 1;
cb097e7a
DM
387 }
388
389 for my $module (@$modules) {
390 my $m = $Maintainers::Modules{$module}
391 or die "ERROR: No such module in Maintainers.pl: '$module'\n";
392
393 unless ($m->{CPAN}) {
394 print $outfh "\nWARNING: $module is not dual-life; skipping\n";
395 next;
396 }
397
f0ce33d7 398 # given an entry like
cb097e7a
DM
399 # Foo::Bar 1.23 foo-bar-1.23.tar.gz,
400 # first compare the module name against Foo::Bar, and failing that,
401 # against foo-bar
402
403 my $pdist = $m->{DISTRIBUTION};
404 die "ERROR: $module has no DISTRIBUTION entry\n" unless defined $pdist;
cb097e7a
DM
405
406 my $cdist = $modules{$module};
f0ce33d7 407 (my $short_pdist = $pdist) =~ s{^.*/}{};
cb097e7a 408
f0ce33d7
DM
409 unless (defined $cdist) {
410 my $d = $distros{distro_base($short_pdist)};
cb097e7a
DM
411 unless (defined $d) {
412 print $outfh "\n$module: Can't determine current CPAN entry\n";
413 next;
414 }
415 if (keys %$d > 1) {
416 print $outfh "\n$module: (found more than one CPAN candidate):\n";
417 print $outfh " perl: $pdist\n";
418 print $outfh " CPAN: $_\n" for sort keys %$d;
419 next;
420 }
421 $cdist = (keys %$d)[0];
422 }
423
424 if ($cdist ne $pdist) {
425 print $outfh "\n$module:\n Perl: $pdist\n CPAN: $cdist\n";
426 }
427 }
428}
429
430
431
432# get the EXCLUDED and MAP entries for this module, or
433# make up defauts if they don't exist
434
435sub get_map {
436 my ($m, $module_name, $perl_files) = @_;
437
438 my ($excluded, $map) = @$m{qw(EXCLUDED MAP)};
439
440 $excluded ||= [];
441
442 return $excluded, $map if $map;
443
444 # all files under ext/foo-bar (plus maybe some under t/lib)???
445
446 my $ext;
447 for (@$perl_files) {
a193a2db 448 if (m{^((?:ext|dist|cpan)/[^/]+/)}) {
cb097e7a
DM
449 if (defined $ext and $ext ne $1) {
450 # more than one ext/$ext/
451 undef $ext;
452 last;
453 }
454 $ext = $1;
455 }
456 elsif (m{^t/lib/}) {
457 next;
458 }
459 else {
460 undef $ext;
461 last;
462 }
463 }
464
465 if (defined $ext) {
466 $map = { '' => $ext },
467 }
468 else {
469 (my $base = $module_name) =~ s{::}{/}g;
470 $base ="lib/$base";
471 $map = {
472 'lib/' => 'lib/',
473 '' => "$base/",
474 };
475 }
476 return $excluded, $map;
477}
478
479
480# Given an exclude list and a mapping hash, convert a CPAN filename
481# (eg 't/bar.t') to the equivalent perl filename (eg 'lib/Foo/Bar/t/bar.t').
482# Returns an empty list for an excluded file
483
484sub cpan_to_perl {
485 my ($excluded, $map, $cpan_file) = @_;
486
487 for my $exclude (@$excluded) {
488 # may be a simple string to match exactly, or a pattern
489 if (ref $exclude) {
490 return if $cpan_file =~ $exclude;
491 }
492 else {
493 return if $cpan_file eq $exclude;
494 }
495 }
496
497 my $perl_file = $cpan_file;
498
499 # try longest prefix first, then alphabetically on tie-break
500 for my $prefix (sort { length($b) <=> length($a) || $a cmp $b } keys %$map)
501 {
502 last if $perl_file =~ s/^\Q$prefix/$map->{$prefix}/;
503 }
504 return $perl_file;
505}
506
507
508
509# do LWP::Simple::getstore, possibly without LWP::Simple being available
510
511my $lwp_simple_available;
512
513sub my_getstore {
514 my ($url, $file) = @_;
515 unless (defined $lwp_simple_available) {
516 eval { require LWP::Simple };
517 $lwp_simple_available = $@ eq '';
518 }
519 if ($lwp_simple_available) {
520 return LWP::Simple::is_success(LWP::Simple::getstore($url, $file));
521 }
522 else {
523 return system(WGET_CMD, "-O", $file, $url) == 0;
524 }
525}
526
527
528# download and unpack a distribution
529# Returns the full pathname of the extracted directory
530# (eg '/tmp/XYZ/Foo_bar-1.23')
531
532# cache_dir: where to dowenload the .tar.gz file to
533# untar_dir: where to untar or unzup the file
534# module: name of module
535# dist: name of the distribution
536
537sub get_distribution {
538 my ($cache_dir, $untar_dir, $module, $dist) = @_;
539
540 $dist =~ m{.+/([^/]+)$}
8c814d1a 541 or die "ERROR: $module: invalid DISTRIBUTION name (no AUTHOR/ prefix): $dist\n";
cb097e7a
DM
542 my $filename = $1;
543
544 my $download_dir = $cache_dir || File::Temp::tempdir( CLEANUP => 1 );
545 my $download_file = File::Spec->catfile($download_dir, $filename);
546
547 # download distribution
548
549 if (-f $download_file and ! -s $download_file ) {
550 # wget can leave a zero-length file on failed download
551 unlink $download_file;
552 }
553
554 unless (-f $download_file) {
555 # not cached
556 $dist =~ /^([A-Z])([A-Z])/
8c814d1a 557 or die "ERROR: $module: invalid DISTRIBUTION name (not /^[A-Z]{2}/): $dist\n";
cb097e7a
DM
558
559 my $url = "http://www.cpan.org/modules/by-authors/id/$1/$1$2/$dist";
560 my_getstore($url, $download_file)
8c814d1a 561 or die "ERROR: Could not fetch '$url'\n";
cb097e7a
DM
562 }
563
564 # extract distribution
565
566 my $ae = Archive::Extract->new( archive => $download_file);
567 $ae->extract( to => $untar_dir )
8c814d1a 568 or die "ERROR: failed to extract distribution '$download_file to temp. dir: " . $ae->error() . "\n";
cb097e7a
DM
569
570 # get the name of the extracted distribution dir
571
572 my $path = File::Spec->catfile($untar_dir, $filename);
573
574 $path =~ s/\.tar\.gz$// or
575 $path =~ s/\.zip$// or
576 die "ERROR: downloaded file does not have a recognised suffix: $path\n";
577
578 die "ERROR: Extracted tarball does not appear as $path\n" unless -d $path;
579
580 return $path;
581}
582
583
584# produce the diff of a single file
585sub file_diff {
586 my $outfh = shift;
587 my $cpan_file = shift;
588 my $perl_file = shift;
589 my $reverse = shift;
590 my $diff_opts = shift;
591
592
593 my @cmd = (DIFF_CMD, split ' ', $diff_opts);
594 if ($reverse) {
595 push @cmd, $perl_file, $cpan_file;
596 }
597 else {
598 push @cmd, $cpan_file, $perl_file;
599 }
600 my $result = `@cmd`;
601
602 $result =~ s{^(--- |\+\+\+ )/tmp/[^/]+/}{$1}gm;
603
604 print $outfh $result;
605}
606
607
608run();
609