This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
perl5133delta.pod: Notes on CvGV reference counting
[perl5.git] / Porting / core-cpan-diff
... / ...
CommitLineData
1#!/usr/bin/env perl
2
3# core-cpan-diff: Compare CPAN modules with their equivalent in core
4
5# Originally based on App::DualLivedDiff by Steffen Mueller.
6
7use strict;
8use warnings;
9
10use 5.010;
11
12use Getopt::Long;
13use File::Temp ();
14use File::Path ();
15use File::Spec;
16use Archive::Extract;
17use IO::Uncompress::Gunzip ();
18use File::Compare ();
19use ExtUtils::Manifest;
20
21BEGIN { die "Must be run from root of perl source tree\n" unless -d 'Porting' }
22use lib 'Porting';
23use Maintainers ();
24
25# if running from blead, we may be doing -Ilib, which means when we
26# 'chdir /tmp/foo', Archive::Extract may not find Archive::Tar etc.
27# So preload the things we need, and tell it to check %INC first:
28
29use Archive::Tar;
30use IPC::Open3;
31use IO::Select;
32$Module::Load::Conditional::CHECK_INC_HASH = 1;
33# stop Archive::Extract whinging about lack of Archive::Zip
34$Archive::Extract::WARN = 0;
35
36
37# Files, which if they exist in CPAN but not in perl, will not generate
38# an 'Only in CPAN' listing
39#
40our %IGNORABLE = map { ($_ => 1) }
41 qw(.cvsignore .dualLivedDiffConfig .gitignore
42 ANNOUNCE Announce Artistic AUTHORS BENCHMARK BUGS Build.PL
43 CHANGELOG ChangeLog CHANGES Changes COPYING Copying CREDITS
44 GOALS HISTORY INSTALL INSTALL.SKIP LICENSE Makefile.PL
45 MANIFEST MANIFEST.SKIP META.yml NEW NOTES ppport.h README
46 SIGNATURE THANKS TODO Todo VERSION WHATSNEW);
47
48# where, under the cache dir, to untar stuff to
49
50use constant UNTAR_DIR => 'untarred';
51
52use constant DIFF_CMD => 'diff';
53use constant WGET_CMD => 'wget';
54
55sub usage {
56 print STDERR "\n@_\n\n" if @_;
57 print STDERR <<HERE;
58Usage: $0 [opts] [ -d | -v | -x ] [ -a | module ... ]
59
60-a/--all Scan all dual-life modules.
61
62-c/--cachedir Where to save downloaded CPAN tarball files
63 (defaults to /tmp/something/ with deletion after each run).
64
65-d/--diff Display file differences using diff(1), rather than just
66 listing which files have changed.
67 The diff(1) command is assumed to be in your PATH.
68
69--diffopts Options to pass to the diff command. Defaults to '-u'.
70
71-f|force Force download from CPAN of new 02packages.details.txt file
72 (with --crosscheck only).
73
74-o/--output File name to write output to (defaults to STDOUT).
75
76-r/--reverse Reverses the diff (perl to CPAN).
77
78-u/--upstream only print modules with the given upstream (defaults to all)
79
80-v/--verbose List the fate of *all* files in the tarball, not just those
81 that differ or are missing.
82
83-x|crosscheck List the distributions whose current CPAN version differs from
84 that in blead (i.e. the DISTRIBUTION field in Maintainers.pl).
85
86By default (i.e. without the --crosscheck option), for each listed module
87(or with -a, all CPAN modules listed in Maintainers.pl), grab the tarball
88from CPAN associated with that module, and compare the files in it with
89those in the perl source tree.
90
91Must be run from the root of the perl source tree.
92Module names must match the keys of %Modules in Maintainers.pl.
93HERE
94 exit(1);
95}
96
97
98sub run {
99 my $scan_all;
100 my $diff_opts;
101 my $reverse = 0;
102 my @wanted_upstreams;
103 my $cache_dir;
104 my $use_diff;
105 my $output_file;
106 my $verbose;
107 my $force;
108 my $do_crosscheck;
109
110 GetOptions(
111 'a|all' => \$scan_all,
112 'c|cachedir=s' => \$cache_dir,
113 'd|diff' => \$use_diff,
114 'diffopts:s' => \$diff_opts,
115 'f|force' => \$force,
116 'h|help' => \&usage,
117 'o|output=s' => \$output_file,
118 'r|reverse' => \$reverse,
119 'u|upstream=s@'=> \@wanted_upstreams,
120 'v|verbose' => \$verbose,
121 'x|crosscheck' => \$do_crosscheck,
122 ) or usage;
123
124
125 my @modules;
126
127 usage("Cannot mix -a with module list") if $scan_all && @ARGV;
128
129 if ($do_crosscheck) {
130 usage("can't use -r, -d, --diffopts, -v with --crosscheck")
131 if ($reverse || $use_diff || $diff_opts || $verbose);
132 }
133 else {
134 $diff_opts = '-u' unless defined $diff_opts;
135 usage("can't use -f without --crosscheck") if $force;
136 }
137
138 @modules = $scan_all
139 ? grep $Maintainers::Modules{$_}{CPAN},
140 (sort {lc $a cmp lc $b } keys %Maintainers::Modules)
141 : @ARGV;
142 usage("No modules specified") unless @modules;
143
144
145 my $outfh;
146 if (defined $output_file) {
147 open $outfh, '>', $output_file
148 or die "ERROR: could not open file '$output_file' for writing: $!\n";
149 }
150 else {
151 open $outfh, ">&STDOUT"
152 or die "ERROR: can't dup STDOUT: $!\n";
153 }
154
155 if (defined $cache_dir) {
156 die "ERROR: no such directory: '$cache_dir'\n" unless -d $cache_dir;
157 }
158
159 if ($do_crosscheck) {
160 do_crosscheck($outfh, $cache_dir, $force, \@modules);
161 }
162 else {
163 do_compare(\@modules, $outfh, $output_file, $cache_dir, $verbose, $use_diff,
164 $reverse, $diff_opts, \@wanted_upstreams);
165 }
166}
167
168
169
170# compare a list of modules against their CPAN equivalents
171
172sub do_compare {
173 my ($modules, $outfh, $output_file, $cache_dir, $verbose,
174 $use_diff, $reverse, $diff_opts, $wanted_upstreams) = @_;
175
176
177 # first, make sure we have a directory where they can all be untarred,
178 # and if its a permanent directory, clear any previous content
179 my $untar_dir;
180 if ($cache_dir) {
181 $untar_dir = File::Spec->catdir($cache_dir, UNTAR_DIR);
182 if (-d $untar_dir) {
183 File::Path::rmtree($untar_dir)
184 or die "failed to remove $untar_dir\n";
185 }
186 mkdir $untar_dir
187 or die "mkdir $untar_dir: $!\n";
188 }
189 else {
190 $untar_dir = File::Temp::tempdir( CLEANUP => 1 );
191 }
192
193 my %ignorable = map { ($_ => 1) } @Maintainers::IGNORABLE;
194
195 my %seen_dist;
196 for my $module (@$modules) {
197 warn "Processing $module ...\n" if defined $output_file;
198
199 my $m = $Maintainers::Modules{$module}
200 or die "ERROR: No such module in Maintainers.pl: '$module'\n";
201
202 unless ($m->{CPAN}) {
203 print $outfh "WARNING: $module is not dual-life; skipping\n";
204 next;
205 }
206
207 my $dist = $m->{DISTRIBUTION};
208 die "ERROR: $module has no DISTRIBUTION entry\n" unless defined $dist;
209
210 if ($seen_dist{$dist}) {
211 warn "WARNING: duplicate entry for $dist in $module\n"
212 }
213
214 my $upstream = $m->{UPSTREAM} || 'UNKNOWN';
215 next if @$wanted_upstreams and ! ($upstream ~~ $wanted_upstreams);
216 print $outfh "\n$module - ".$Maintainers::Modules{$module}->{DISTRIBUTION}."\n" unless $use_diff;
217 print $outfh " upstream is: ".($m->{UPSTREAM} || 'UNKNOWN!')."\n";
218
219 $seen_dist{$dist}++;
220
221 my $cpan_dir;
222 eval {
223 $cpan_dir = get_distribution($cache_dir, $untar_dir, $module, $dist)
224 };
225 if ($@) {
226 print $outfh " ", $@;
227 print $outfh " (skipping)\n";
228 next;
229 }
230
231 my @perl_files = Maintainers::get_module_files($module);
232
233 my $manifest = File::Spec->catfile($cpan_dir, 'MANIFEST');
234 die "ERROR: no such file: $manifest\n" unless -f $manifest;
235
236 my $cpan_files = ExtUtils::Manifest::maniread($manifest);
237 my @cpan_files = sort keys %$cpan_files;
238
239 my ($excluded, $map) = get_map($m, $module, \@perl_files);
240
241 my %perl_unseen;
242 @perl_unseen{@perl_files} = ();
243 my %perl_files = %perl_unseen;
244
245 foreach my $cpan_file (@cpan_files) {
246 my $mapped_file = cpan_to_perl($excluded, $map, $cpan_file);
247 unless (defined $mapped_file) {
248 print $outfh " Excluded: $cpan_file\n" if $verbose;
249 next;
250 }
251
252 if (exists $perl_files{$mapped_file}) {
253 delete $perl_unseen{$mapped_file};
254 }
255 else {
256 # some CPAN files foo are stored in core as foo.packed,
257 # which are then unpacked by 'make test_prep'
258 my $packed_file = "$mapped_file.packed";
259 if (exists $perl_files{$packed_file} ) {
260 if (! -f $mapped_file and -f $packed_file) {
261 print $outfh <<EOF;
262WARNING: $mapped_file not found, but .packed variant exists.
263Perhaps you need to run 'make test_prep'?
264EOF
265 next;
266 }
267 delete $perl_unseen{$packed_file};
268 }
269 else {
270 if ($ignorable{$cpan_file}) {
271 print $outfh " Ignored: $cpan_file\n" if $verbose;
272 next;
273 }
274
275 unless ($use_diff) {
276 print $outfh " CPAN only: $cpan_file",
277 ($cpan_file eq $mapped_file) ? "\n"
278 : " (expected $mapped_file)\n";
279 }
280 next;
281 }
282 }
283
284
285 my $abs_cpan_file = File::Spec->catfile($cpan_dir, $cpan_file);
286
287 # should never happen
288 die "ERROR: can't find file $abs_cpan_file\n" unless -f $abs_cpan_file;
289
290 # might happen if the FILES entry in Maintainers.pl is wrong
291 unless (-f $mapped_file) {
292 print $outfh "WARNING: perl file not found: $mapped_file\n";
293 next;
294 }
295
296 my $relative_mapped_file = $mapped_file;
297 $relative_mapped_file =~ s/^(cpan|dist|ext)\/.*?\///;
298
299 if (File::Compare::compare($abs_cpan_file, $mapped_file)) {
300
301
302 if ($use_diff) {
303 file_diff($outfh, $abs_cpan_file, $mapped_file,
304 $reverse, $diff_opts);
305 }
306 else {
307 if ($cpan_file eq $relative_mapped_file) {
308 print $outfh " Modified: $relative_mapped_file\n";
309 }
310 else {
311 print $outfh " Modified: $cpan_file $relative_mapped_file\n";
312 }
313 }
314 }
315 elsif ($verbose) {
316 if ($cpan_file eq $relative_mapped_file) {
317 print $outfh " Unchanged: $cpan_file\n";
318 }
319 else {
320 print $outfh " Unchanged: $cpan_file $relative_mapped_file\n";
321 }
322 }
323 }
324 for (sort keys %perl_unseen) {
325 print $outfh " Perl only: $_\n" unless $use_diff;
326 }
327 }
328}
329
330# given FooBar-1.23_45.tar.gz, return FooBar
331
332sub distro_base {
333 my $d = shift;
334 $d =~ s/\.tar\.gz$//;
335 $d =~ s/\.gip$//;
336 $d =~ s/[\d\-_\.]+$//;
337 return $d;
338}
339
340# process --crosscheck action:
341# ie list all distributions whose CPAN versions differ from that listed in
342# Maintainers.pl
343
344sub do_crosscheck {
345 my ($outfh, $cache_dir, $force, $modules) = @_;
346
347 my $file = '02packages.details.txt';
348 my $download_dir = $cache_dir || File::Temp::tempdir( CLEANUP => 1 );
349 my $path = File::Spec->catfile($download_dir, $file);
350 my $gzfile = "$path.gz";
351
352 # grab 02packages.details.txt
353
354 my $url = 'http://www.cpan.org/modules/02packages.details.txt.gz';
355
356 if (! -f $gzfile or $force) {
357 unlink $gzfile;
358 my_getstore($url, $gzfile);
359 }
360 unlink $path;
361 IO::Uncompress::Gunzip::gunzip($gzfile, $path)
362 or die "ERROR: failed to ungzip $gzfile: $IO::Uncompress::Gunzip::GunzipError\n";
363
364 # suck in the data from it
365
366 open my $fh, '<', $path
367 or die "ERROR: open: $file: $!\n";
368
369 my %distros;
370 my %modules;
371
372 while (<$fh>) {
373 next if 1../^$/;
374 chomp;
375 my @f = split ' ', $_;
376 if (@f != 3) {
377 warn "WARNING: $file:$.: line doesn't have three fields (skipping)\n";
378 next;
379 }
380 my $distro = $f[2];
381 $distro =~ s{^[A-Z]/[A-Z]{2}/}{}; # strip leading A/AB/
382 $modules{$f[0]} = $distro;
383
384 (my $short_distro = $distro) =~ s{^.*/}{};
385
386 $distros{distro_base($short_distro)}{$distro} = 1;
387 }
388
389 for my $module (@$modules) {
390 my $m = $Maintainers::Modules{$module}
391 or die "ERROR: No such module in Maintainers.pl: '$module'\n";
392
393 unless ($m->{CPAN}) {
394 print $outfh "\nWARNING: $module is not dual-life; skipping\n";
395 next;
396 }
397
398 # given an entry like
399 # Foo::Bar 1.23 foo-bar-1.23.tar.gz,
400 # first compare the module name against Foo::Bar, and failing that,
401 # against foo-bar
402
403 my $pdist = $m->{DISTRIBUTION};
404 die "ERROR: $module has no DISTRIBUTION entry\n" unless defined $pdist;
405
406 my $cdist = $modules{$module};
407 (my $short_pdist = $pdist) =~ s{^.*/}{};
408
409 unless (defined $cdist) {
410 my $d = $distros{distro_base($short_pdist)};
411 unless (defined $d) {
412 print $outfh "\n$module: Can't determine current CPAN entry\n";
413 next;
414 }
415 if (keys %$d > 1) {
416 print $outfh "\n$module: (found more than one CPAN candidate):\n";
417 print $outfh " perl: $pdist\n";
418 print $outfh " CPAN: $_\n" for sort keys %$d;
419 next;
420 }
421 $cdist = (keys %$d)[0];
422 }
423
424 if ($cdist ne $pdist) {
425 print $outfh "\n$module:\n Perl: $pdist\n CPAN: $cdist\n";
426 }
427 }
428}
429
430
431
432# get the EXCLUDED and MAP entries for this module, or
433# make up defauts if they don't exist
434
435sub get_map {
436 my ($m, $module_name, $perl_files) = @_;
437
438 my ($excluded, $map) = @$m{qw(EXCLUDED MAP)};
439
440 $excluded ||= [];
441
442 return $excluded, $map if $map;
443
444 # all files under ext/foo-bar (plus maybe some under t/lib)???
445
446 my $ext;
447 for (@$perl_files) {
448 if (m{^((?:ext|dist|cpan)/[^/]+/)}) {
449 if (defined $ext and $ext ne $1) {
450 # more than one ext/$ext/
451 undef $ext;
452 last;
453 }
454 $ext = $1;
455 }
456 elsif (m{^t/lib/}) {
457 next;
458 }
459 else {
460 undef $ext;
461 last;
462 }
463 }
464
465 if (defined $ext) {
466 $map = { '' => $ext },
467 }
468 else {
469 (my $base = $module_name) =~ s{::}{/}g;
470 $base ="lib/$base";
471 $map = {
472 'lib/' => 'lib/',
473 '' => "$base/",
474 };
475 }
476 return $excluded, $map;
477}
478
479
480# Given an exclude list and a mapping hash, convert a CPAN filename
481# (eg 't/bar.t') to the equivalent perl filename (eg 'lib/Foo/Bar/t/bar.t').
482# Returns an empty list for an excluded file
483
484sub cpan_to_perl {
485 my ($excluded, $map, $cpan_file) = @_;
486
487 for my $exclude (@$excluded) {
488 # may be a simple string to match exactly, or a pattern
489 if (ref $exclude) {
490 return if $cpan_file =~ $exclude;
491 }
492 else {
493 return if $cpan_file eq $exclude;
494 }
495 }
496
497 my $perl_file = $cpan_file;
498
499 # try longest prefix first, then alphabetically on tie-break
500 for my $prefix (sort { length($b) <=> length($a) || $a cmp $b } keys %$map)
501 {
502 last if $perl_file =~ s/^\Q$prefix/$map->{$prefix}/;
503 }
504 return $perl_file;
505}
506
507
508
509# do LWP::Simple::getstore, possibly without LWP::Simple being available
510
511my $lwp_simple_available;
512
513sub my_getstore {
514 my ($url, $file) = @_;
515 unless (defined $lwp_simple_available) {
516 eval { require LWP::Simple };
517 $lwp_simple_available = $@ eq '';
518 }
519 if ($lwp_simple_available) {
520 return LWP::Simple::is_success(LWP::Simple::getstore($url, $file));
521 }
522 else {
523 return system(WGET_CMD, "-O", $file, $url) == 0;
524 }
525}
526
527
528# download and unpack a distribution
529# Returns the full pathname of the extracted directory
530# (eg '/tmp/XYZ/Foo_bar-1.23')
531
532# cache_dir: where to dowenload the .tar.gz file to
533# untar_dir: where to untar or unzup the file
534# module: name of module
535# dist: name of the distribution
536
537sub get_distribution {
538 my ($cache_dir, $untar_dir, $module, $dist) = @_;
539
540 $dist =~ m{.+/([^/]+)$}
541 or die "ERROR: $module: invalid DISTRIBUTION name (no AUTHOR/ prefix): $dist\n";
542 my $filename = $1;
543
544 my $download_dir = $cache_dir || File::Temp::tempdir( CLEANUP => 1 );
545 my $download_file = File::Spec->catfile($download_dir, $filename);
546
547 # download distribution
548
549 if (-f $download_file and ! -s $download_file ) {
550 # wget can leave a zero-length file on failed download
551 unlink $download_file;
552 }
553
554 unless (-f $download_file) {
555 # not cached
556 $dist =~ /^([A-Z])([A-Z])/
557 or die "ERROR: $module: invalid DISTRIBUTION name (not /^[A-Z]{2}/): $dist\n";
558
559 my $url = "http://www.cpan.org/modules/by-authors/id/$1/$1$2/$dist";
560 my_getstore($url, $download_file)
561 or die "ERROR: Could not fetch '$url'\n";
562 }
563
564 # extract distribution
565
566 my $ae = Archive::Extract->new( archive => $download_file);
567 $ae->extract( to => $untar_dir )
568 or die "ERROR: failed to extract distribution '$download_file to temp. dir: " . $ae->error() . "\n";
569
570 # get the name of the extracted distribution dir
571
572 my $path = File::Spec->catfile($untar_dir, $filename);
573
574 $path =~ s/\.tar\.gz$// or
575 $path =~ s/\.zip$// or
576 die "ERROR: downloaded file does not have a recognised suffix: $path\n";
577
578 die "ERROR: Extracted tarball does not appear as $path\n" unless -d $path;
579
580 return $path;
581}
582
583
584# produce the diff of a single file
585sub file_diff {
586 my $outfh = shift;
587 my $cpan_file = shift;
588 my $perl_file = shift;
589 my $reverse = shift;
590 my $diff_opts = shift;
591
592
593 my @cmd = (DIFF_CMD, split ' ', $diff_opts);
594 if ($reverse) {
595 push @cmd, $perl_file, $cpan_file;
596 }
597 else {
598 push @cmd, $cpan_file, $perl_file;
599 }
600 my $result = `@cmd`;
601
602 $result =~ s{^(--- |\+\+\+ )/tmp/[^/]+/}{$1}gm;
603
604 print $outfh $result;
605}
606
607
608run();
609