This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
check for PA* in both branches of case
[perl5.git] / Porting / core-cpan-diff
... / ...
CommitLineData
1#!/usr/bin/env perl
2
3# core-cpan-diff: Compare CPAN modules with their equivalent in core
4
5# Originally based on App::DualLivedDiff by Steffen Mueller.
6
7use strict;
8use warnings;
9
10use 5.010;
11
12use Getopt::Long;
13use File::Basename ();
14use File::Copy ();
15use File::Temp ();
16use File::Path ();
17use File::Spec::Functions;
18use Archive::Extract;
19use IO::Uncompress::Gunzip ();
20use File::Compare ();
21use ExtUtils::Manifest;
22use ExtUtils::MakeMaker ();
23use HTTP::Tiny;
24
25BEGIN { die "Must be run from root of perl source tree\n" unless -d 'Porting' }
26use lib 'Porting';
27use Maintainers ();
28
29# if running from blead, we may be doing -Ilib, which means when we
30# 'chdir /tmp/foo', Archive::Extract may not find Archive::Tar etc.
31# So preload the things we need, and tell it to check %INC first:
32
33use Archive::Tar;
34use IPC::Open3;
35use IO::Select;
36$Module::Load::Conditional::CHECK_INC_HASH = 1;
37
38# stop Archive::Extract whinging about lack of Archive::Zip
39$Archive::Extract::WARN = 0;
40
41# where, under the cache dir, to download tarballs to
42use constant SRC_DIR => 'tarballs';
43
44# where, under the cache dir, to untar stuff to
45use constant UNTAR_DIR => 'untarred';
46
47use constant DIFF_CMD => 'diff';
48use constant WGET_CMD => 'wget';
49
50sub usage {
51 print STDERR "\n@_\n\n" if @_;
52 print STDERR <<HERE;
53Usage: $0 [opts] [ -d | -v | -x ] [ -a | module ... ]
54
55-a/--all Scan all dual-life modules.
56
57-c/--cachedir Where to save downloaded CPAN tarball files
58 (defaults to /tmp/something/ with deletion after each run).
59
60-d/--diff Display file differences using diff(1), rather than just
61 listing which files have changed.
62 The diff(1) command is assumed to be in your PATH.
63
64--diffopts Options to pass to the diff command. Defaults to '-u'.
65
66-f|force Force download from CPAN of new 02packages.details.txt file
67 (with --crosscheck only).
68
69-m|mirror Preferred CPAN mirror URI (http:// or file:///)
70 (Local mirror must be a complete mirror, not minicpan)
71
72-o/--output File name to write output to (defaults to STDOUT).
73
74-r/--reverse Reverses the diff (perl to CPAN).
75
76-u/--upstream only print modules with the given upstream (defaults to all)
77
78-v/--verbose List the fate of *all* files in the tarball, not just those
79 that differ or are missing.
80
81-x|crosscheck List the distributions whose current CPAN version differs from
82 that in blead (i.e. the DISTRIBUTION field in Maintainers.pl).
83
84By default (i.e. without the --crosscheck option), for each listed module
85(or with -a, all CPAN modules listed in Maintainers.pl), grab the tarball
86from CPAN associated with that module, and compare the files in it with
87those in the perl source tree.
88
89Must be run from the root of the perl source tree.
90Module names must match the keys of %Modules in Maintainers.pl.
91HERE
92 exit(1);
93}
94
95sub run {
96 my $scan_all;
97 my $diff_opts;
98 my $reverse = 0;
99 my @wanted_upstreams;
100 my $cache_dir;
101 my $mirror_url = "http://www.cpan.org/";
102 my $use_diff;
103 my $output_file;
104 my $verbose;
105 my $force;
106 my $do_crosscheck;
107
108 GetOptions(
109 'a|all' => \$scan_all,
110 'c|cachedir=s' => \$cache_dir,
111 'd|diff' => \$use_diff,
112 'diffopts:s' => \$diff_opts,
113 'f|force' => \$force,
114 'h|help' => \&usage,
115 'm|mirror=s' => \$mirror_url,
116 'o|output=s' => \$output_file,
117 'r|reverse' => \$reverse,
118 'u|upstream=s@' => \@wanted_upstreams,
119 'v|verbose' => \$verbose,
120 'x|crosscheck' => \$do_crosscheck,
121 ) or usage;
122
123 my @modules;
124
125 usage("Cannot mix -a with module list") if $scan_all && @ARGV;
126
127 if ($do_crosscheck) {
128 usage("can't use -r, -d, --diffopts, -v with --crosscheck")
129 if ( $reverse || $use_diff || $diff_opts || $verbose );
130 }
131 else {
132 $diff_opts = '-u -b' unless defined $diff_opts;
133 usage("can't use -f without --crosscheck") if $force;
134 }
135
136 @modules =
137 $scan_all
138 ? grep $Maintainers::Modules{$_}{CPAN},
139 ( sort { lc $a cmp lc $b } keys %Maintainers::Modules )
140 : @ARGV;
141 usage("No modules specified") unless @modules;
142
143 my $outfh;
144 if ( defined $output_file ) {
145 open $outfh, '>', $output_file
146 or die "ERROR: could not open file '$output_file' for writing: $!\n";
147 }
148 else {
149 open $outfh, ">&STDOUT"
150 or die "ERROR: can't dup STDOUT: $!\n";
151 }
152
153 if ( defined $cache_dir ) {
154 die "ERROR: no such directory: '$cache_dir'\n" unless -d $cache_dir;
155 }
156 else {
157 $cache_dir = File::Temp::tempdir( CLEANUP => 1 );
158 }
159
160 $mirror_url .= "/" unless substr( $mirror_url, -1 ) eq "/";
161 my $test_file = "modules/03modlist.data.gz";
162 my_getstore(
163 cpan_url( $mirror_url, $test_file ),
164 catfile( $cache_dir, $test_file )
165 ) or die "ERROR: not a CPAN mirror '$mirror_url'\n";
166
167 if ($do_crosscheck) {
168 do_crosscheck( $outfh, $cache_dir, $mirror_url, $force, \@modules );
169 }
170 else {
171 do_compare(
172 \@modules, $outfh, $output_file,
173 $cache_dir, $mirror_url, $verbose,
174 $use_diff, $reverse, $diff_opts,
175 \@wanted_upstreams
176 );
177 }
178}
179
180# construct a CPAN url
181
182sub cpan_url {
183 my ( $mirror_url, @path ) = @_;
184 return $mirror_url unless @path;
185 my $cpan_path = join( "/", map { split "/", $_ } @path );
186 $cpan_path =~ s{\A/}{}; # remove leading slash since url has one trailing
187 return $mirror_url . $cpan_path;
188}
189
190# construct a CPAN URL for a author/distribution string like:
191# BINGOS/Archive-Extract-0.52.tar.gz
192
193sub cpan_url_distribution {
194 my ( $mirror_url, $distribution ) = @_;
195 $distribution =~ /^([A-Z])([A-Z])/
196 or die "ERROR: invalid DISTRIBUTION name (not /^[A-Z]{2}/): $distribution\n";
197 my $path = "modules/by-authors/id/$1/$1$2/$distribution";
198 return cpan_url( $mirror_url, $path );
199}
200
201# compare a list of modules against their CPAN equivalents
202
203sub do_compare {
204 my (
205 $modules, $outfh, $output_file, $cache_dir,
206 $mirror_url, $verbose, $use_diff, $reverse,
207 $diff_opts, $wanted_upstreams
208 ) = @_;
209
210 # first, make sure we have a directory where they can all be untarred,
211 # and if its a permanent directory, clear any previous content
212 my $untar_dir = catdir( $cache_dir, UNTAR_DIR );
213 my $src_dir = catdir( $cache_dir, SRC_DIR );
214 for my $d ( $src_dir, $untar_dir ) {
215 next if -d $d;
216 mkdir $d or die "mkdir $d: $!\n";
217 }
218
219 my %ignorable = map { ( $_ => 1 ) } @Maintainers::IGNORABLE;
220
221 my %seen_dist;
222 for my $module (@$modules) {
223 warn "Processing $module ...\n" if defined $output_file;
224
225 my $m = $Maintainers::Modules{$module}
226 or die "ERROR: No such module in Maintainers.pl: '$module'\n";
227
228 unless ( $m->{CPAN} ) {
229 print $outfh "WARNING: $module is not dual-life; skipping\n";
230 next;
231 }
232
233 my $dist = $m->{DISTRIBUTION};
234 die "ERROR: $module has no DISTRIBUTION entry\n" unless defined $dist;
235
236 if ( $seen_dist{$dist}++ ) {
237 warn "WARNING: duplicate entry for $dist in $module\n";
238 }
239
240 my $upstream = $m->{UPSTREAM} || 'UNKNOWN';
241 next if @$wanted_upstreams and !( $upstream ~~ $wanted_upstreams );
242
243 print $outfh "\n$module - "
244 . $Maintainers::Modules{$module}->{DISTRIBUTION} . "\n";
245 print $outfh " upstream is: "
246 . ( $m->{UPSTREAM} || 'UNKNOWN!' ) . "\n";
247
248 my $cpan_dir;
249 eval {
250 $cpan_dir =
251 get_distribution( $src_dir, $mirror_url, $untar_dir, $module,
252 $dist );
253 };
254 if ($@) {
255 print $outfh " ", $@;
256 print $outfh " (skipping)\n";
257 next;
258 }
259
260 my @perl_files = Maintainers::get_module_files($module);
261
262 my $manifest = catfile( $cpan_dir, 'MANIFEST' );
263 die "ERROR: no such file: $manifest\n" unless -f $manifest;
264
265 my $cpan_files = ExtUtils::Manifest::maniread($manifest);
266 my @cpan_files = sort keys %$cpan_files;
267
268 ( my $main_pm = $module ) =~ s{::}{/}g;
269 $main_pm .= ".pm";
270
271 my ( $excluded, $map, $customized ) =
272 get_map( $m, $module, \@perl_files );
273
274 my %perl_unseen;
275 @perl_unseen{@perl_files} = ();
276 my %perl_files = %perl_unseen;
277
278 foreach my $cpan_file (@cpan_files) {
279 my $mapped_file =
280 cpan_to_perl( $excluded, $map, $customized, $cpan_file );
281 unless ( defined $mapped_file ) {
282 print $outfh " Excluded: $cpan_file\n" if $verbose;
283 next;
284 }
285
286 if ( exists $perl_files{$mapped_file} ) {
287 delete $perl_unseen{$mapped_file};
288 }
289 else {
290
291 # some CPAN files foo are stored in core as foo.packed,
292 # which are then unpacked by 'make test_prep'
293 my $packed_file = "$mapped_file.packed";
294 if ( exists $perl_files{$packed_file} ) {
295 if ( !-f $mapped_file and -f $packed_file ) {
296 print $outfh <<EOF;
297WARNING: $mapped_file not found, but .packed variant exists.
298Perhaps you need to run 'make test_prep'?
299EOF
300 next;
301 }
302 delete $perl_unseen{$packed_file};
303 }
304 else {
305 if ( $ignorable{$cpan_file} ) {
306 print $outfh " Ignored: $cpan_file\n" if $verbose;
307 next;
308 }
309
310 unless ($use_diff) {
311 print $outfh " CPAN only: $cpan_file",
312 ( $cpan_file eq $mapped_file )
313 ? "\n"
314 : " (missing $mapped_file)\n";
315 }
316 next;
317 }
318 }
319
320 my $abs_cpan_file = catfile( $cpan_dir, $cpan_file );
321
322 # should never happen
323 die "ERROR: can't find file $abs_cpan_file\n"
324 unless -f $abs_cpan_file;
325
326 # might happen if the FILES entry in Maintainers.pl is wrong
327 unless ( -f $mapped_file ) {
328 print $outfh "WARNING: perl file not found: $mapped_file\n";
329 next;
330 }
331
332 my $relative_mapped_file = relatively_mapped($mapped_file);
333
334 my $different =
335 file_diff( $outfh, $abs_cpan_file, $mapped_file, $reverse,
336 $diff_opts );
337 if ( $different && customized( $m, $relative_mapped_file ) ) {
338 if (! $use_diff ) {
339 print $outfh " Customized for blead: $relative_mapped_file\n";
340 }
341 }
342 elsif ($different) {
343 if ($use_diff) {
344 $different =~ s{^(--- |\+\+\+ )/tmp/[^/]+/}{$1}gm;
345 print $outfh $different;
346 }
347 else {
348 if ( $cpan_file eq $relative_mapped_file ) {
349 print $outfh " Modified: $relative_mapped_file\n";
350 }
351 else {
352 print $outfh
353 " Modified: $cpan_file $relative_mapped_file\n";
354 }
355
356 if ( $cpan_file =~ m{\.pm\z} ) {
357 my $pv = MM->parse_version($mapped_file) || 'unknown';
358 my $cv = MM->parse_version($abs_cpan_file) || 'unknown';
359 if ( $pv ne $cv ) {
360 print $outfh
361" Version mismatch in '$cpan_file':\n $cv (cpan) vs $pv (perl)\n";
362 }
363 }
364
365 }
366 }
367 elsif ( customized( $m, $relative_mapped_file ) ) {
368 # Maintainers.pl says we customized it, but it looks the
369 # same as CPAN so maybe we lost the customization, which
370 # could be bad
371 if ( $cpan_file eq $relative_mapped_file ) {
372 print $outfh " Blead customization missing: $cpan_file\n";
373 }
374 else {
375 print $outfh
376 " Blead customization missing: $cpan_file $relative_mapped_file\n";
377 }
378 }
379 elsif ($verbose) {
380 if ( $cpan_file eq $relative_mapped_file ) {
381 print $outfh " Unchanged: $cpan_file\n";
382 }
383 else {
384 print $outfh
385 " Unchanged: $cpan_file $relative_mapped_file\n";
386 }
387 }
388 }
389 for ( sort keys %perl_unseen ) {
390 my $relative_mapped_file = relatively_mapped($_);
391 if ( customized( $m, $relative_mapped_file ) ) {
392 print $outfh " Customized for blead: $_\n";
393 }
394 else {
395 print $outfh " Perl only: $_\n" unless $use_diff;
396 }
397 }
398 }
399}
400
401sub relatively_mapped {
402 my $relative = shift;
403 $relative =~ s/^(cpan|dist|ext)\/.*?\///;
404 return $relative;
405}
406
407# given FooBar-1.23_45.tar.gz, return FooBar
408
409sub distro_base {
410 my $d = shift;
411 $d =~ s/\.tar\.gz$//;
412 $d =~ s/\.gip$//;
413 $d =~ s/[\d\-_\.]+$//;
414 return $d;
415}
416
417# process --crosscheck action:
418# ie list all distributions whose CPAN versions differ from that listed in
419# Maintainers.pl
420
421sub do_crosscheck {
422 my ( $outfh, $cache_dir, $mirror_url, $force, $modules ) = @_;
423
424 my $file = '02packages.details.txt';
425 my $download_dir = $cache_dir || File::Temp::tempdir( CLEANUP => 1 );
426 my $path = catfile( $download_dir, $file );
427 my $gzfile = "$path.gz";
428
429 # grab 02packages.details.txt
430
431 my $url = cpan_url( $mirror_url, "modules/02packages.details.txt.gz" );
432
433 if ( !-f $gzfile or $force ) {
434 unlink $gzfile;
435 my_getstore( $url, $gzfile );
436 }
437 unlink $path;
438 IO::Uncompress::Gunzip::gunzip( $gzfile, $path )
439 or die
440 "ERROR: failed to ungzip $gzfile: $IO::Uncompress::Gunzip::GunzipError\n";
441
442 # suck in the data from it
443
444 open my $fh, '<', $path
445 or die "ERROR: open: $file: $!\n";
446
447 my %distros;
448 my %modules;
449
450 while (<$fh>) {
451 next if 1 .. /^$/;
452 chomp;
453 my @f = split ' ', $_;
454 if ( @f != 3 ) {
455 warn
456 "WARNING: $file:$.: line doesn't have three fields (skipping)\n";
457 next;
458 }
459 my $distro = $f[2];
460 $distro =~ s{^[A-Z]/[A-Z]{2}/}{}; # strip leading A/AB/
461 $modules{ $f[0] } = $distro;
462
463 ( my $short_distro = $distro ) =~ s{^.*/}{};
464
465 $distros{ distro_base($short_distro) }{$distro} = 1;
466 }
467
468 for my $module (@$modules) {
469 my $m = $Maintainers::Modules{$module}
470 or die "ERROR: No such module in Maintainers.pl: '$module'\n";
471
472 unless ( $m->{CPAN} ) {
473 print $outfh "\nWARNING: $module is not dual-life; skipping\n";
474 next;
475 }
476
477 # given an entry like
478 # Foo::Bar 1.23 foo-bar-1.23.tar.gz,
479 # first compare the module name against Foo::Bar, and failing that,
480 # against foo-bar
481
482 my $pdist = $m->{DISTRIBUTION};
483 die "ERROR: $module has no DISTRIBUTION entry\n" unless defined $pdist;
484
485 my $cdist = $modules{$module};
486 ( my $short_pdist = $pdist ) =~ s{^.*/}{};
487
488 unless ( defined $cdist ) {
489 my $d = $distros{ distro_base($short_pdist) };
490 unless ( defined $d ) {
491 print $outfh "\n$module: Can't determine current CPAN entry\n";
492 next;
493 }
494 if ( keys %$d > 1 ) {
495 print $outfh
496 "\n$module: (found more than one CPAN candidate):\n";
497 print $outfh " perl: $pdist\n";
498 print $outfh " CPAN: $_\n" for sort keys %$d;
499 next;
500 }
501 $cdist = ( keys %$d )[0];
502 }
503
504 if ( $cdist ne $pdist ) {
505 print $outfh "\n$module:\n Perl: $pdist\n CPAN: $cdist\n";
506 }
507 }
508}
509
510# get the EXCLUDED and MAP entries for this module, or
511# make up defauts if they don't exist
512
513sub get_map {
514 my ( $m, $module_name, $perl_files ) = @_;
515
516 my ( $excluded, $map, $customized ) = @$m{qw(EXCLUDED MAP CUSTOMIZED)};
517
518 $excluded ||= [];
519 $customized ||= [];
520
521 return $excluded, $map, $customized if $map;
522
523 # all files under ext/foo-bar (plus maybe some under t/lib)???
524
525 my $ext;
526 for (@$perl_files) {
527 if (m{^((?:ext|dist|cpan)/[^/]+/)}) {
528 if ( defined $ext and $ext ne $1 ) {
529
530 # more than one ext/$ext/
531 undef $ext;
532 last;
533 }
534 $ext = $1;
535 }
536 elsif (m{^t/lib/}) {
537 next;
538 }
539 else {
540 undef $ext;
541 last;
542 }
543 }
544
545 if ( defined $ext ) {
546 $map = { '' => $ext },;
547 }
548 else {
549 ( my $base = $module_name ) =~ s{::}{/}g;
550 $base = "lib/$base";
551 $map = {
552 'lib/' => 'lib/',
553 '' => "$base/",
554 };
555 }
556 return $excluded, $map, $customized;
557}
558
559# Given an exclude list and a mapping hash, convert a CPAN filename
560# (eg 't/bar.t') to the equivalent perl filename (eg 'lib/Foo/Bar/t/bar.t').
561# Returns an empty list for an excluded file
562
563sub cpan_to_perl {
564 my ( $excluded, $map, $customized, $cpan_file ) = @_;
565
566 for my $exclude (@$excluded) {
567 next if $exclude ~~ $customized;
568
569 # may be a simple string to match exactly, or a pattern
570 if ( ref $exclude ) {
571 return if $cpan_file =~ $exclude;
572 }
573 else {
574 return if $cpan_file eq $exclude;
575 }
576 }
577
578 my $perl_file = $cpan_file;
579
580 # try longest prefix first, then alphabetically on tie-break
581 for
582 my $prefix ( sort { length($b) <=> length($a) || $a cmp $b } keys %$map )
583 {
584 last if $perl_file =~ s/^\Q$prefix/$map->{$prefix}/;
585 }
586 return $perl_file;
587}
588
589# fetch a file from a URL and store it in a file given by a filename
590
591sub my_getstore {
592 my ( $url, $file ) = @_;
593 File::Path::mkpath( File::Basename::dirname($file) );
594 if ( $url =~ qr{\Afile://(?:localhost)?/} ) {
595 ( my $local_path = $url ) =~ s{\Afile://(?:localhost)?}{};
596 File::Copy::copy( $local_path, $file );
597 } else {
598 my $http = HTTP::Tiny->new;
599 my $response = $http->mirror($url, $file);
600 return $response->{success};
601 }
602}
603
604# download and unpack a distribution
605# Returns the full pathname of the extracted directory
606# (eg '/tmp/XYZ/Foo_bar-1.23')
607
608# cache_dir: where to download the .tar.gz file to
609# mirror_url: CPAN mirror to download from
610# untar_dir: where to untar or unzup the file
611# module: name of module
612# dist: name of the distribution
613
614sub get_distribution {
615 my ( $src_dir, $mirror_url, $untar_dir, $module, $dist ) = @_;
616
617 $dist =~ m{.+/([^/]+)$}
618 or die
619 "ERROR: $module: invalid DISTRIBUTION name (no AUTHOR/ prefix): $dist\n";
620 my $filename = $1;
621
622 my $download_file = catfile( $src_dir, $filename );
623
624 # download distribution
625
626 if ( -f $download_file and !-s $download_file ) {
627
628 # wget can leave a zero-length file on failed download
629 unlink $download_file;
630 }
631
632 unless ( -f $download_file ) {
633
634 # not cached
635 my $url = cpan_url_distribution( $mirror_url, $dist );
636 my_getstore( $url, $download_file )
637 or die "ERROR: Could not fetch '$url'\n";
638 }
639
640 # get the expected name of the extracted distribution dir
641
642 my $path = catfile( $untar_dir, $filename );
643
644 $path =~ s/\.tar\.gz$//
645 or $path =~ s/\.zip$//
646 or die
647 "ERROR: downloaded file does not have a recognised suffix: $path\n";
648
649 # extract it unless we already have it cached or tarball is newer
650 if ( !-d $path || ( -M $download_file < -M $path ) ) {
651 my $ae = Archive::Extract->new( archive => $download_file );
652 $ae->extract( to => $untar_dir )
653 or die
654 "ERROR: failed to extract distribution '$download_file to temp. dir: "
655 . $ae->error() . "\n";
656
657 $path = $ae->extract_path;
658 }
659
660 die "ERROR: Extracted tarball does not appear as $path\n" unless -d $path;
661
662 return $path;
663}
664
665# produce the diff of a single file
666sub file_diff {
667 my $outfh = shift;
668 my $cpan_file = shift;
669 my $perl_file = shift;
670 my $reverse = shift;
671 my $diff_opts = shift;
672
673 my @cmd = ( DIFF_CMD, split ' ', $diff_opts );
674 if ($reverse) {
675 push @cmd, $perl_file, $cpan_file;
676 }
677 else {
678 push @cmd, $cpan_file, $perl_file;
679 }
680 return `@cmd`;
681
682}
683
684sub customized {
685 my ( $module_data, $file ) = @_;
686 return grep { $file eq $_ } @{ $module_data->{CUSTOMIZED} };
687}
688
689run();
690