| 1 | #!perl |
| 2 | # Generates info for Module::CoreList from this perl tree |
| 3 | # run this from the root of a perl tree |
| 4 | # |
| 5 | # Data is on STDOUT. |
| 6 | # |
| 7 | # With an optional arg specifying the root of a CPAN mirror, outputs the |
| 8 | # %upstream and %bug_tracker hashes too. |
| 9 | |
| 10 | use autodie; |
| 11 | use strict; |
| 12 | use warnings; |
| 13 | use File::Find; |
| 14 | use ExtUtils::MM_Unix; |
| 15 | use version; |
| 16 | use lib "Porting"; |
| 17 | use Maintainers qw(%Modules files_to_modules); |
| 18 | use File::Spec; |
| 19 | use Parse::CPAN::Meta; |
| 20 | use IPC::Cmd 'can_run'; |
| 21 | use HTTP::Tiny; |
| 22 | use IO::Uncompress::Gunzip; |
| 23 | |
| 24 | my $corelist_file = 'dist/Module-CoreList/lib/Module/CoreList.pm'; |
| 25 | my $pod_file = 'dist/Module-CoreList/lib/Module/CoreList.pod'; |
| 26 | |
| 27 | my %lines; |
| 28 | my %module_to_file; |
| 29 | my %modlist; |
| 30 | |
| 31 | die "usage: $0 [ cpan-mirror/ ] [ 5.x.y] \n" unless @ARGV <= 2; |
| 32 | my $cpan = shift; |
| 33 | my $raw_version = shift || $]; |
| 34 | my $perl_version = version->parse("$raw_version"); |
| 35 | my $perl_vnum = $perl_version->numify; |
| 36 | my $perl_vstring = $perl_version->normal; # how do we get version.pm to not give us leading v? |
| 37 | $perl_vstring =~ s/^v//; |
| 38 | |
| 39 | if ( !-f 'MANIFEST' ) { |
| 40 | die "Must be run from the root of a clean perl tree\n"; |
| 41 | } |
| 42 | |
| 43 | open( my $corelist_fh, '<', $corelist_file ); |
| 44 | my $corelist = join( '', <$corelist_fh> ); |
| 45 | |
| 46 | if ($cpan) { |
| 47 | my $modlistfile = File::Spec->catfile( $cpan, 'modules', '02packages.details.txt' ); |
| 48 | my $content; |
| 49 | |
| 50 | my $fh; |
| 51 | if ( -e $modlistfile ) { |
| 52 | warn "Reading the module list from $modlistfile"; |
| 53 | open $fh, '<', $modlistfile; |
| 54 | } elsif ( -e $modlistfile . ".gz" ) { |
| 55 | my $zcat = can_run('gzcat') || can_run('zcat') or die "Can't find gzcat or zcat"; |
| 56 | warn "Reading the module list from $modlistfile.gz"; |
| 57 | open $fh, '-|', "$zcat $modlistfile.gz"; |
| 58 | } else { |
| 59 | warn "About to fetch 02packages from ftp.funet.fi. This may take a few minutes\n"; |
| 60 | my $gzipped_content = fetch_url('http://ftp.funet.fi/pub/CPAN/modules/02packages.details.txt.gz'); |
| 61 | unless ($gzipped_content) { |
| 62 | die "Unable to read 02packages.details.txt from either your CPAN mirror or ftp.funet.fi"; |
| 63 | } |
| 64 | IO::Uncompress::Gunzip::gunzip(\$gzipped_content, \$content, Transparent => 0) |
| 65 | or die "Can't gunzip content: $IO::Uncompress::Gunzip::GunzipError"; |
| 66 | } |
| 67 | |
| 68 | if ( $fh and !$content ) { |
| 69 | local $/ = "\n"; |
| 70 | $content = join( '', <$fh> ); |
| 71 | } |
| 72 | |
| 73 | die "Incompatible modlist format" |
| 74 | unless $content =~ /^Columns: +package name, version, path/m; |
| 75 | |
| 76 | # Converting the file to a hash is about 5 times faster than a regexp flat |
| 77 | # lookup. |
| 78 | for ( split( qr/\n/, $content ) ) { |
| 79 | next unless /^([A-Za-z_:0-9]+) +[-0-9.undefHASHVERSIONvsetwhenloadingbogus]+ +(\S+)/; |
| 80 | $modlist{$1} = $2; |
| 81 | } |
| 82 | } |
| 83 | |
| 84 | find( |
| 85 | sub { |
| 86 | /(\.pm|_pm\.PL)$/ or return; |
| 87 | /PPPort\.pm$/ and return; |
| 88 | my $module = $File::Find::name; |
| 89 | $module =~ /\b(demo|t|private)\b/ and return; # demo or test modules |
| 90 | my $version = MM->parse_version($_); |
| 91 | defined $version or $version = 'undef'; |
| 92 | $version =~ /\d/ and $version = "'$version'"; |
| 93 | |
| 94 | # some heuristics to figure out the module name from the file name |
| 95 | $module =~ s{^(lib|cpan|dist|(?:symbian/)?ext)/}{} |
| 96 | and $1 ne 'lib' |
| 97 | and ( |
| 98 | $module =~ s{\b(\w+)/\1\b}{$1}, |
| 99 | $module =~ s{^B/O}{O}, |
| 100 | $module =~ s{^Devel-PPPort}{Devel}, |
| 101 | $module =~ s{^libnet/}{}, |
| 102 | $module =~ s{^Encode/encoding}{encoding}, |
| 103 | $module =~ s{^IPC-SysV/}{IPC/}, |
| 104 | $module =~ s{^MIME-Base64/QuotedPrint}{MIME/QuotedPrint}, |
| 105 | $module =~ s{^(?:DynaLoader|Errno|Opcode|XSLoader)/}{}, |
| 106 | $module =~ s{^Sys-Syslog/win32}{Sys-Syslog}, |
| 107 | $module =~ s{^Time-Piece/Seconds}{Time/Seconds}, |
| 108 | ); |
| 109 | $module =~ s{^lib/}{}g; |
| 110 | $module =~ s{/}{::}g; |
| 111 | $module =~ s{-}{::}g; |
| 112 | $module =~ s{^.*::lib::}{}; # turns Foo/lib/Foo.pm into Foo.pm |
| 113 | $module =~ s/(\.pm|_pm\.PL)$//; |
| 114 | $lines{$module} = $version; |
| 115 | $module_to_file{$module} = $File::Find::name; |
| 116 | }, |
| 117 | 'symbian/ext', |
| 118 | 'lib', |
| 119 | 'ext', |
| 120 | 'cpan', |
| 121 | 'dist' |
| 122 | ); |
| 123 | |
| 124 | -e 'configpm' and $lines{Config} = "$]"; |
| 125 | |
| 126 | if ( open my $ucdv, "<", "lib/unicore/version" ) { |
| 127 | chomp( my $ucd = <$ucdv> ); |
| 128 | $lines{Unicode} = "'$ucd'"; |
| 129 | close $ucdv; |
| 130 | } |
| 131 | |
| 132 | my $delta_data = make_corelist_delta( |
| 133 | $perl_vnum, |
| 134 | \%lines, |
| 135 | \%Module::CoreList::version |
| 136 | ); |
| 137 | |
| 138 | my $versions_in_release = " " . $perl_vnum . " => {\n"; |
| 139 | $versions_in_release .= " delta_from => $delta_data->{delta_from},\n"; |
| 140 | $versions_in_release .= " changed => {\n"; |
| 141 | foreach my $key (sort keys $delta_data->{changed}) { |
| 142 | $versions_in_release .= sprintf " %-24s=> %s,\n", "'$key'", |
| 143 | defined $delta_data->{changed}{$key} ? "'" |
| 144 | . $delta_data->{changed}{$key} . "'" : "undef"; |
| 145 | } |
| 146 | $versions_in_release .= " },\n"; |
| 147 | $versions_in_release .= " removed => {\n"; |
| 148 | for my $key (sort keys($delta_data->{removed} || {})) { |
| 149 | $versions_in_release .= sprintf " %-24s=> %s,\n", "'$key'", 1; |
| 150 | } |
| 151 | $versions_in_release .= " }\n"; |
| 152 | $versions_in_release .= " },\n"; |
| 153 | |
| 154 | $corelist =~ s/^(my %delta\s*=\s*.*?)(^\);)$/$1$versions_in_release$2/ism; |
| 155 | |
| 156 | exit unless %modlist; |
| 157 | |
| 158 | # We have to go through this two stage lookup, given how Maintainers.pl keys its |
| 159 | # data by "Module", which is really a dist. |
| 160 | my $file_to_M = files_to_modules( values %module_to_file ); |
| 161 | |
| 162 | sub slurp_utf8($) { |
| 163 | open my $fh, "<:utf8", "$_[0]" |
| 164 | or die "can't open $_[0] for reading: $!"; |
| 165 | return do { local $/; <$fh> }; |
| 166 | } |
| 167 | |
| 168 | sub parse_cpan_meta($) { |
| 169 | return Parse::CPAN::Meta->${ |
| 170 | $_[0] =~ /\A\x7b/ ? \"load_json_string" : \"load_yaml_string" |
| 171 | }($_[0]); |
| 172 | } |
| 173 | |
| 174 | my %module_to_upstream; |
| 175 | my %module_to_dist; |
| 176 | my %dist_to_meta_YAML; |
| 177 | my %module_to_deprecated; |
| 178 | while ( my ( $module, $file ) = each %module_to_file ) { |
| 179 | my $M = $file_to_M->{$file}; |
| 180 | next unless $M; |
| 181 | next if $Modules{$M}{MAINTAINER} && $Modules{$M}{MAINTAINER} eq 'p5p'; |
| 182 | $module_to_upstream{$module} = $Modules{$M}{UPSTREAM}; |
| 183 | $module_to_deprecated{$module} = 1 if $Modules{$M}{DEPRECATED}; |
| 184 | next |
| 185 | if defined $module_to_upstream{$module} |
| 186 | && $module_to_upstream{$module} =~ /^(?:blead|first-come)$/; |
| 187 | my $dist = $modlist{$module}; |
| 188 | unless ($dist) { |
| 189 | warn "Can't find a distribution for $module\n"; |
| 190 | next; |
| 191 | } |
| 192 | $module_to_dist{$module} = $dist; |
| 193 | |
| 194 | next if exists $dist_to_meta_YAML{$dist}; |
| 195 | |
| 196 | $dist_to_meta_YAML{$dist} = undef; |
| 197 | |
| 198 | # Like it or lump it, this has to be Unix format. |
| 199 | my $meta_YAML_path = "authors/id/$dist"; |
| 200 | $meta_YAML_path =~ s/(?:tar\.gz|tar\.bz2|zip|tgz)$/meta/ |
| 201 | or die "ERROR: bad meta YAML path: '$meta_YAML_path'"; |
| 202 | my $meta_YAML_url = 'http://ftp.funet.fi/pub/CPAN/' . $meta_YAML_path; |
| 203 | |
| 204 | if ( -e "$cpan/$meta_YAML_path" ) { |
| 205 | $dist_to_meta_YAML{$dist} = parse_cpan_meta(slurp_utf8( $cpan . "/" . $meta_YAML_path )); |
| 206 | } elsif ( my $content = fetch_url($meta_YAML_url) ) { |
| 207 | unless ($content) { |
| 208 | warn "Failed to fetch $meta_YAML_url\n"; |
| 209 | next; |
| 210 | } |
| 211 | eval { $dist_to_meta_YAML{$dist} = parse_cpan_meta($content); }; |
| 212 | if ( my $err = $@ ) { |
| 213 | warn "$meta_YAML_path: ".$err; |
| 214 | next; |
| 215 | } |
| 216 | } else { |
| 217 | warn "$meta_YAML_path does not exist for $module\n"; |
| 218 | |
| 219 | # I tried code to open the tarballs with Archive::Tar to find and |
| 220 | # extract META.yml, but only Text-Tabs+Wrap-2006.1117.tar.gz had one, |
| 221 | # so it's not worth including. |
| 222 | next; |
| 223 | } |
| 224 | } |
| 225 | |
| 226 | my $upstream_stanza = "%upstream = (\n"; |
| 227 | foreach my $module ( sort keys %module_to_upstream ) { |
| 228 | my $upstream = defined $module_to_upstream{$module} ? "'$module_to_upstream{$module}'" : 'undef'; |
| 229 | $upstream_stanza .= sprintf " %-24s=> %s,\n", "'$module'", $upstream; |
| 230 | } |
| 231 | $upstream_stanza .= ");"; |
| 232 | |
| 233 | $corelist =~ s/^%upstream .*? ;$/$upstream_stanza/ismx; |
| 234 | |
| 235 | # Deprecation generation |
| 236 | { |
| 237 | my $delta_data = make_corelist_delta( |
| 238 | $perl_vnum, |
| 239 | \%module_to_deprecated, |
| 240 | do { no warnings 'once'; \%Module::CoreList::deprecated }, |
| 241 | ); |
| 242 | |
| 243 | my $deprecated_stanza = " " . $perl_vnum . " => {\n"; |
| 244 | $deprecated_stanza .= " delta_from => $delta_data->{delta_from},\n"; |
| 245 | $deprecated_stanza .= " changed => {\n"; |
| 246 | foreach my $key (sort keys $delta_data->{changed}) { |
| 247 | $deprecated_stanza .= sprintf " %-24s=> %s,\n", "'$key'", |
| 248 | defined $delta_data->{changed}{$key} ? "'" |
| 249 | . $delta_data->{changed}{$key} . "'" : "undef"; |
| 250 | } |
| 251 | $deprecated_stanza .= " },\n"; |
| 252 | $deprecated_stanza .= " removed => {\n"; |
| 253 | for my $key (sort keys($delta_data->{removed} || {})) { |
| 254 | $deprecated_stanza .= sprintf " %-24s=> %s,\n", "'$key'", 1; |
| 255 | } |
| 256 | $deprecated_stanza .= " }\n"; |
| 257 | $deprecated_stanza .= " },\n"; |
| 258 | |
| 259 | $corelist =~ s/^(%deprecated\s*=\s*.*?)(^\);)$/$1$deprecated_stanza$2/xism; |
| 260 | } |
| 261 | |
| 262 | my $tracker = "%bug_tracker = (\n"; |
| 263 | foreach my $module ( sort keys %module_to_upstream ) { |
| 264 | my $upstream = defined $module_to_upstream{$module}; |
| 265 | next |
| 266 | if defined $upstream |
| 267 | and $upstream eq 'blead' || $upstream eq 'first-come'; |
| 268 | |
| 269 | my $bug_tracker; |
| 270 | |
| 271 | my $dist = $module_to_dist{$module}; |
| 272 | $bug_tracker = $dist_to_meta_YAML{$dist}->{resources}{bugtracker} |
| 273 | if $dist; |
| 274 | $bug_tracker = $bug_tracker->{web} if ref($bug_tracker) eq "HASH"; |
| 275 | |
| 276 | $bug_tracker = defined $bug_tracker ? quote($bug_tracker) : 'undef'; |
| 277 | next if $bug_tracker eq "'http://rt.perl.org/perlbug/'"; |
| 278 | $tracker .= sprintf " %-24s=> %s,\n", "'$module'", $bug_tracker; |
| 279 | } |
| 280 | $tracker .= ");"; |
| 281 | |
| 282 | $corelist =~ s/^%bug_tracker .*? ;/$tracker/eismx; |
| 283 | |
| 284 | unless ( |
| 285 | $corelist =~ /^%released \s* = \s* \( |
| 286 | .*? |
| 287 | $perl_vnum => .*? |
| 288 | \);/ismx |
| 289 | ) |
| 290 | { |
| 291 | warn "Adding $perl_vnum to the list of released perl versions. Please consider adding a release date.\n"; |
| 292 | $corelist =~ s/^(%released \s* = \s* .*?) ( \) ) |
| 293 | /$1 $perl_vnum => '????-??-??',\n $2/ismx; |
| 294 | } |
| 295 | |
| 296 | write_corelist($corelist,$corelist_file); |
| 297 | |
| 298 | open( my $pod_fh, '<', $pod_file ); |
| 299 | my $pod = join( '', <$pod_fh> ); |
| 300 | |
| 301 | unless ( $pod =~ /and $perl_vstring releases of perl/ ) { |
| 302 | warn "Adding $perl_vstring to the list of perl versions covered by Module::CoreList\n"; |
| 303 | $pod =~ s/(currently\s+covers\s+(?:.*?))\s*and\s+(.*?)\s+releases\s+of\s+perl/$1, $2 and $perl_vstring releases of perl/ism; |
| 304 | } |
| 305 | |
| 306 | write_corelist($pod,$pod_file); |
| 307 | |
| 308 | warn "All done. Please check over $corelist_file and $pod_file carefully before committing. Thanks!\n"; |
| 309 | |
| 310 | |
| 311 | sub write_corelist { |
| 312 | my $content = shift; |
| 313 | my $filename = shift; |
| 314 | open (my $clfh, ">", $filename); |
| 315 | binmode $clfh; |
| 316 | print $clfh $content; |
| 317 | close($clfh); |
| 318 | } |
| 319 | |
| 320 | sub fetch_url { |
| 321 | my $url = shift; |
| 322 | my $http = HTTP::Tiny->new; |
| 323 | my $response = $http->get($url); |
| 324 | if ($response->{success}) { |
| 325 | return $response->{content}; |
| 326 | } else { |
| 327 | warn "Error fetching $url: $response->{status} $response->{reason}\n"; |
| 328 | return; |
| 329 | } |
| 330 | } |
| 331 | |
| 332 | sub make_corelist_delta { |
| 333 | my($version, $lines, $existing) = @_; |
| 334 | # Trust core perl, if someone does use a weird version number the worst that |
| 335 | # can happen is an extra delta entry for a module. |
| 336 | my %versions = map { $_ => eval $lines->{$_} } keys %$lines; |
| 337 | |
| 338 | # Ensure we have the corelist data loaded from this perl checkout, not the system one. |
| 339 | require $corelist_file; |
| 340 | |
| 341 | my %deltas; |
| 342 | # Search for the release with the least amount of changes (this avoids having |
| 343 | # to ask for where this perl was branched from). |
| 344 | for my $previous(reverse sort keys %$existing) { |
| 345 | # Shouldn't happen, but ensure we don't load weird data... |
| 346 | next if $previous > $version || $previous == $version && $previous eq $version; |
| 347 | |
| 348 | my $delta = $deltas{$previous} = {}; |
| 349 | ($delta->{changed}, $delta->{removed}) = calculate_delta( |
| 350 | $existing->{$previous}, \%versions); |
| 351 | } |
| 352 | |
| 353 | my $smallest = (sort { |
| 354 | (keys($deltas{$a}->{changed}) + keys($deltas{$a}->{removed})) <=> |
| 355 | (keys($deltas{$b}->{changed})+ keys($deltas{$b}->{removed})) |
| 356 | } keys %deltas)[0]; |
| 357 | |
| 358 | return { |
| 359 | delta_from => $smallest, |
| 360 | changed => $deltas{$smallest}{changed}, |
| 361 | removed => $deltas{$smallest}{removed}, |
| 362 | } |
| 363 | } |
| 364 | |
| 365 | # Calculate (changed, removed) modules between two versions. |
| 366 | sub calculate_delta { |
| 367 | my($from, $to) = @_; |
| 368 | my(%changed, %removed); |
| 369 | |
| 370 | for my $package(keys $from) { |
| 371 | if(not exists $to->{$package}) { |
| 372 | $removed{$package} = 1; |
| 373 | } |
| 374 | } |
| 375 | |
| 376 | for my $package(keys $to) { |
| 377 | if(!exists $from->{$package} |
| 378 | || (defined $from->{$package} && !defined $to->{$package}) |
| 379 | || (!defined $from->{$package} && defined $to->{$package}) |
| 380 | || (defined $from->{$package} && defined $to->{$package} |
| 381 | && $from->{$package} ne $to->{$package})) { |
| 382 | $changed{$package} = $to->{$package}; |
| 383 | } |
| 384 | } |
| 385 | |
| 386 | return \%changed, \%removed; |
| 387 | } |
| 388 | |
| 389 | sub quote { |
| 390 | my ($str) = @_; |
| 391 | # There's gotta be something already doing this properly that we could just |
| 392 | # reuse, but I can't quite thing of where to look for it, so I'm gonna do |
| 393 | # the simplest possible thing that'll allow me to release 5.17.7. --rafl |
| 394 | $str =~ s/'/\\'/g; |
| 395 | "'${str}'"; |
| 396 | } |