| 1 | #!perl |
| 2 | use 5.010; |
| 3 | use strict; |
| 4 | use warnings; |
| 5 | use lib 'Porting'; |
| 6 | use Maintainers qw/%Modules/; |
| 7 | use lib 'dist/Module-CoreList/lib'; |
| 8 | use Module::CoreList; |
| 9 | use Getopt::Long; |
| 10 | |
| 11 | =head1 USAGE |
| 12 | |
| 13 | # generate the module changes for the Perl you are currently building |
| 14 | ./perl -Ilib Porting/corelist-perldelta.pl |
| 15 | |
| 16 | # update the module changes for the Perl you are currently building |
| 17 | ./perl -Ilib Porting/corelist-perldelta.pl --mode=update pod/perldelta.pod |
| 18 | |
| 19 | # generate a diff between the corelist sections of two perldelta* files: |
| 20 | perl Porting/corelist-perldelta.pl --mode=check 5.017001 5.017002 <perl5172delta.pod |
| 21 | |
| 22 | =head1 ABOUT |
| 23 | |
| 24 | corelist-perldelta.pl is a bit schizophrenic. The part to generate the |
| 25 | new Perldelta text does not need Algorithm::Diff, but wants to be |
| 26 | run with the freshly built Perl. |
| 27 | |
| 28 | The part to check the diff wants to be run with a Perl that has an up-to-date |
| 29 | L<Module::CoreList>, but needs the outside L<Algorithm::Diff>. |
| 30 | |
| 31 | Ideally, the program will be split into two separate programs, one |
| 32 | to generate the text and one to show the diff between the |
| 33 | corelist sections of the last perldelta and the next perldelta. |
| 34 | |
| 35 | Currently no information about Removed Modules is displayed in any of the |
| 36 | modes. |
| 37 | |
| 38 | =cut |
| 39 | |
| 40 | my %sections = ( |
| 41 | new => qr/New Modules and Pragma(ta)?/, |
| 42 | updated => qr/Updated Modules and Pragma(ta)?/, |
| 43 | removed => qr/Removed Modules and Pragma(ta)?/, |
| 44 | ); |
| 45 | |
| 46 | my %titles = ( |
| 47 | new => 'New Modules and Pragmata', |
| 48 | updated => 'Updated Modules and Pragmata', |
| 49 | removed => 'Removed Modules and Pragmata', |
| 50 | ); |
| 51 | |
| 52 | my $deprecated; |
| 53 | |
| 54 | sub run { |
| 55 | my %opt = (mode => 'generate'); |
| 56 | |
| 57 | GetOptions(\%opt, |
| 58 | 'mode|m:s', # 'generate', 'check', 'update' |
| 59 | ); |
| 60 | |
| 61 | # by default, compare latest two version in CoreList; |
| 62 | my ($old, $new) = latest_two_perl_versions(); |
| 63 | |
| 64 | # use the provided versions if present |
| 65 | # @ARGV >=2 means [old_version] [new_version] [path/to/file] |
| 66 | if ( @ARGV >= 2) { |
| 67 | ($old, $new) = (shift @ARGV, shift @ARGV); |
| 68 | die "$old is an invalid version\n" if not exists |
| 69 | $Module::CoreList::version{$old}; |
| 70 | die "$new is an invalid version\n" if not exists |
| 71 | $Module::CoreList::version{$new}; |
| 72 | } |
| 73 | |
| 74 | if ( $opt{mode} eq 'generate' ) { |
| 75 | do_generate($old => $new); |
| 76 | } |
| 77 | elsif ( $opt{mode} eq 'check' ) { |
| 78 | do_check(\*ARGV, $old => $new); |
| 79 | } |
| 80 | elsif ( $opt{mode} eq 'update' ) { |
| 81 | do_update_existing(shift @ARGV, $old => $new); |
| 82 | } |
| 83 | else { |
| 84 | die "Unrecognized mode '$opt{mode}'\n"; |
| 85 | } |
| 86 | |
| 87 | exit 0; |
| 88 | } |
| 89 | |
| 90 | sub latest_two_perl_versions { |
| 91 | |
| 92 | my @versions = sort keys %Module::CoreList::version; |
| 93 | |
| 94 | my $new = pop @versions; |
| 95 | |
| 96 | # If a fully-padded version number ends in a zero (as in "5.019010"), that |
| 97 | # version shows up in %Module::CoreList::version both with and without its |
| 98 | # trailing zeros. So skip all versions that are numerically equal to $new. |
| 99 | pop @versions while @versions && $versions[-1] == $new; |
| 100 | |
| 101 | die "Too few distinct core versions in %Module::CoreList::version ?!\n" |
| 102 | if !@versions; |
| 103 | |
| 104 | return $versions[-1], $new; |
| 105 | } |
| 106 | |
| 107 | # Given two perl versions, it returns a list describing the core distributions that have changed. |
| 108 | # The first three elements are hashrefs corresponding to new, updated, and removed modules |
| 109 | # and are of the form (mostly, see the special remarks about removed): |
| 110 | # 'Distribution Name' => ['Distribution Name', previous version number, current version number] |
| 111 | # where the version number is undef if the distribution did not exist. |
| 112 | # The fourth element is an arrayref of core distribution names of those distribution for which it |
| 113 | # is unknown whether they have changed and therefore need to be manually checked. |
| 114 | # |
| 115 | # In most cases, the distribution name in %Modules corresponds to the module that is representative |
| 116 | # of the distribution as listed in Module::CoreList. However, there are a few distribution names |
| 117 | # that do not correspond to a module. %distToModules has been created which maps the distribution |
| 118 | # name to a representative module. The representative module was chosen by either looking at the |
| 119 | # Makefile of the distribution or by seeing which module the distribution has been traditionally |
| 120 | # listed under in past perldeltas. |
| 121 | # |
| 122 | # There are a few distributions for which there is no single representative module (e.g. libnet). |
| 123 | # These distributions are returned as the last element of the list. |
| 124 | # |
| 125 | # %Modules contains a final key, _PERLLIB, which contains a list of modules that are owned by p5p. |
| 126 | # This list contains modules and pragmata that may also be present in Module::CoreList. |
| 127 | # A list of modules are in the list @unclaimedModules, which were manually listed based on whether |
| 128 | # they were independent modules and whether they have been listed in past perldeltas. |
| 129 | # The pragmata were found by doing something like: |
| 130 | # say for sort grep { $_ eq lc $_ and !exists $Modules{$_}} |
| 131 | # keys %{$Module::CoreList::version{'5.019003'}} |
| 132 | # and manually filtering out pragamata that were already covered. |
| 133 | # |
| 134 | # It is currently not possible to differentiate between a removed module and a removed |
| 135 | # distribution. Therefore, the removed hashref contains every module that has been removed, even if |
| 136 | # the module's corresponding distribution has not been removed. |
| 137 | |
| 138 | sub corelist_delta { |
| 139 | my ($old, $new) = @_; |
| 140 | my $corelist = \%Module::CoreList::version; |
| 141 | my %changes = Module::CoreList::changes_between( $old, $new ); |
| 142 | $deprecated = $Module::CoreList::deprecated{$new}; |
| 143 | |
| 144 | my $getModifyType = sub { |
| 145 | my $data = shift; |
| 146 | if ( exists $data->{left} and exists $data->{right} ) { |
| 147 | return 'updated'; |
| 148 | } |
| 149 | elsif ( !exists $data->{left} and exists $data->{right} ) { |
| 150 | return 'new'; |
| 151 | } |
| 152 | elsif ( exists $data->{left} and !exists $data->{right} ) { |
| 153 | return 'removed'; |
| 154 | } |
| 155 | return undef; |
| 156 | }; |
| 157 | |
| 158 | my @unclaimedModules = qw/AnyDBM_File B B::Concise B::Deparse Benchmark Class::Struct Config::Extensions DB DBM_Filter Devel::Peek DirHandle DynaLoader English Errno ExtUtils::Embed ExtUtils::Miniperl ExtUtils::Typemaps ExtUtils::XSSymSet Fcntl File::Basename File::Compare File::Copy File::DosGlob File::Find File::Glob File::stat FileCache FileHandle FindBin GDBM_File Getopt::Std Hash::Util Hash::Util::FieldHash I18N::Langinfo IPC::Open3 NDBM_File ODBM_File Opcode PerlIO PerlIO::encoding PerlIO::mmap PerlIO::scalar PerlIO::via Pod::Functions Pod::Html POSIX SDBM_File SelectSaver Symbol Sys::Hostname Thread Tie::Array Tie::Handle Tie::Hash Tie::Hash::NamedCapture Tie::Memoize Tie::Scalar Tie::StdHandle Tie::SubstrHash Time::gmtime Time::localtime Time::tm Unicode::UCD UNIVERSAL User::grent User::pwent VMS::DCLsym VMS::Filespec VMS::Stdio XS::Typemap Win32CORE/; |
| 159 | my @unclaimedPragmata = qw/arybase attributes blib bytes charnames deprecate diagnostics encoding feature fields filetest inc::latest integer less locale mro open ops overload overloading re sigtrap sort strict subs utf8 vars vmsish/; |
| 160 | my @unclaimed = (@unclaimedModules, @unclaimedPragmata); |
| 161 | |
| 162 | my %distToModules = ( |
| 163 | 'IO-Compress' => [ |
| 164 | { |
| 165 | 'name' => 'IO-Compress', |
| 166 | 'modification' => $getModifyType->( $changes{'IO::Compress::Base'} ), |
| 167 | 'data' => $changes{'IO::Compress::Base'} |
| 168 | } |
| 169 | ], |
| 170 | 'Locale-Codes' => [ |
| 171 | { |
| 172 | 'name' => 'Locale::Codes', |
| 173 | 'modification' => $getModifyType->( $changes{'Locale::Codes'} ), |
| 174 | 'data' => $changes{'Locale::Codes'} |
| 175 | } |
| 176 | ], |
| 177 | 'PathTools' => [ |
| 178 | { |
| 179 | 'name' => 'File::Spec', |
| 180 | 'modification' => $getModifyType->( $changes{'Cwd'} ), |
| 181 | 'data' => $changes{'Cwd'} |
| 182 | } |
| 183 | ], |
| 184 | 'Scalar-List-Utils' => [ |
| 185 | { |
| 186 | 'name' => 'List::Util', |
| 187 | 'modification' => $getModifyType->( $changes{'List::Util'} ), |
| 188 | 'data' => $changes{'List::Util'} |
| 189 | }, |
| 190 | { |
| 191 | 'name' => 'Scalar::Util', |
| 192 | 'modification' => $getModifyType->( $changes{'Scalar::Util'} ), |
| 193 | 'data' => $changes{'Scalar::Util'} |
| 194 | } |
| 195 | ], |
| 196 | 'Text-Tabs+Wrap' => [ |
| 197 | { |
| 198 | 'name' => 'Text::Tabs', |
| 199 | 'modification' => $getModifyType->( $changes{'Text::Tabs'} ), |
| 200 | 'data' => $changes{'Text::Tabs'} |
| 201 | }, |
| 202 | { |
| 203 | 'name' => 'Text::Wrap', |
| 204 | 'modification' => $getModifyType->( $changes{'Text::Wrap'} ), |
| 205 | 'data' => $changes{'Text::Wrap'} |
| 206 | } |
| 207 | ], |
| 208 | ); |
| 209 | |
| 210 | # structure is (new|removed|updated) => [ [ModuleName, previousVersion, newVersion] ] |
| 211 | my $deltaGrouping = {}; |
| 212 | |
| 213 | # list of distributions listed in %Modules that need to be manually checked because there is no module that represents it |
| 214 | my @manuallyCheck; |
| 215 | |
| 216 | # %Modules defines what is currently in core |
| 217 | for my $k ( keys %Modules ) { |
| 218 | next if $k eq '_PERLLIB'; #these are taken care of by being listed in @unclaimed |
| 219 | next if Module::CoreList::is_core($k) and !exists $changes{$k}; #modules that have not changed |
| 220 | |
| 221 | my ( $distName, $modifyType, $data ); |
| 222 | |
| 223 | if ( exists $changes{$k} ) { |
| 224 | $distName = $k; |
| 225 | $modifyType = $getModifyType->( $changes{$k} ); |
| 226 | $data = $changes{$k}; |
| 227 | } |
| 228 | elsif ( exists $distToModules{$k} ) { |
| 229 | # modification will be undef if the distribution has not changed |
| 230 | my @modules = grep { $_->{modification} } @{ $distToModules{$k} }; |
| 231 | for (@modules) { |
| 232 | $deltaGrouping->{ $_->{modification} }->{ $_->{name} } = [ $_->{name}, $_->{data}->{left}, $_->{data}->{right} ]; |
| 233 | } |
| 234 | next; |
| 235 | } |
| 236 | else { |
| 237 | push @manuallyCheck, $k and next; |
| 238 | } |
| 239 | |
| 240 | $deltaGrouping->{$modifyType}->{$distName} = [ $distName, $data->{left}, $data->{right} ]; |
| 241 | } |
| 242 | |
| 243 | for my $k (@unclaimed) { |
| 244 | if ( exists $changes{$k} ) { |
| 245 | $deltaGrouping->{ $getModifyType->( $changes{$k} ) }->{$k} = |
| 246 | [ $k, $changes{$k}->{left}, $changes{$k}->{right} ]; |
| 247 | } |
| 248 | } |
| 249 | |
| 250 | # in old corelist, but not this one => removed |
| 251 | # N.B. This is exhaustive -- not just what's in %Modules, so modules removed from |
| 252 | # distributions will show up here, too. Some person will have to review to see what's |
| 253 | # important. That's the best we can do without a historical Maintainers.pl |
| 254 | for my $k ( keys %{ $corelist->{$old} } ) { |
| 255 | if ( ! exists $corelist->{$new}{$k} ) { |
| 256 | $deltaGrouping->{'removed'}->{$k} = [ $k, $corelist->{$old}{$k}, undef ]; |
| 257 | } |
| 258 | } |
| 259 | |
| 260 | return ( |
| 261 | \%{ $deltaGrouping->{'new'} }, |
| 262 | \%{ $deltaGrouping->{'removed'} }, |
| 263 | \%{ $deltaGrouping->{'updated'} }, |
| 264 | \@manuallyCheck |
| 265 | ); |
| 266 | } |
| 267 | |
| 268 | # currently does not update the Removed Module section |
| 269 | sub do_update_existing { |
| 270 | my ( $existing, $old, $new ) = @_; |
| 271 | |
| 272 | my ( $added, $removed, $updated, $manuallyCheck ) = corelist_delta( $old => $new ); |
| 273 | if ($manuallyCheck) { |
| 274 | print "It cannot be determined whether the following distributions have changed.\n"; |
| 275 | print "Please check and list accordingly:\n"; |
| 276 | say "\t* $_" for sort @{$manuallyCheck}; |
| 277 | print "\n"; |
| 278 | } |
| 279 | |
| 280 | my $data = { |
| 281 | new => $added, |
| 282 | updated => $updated, |
| 283 | #removed => $removed, ignore removed for now |
| 284 | }; |
| 285 | |
| 286 | my $text = DeltaUpdater::transform_pod( $existing, $data ); |
| 287 | open my $out, '>', $existing or die "can't open perldelta file $existing: $!"; |
| 288 | binmode($out); |
| 289 | print $out $text; |
| 290 | close $out; |
| 291 | say "The New and Updated Modules and Pragamata sections in $existing have been updated"; |
| 292 | say "Please ensure the Removed Modules and Pragmata section is up-to-date"; |
| 293 | } |
| 294 | |
| 295 | sub do_generate { |
| 296 | my ($old, $new) = @_; |
| 297 | my ($added, $removed, $updated, $manuallyCheck) = corelist_delta($old => $new); |
| 298 | |
| 299 | if ($manuallyCheck) { |
| 300 | print "\nXXXIt cannot be determined whether the following distributions have changed.\n"; |
| 301 | print "Please check and list accordingly:\n"; |
| 302 | say "\t$_" for @{$manuallyCheck}; |
| 303 | print "\n"; |
| 304 | } |
| 305 | |
| 306 | my $data = { |
| 307 | new => $added, |
| 308 | updated => $updated, |
| 309 | #removed => $removed, ignore removed for now |
| 310 | }; |
| 311 | |
| 312 | say DeltaUpdater::sections_to_pod($data) |
| 313 | } |
| 314 | |
| 315 | sub do_check { |
| 316 | my ($in, $old, $new) = @_; |
| 317 | |
| 318 | my $delta = DeltaParser->new($in); |
| 319 | my ($added, $removed, $updated) = corelist_delta($old => $new); |
| 320 | |
| 321 | # because of the difficulty in identifying the distribution for removed modules |
| 322 | # don't bother checking them |
| 323 | for my $ck ([ 'new', $delta->new_modules, $added ], |
| 324 | #[ 'removed', $delta->removed_modules, $removed ], |
| 325 | [ 'updated', $delta->updated_modules, $updated ] ) { |
| 326 | my @delta = @{ $ck->[1] }; |
| 327 | my @corelist = sort { lc $a->[0] cmp lc $b->[0] } values %{ $ck->[2] }; |
| 328 | |
| 329 | printf $ck->[0] . ":\n"; |
| 330 | |
| 331 | require Algorithm::Diff; |
| 332 | my $diff = Algorithm::Diff->new(map { |
| 333 | [map { join q{ } => grep defined, @{ $_ } } @{ $_ }] |
| 334 | } \@delta, \@corelist); |
| 335 | |
| 336 | while ($diff->Next) { |
| 337 | next if $diff->Same; |
| 338 | my $sep = ''; |
| 339 | if (!$diff->Items(2)) { |
| 340 | printf "%d,%dd%d\n", $diff->Get(qw( Min1 Max1 Max2 )); |
| 341 | } elsif(!$diff->Items(1)) { |
| 342 | printf "%da%d,%d\n", $diff->Get(qw( Max1 Min2 Max2 )); |
| 343 | } else { |
| 344 | $sep = "---\n"; |
| 345 | printf "%d,%dc%d,%d\n", $diff->Get(qw( Min1 Max1 Min2 Max2 )); |
| 346 | } |
| 347 | print "Delta< $_\n" for $diff->Items(1); |
| 348 | print $sep; |
| 349 | print "Corelist> $_\n" for $diff->Items(2); |
| 350 | } |
| 351 | |
| 352 | print "\n"; |
| 353 | } |
| 354 | } |
| 355 | |
| 356 | { |
| 357 | |
| 358 | package DeltaUpdater; |
| 359 | use List::Util 'reduce'; |
| 360 | |
| 361 | sub get_section_name_from_heading { |
| 362 | my $heading = shift; |
| 363 | while (my ($key, $expression) = each %sections) { |
| 364 | if ($heading =~ $expression) { |
| 365 | return $titles{$key}; |
| 366 | } |
| 367 | } |
| 368 | die "$heading did not match any section"; |
| 369 | } |
| 370 | |
| 371 | sub is_desired_section_name { |
| 372 | for (values %sections) { |
| 373 | return 1 if $_[0] =~ $_; |
| 374 | } |
| 375 | return 0; |
| 376 | } |
| 377 | |
| 378 | # verify the module and pragmata in the section, changing the stated version if necessary |
| 379 | # this subroutine warns if the module name cannot be parsed or if it is not listed in |
| 380 | # the results returned from corelist_delta() |
| 381 | # |
| 382 | # a side-effect of calling this function is that modules present in the section are |
| 383 | # removed from $data, resulting in $data containing only those modules and pragmata |
| 384 | # that were not listed in the perldelta file. This means we can then pass $data to |
| 385 | # add_to_section() without worrying about filtering out duplicates |
| 386 | sub update_section { |
| 387 | my ( $section, $data, $title ) = @_; |
| 388 | my @items = @{ $section->{items} }; |
| 389 | |
| 390 | for my $item (@items) { |
| 391 | |
| 392 | my $content = $item->{text}; |
| 393 | my $module = $item->{name}; |
| 394 | |
| 395 | #skip dummy items |
| 396 | next if !$module and $content =~ /\s*xx*\s*/i; |
| 397 | |
| 398 | say "Could not parse module name; line is:\n\t$content" and next unless $module; |
| 399 | |
| 400 | if ( !$data->{$title}{$module} ) { |
| 401 | print "$module is not listed as being $title in Module::CoreList.\n"; |
| 402 | print "Ensure Module::CoreList has been updated and\n"; |
| 403 | print "check to see that the distribution is not listed under another name.\n\n"; |
| 404 | next; |
| 405 | } |
| 406 | |
| 407 | if ( $title eq 'new' ) { |
| 408 | my ($new) = $content =~ /(\d[^\s]+)\s+has\s+been.*$/m; |
| 409 | say "Could not parse new version for $module; line is:\n\t$content" and next unless $new; |
| 410 | if ( $data->{$title}{$module}[2] ne $new ) { |
| 411 | say "$module: new version differs; version in pod: $new; version in corelist: " . $data->{$title}{$module}[2]; |
| 412 | } |
| 413 | $content =~ s/\d[^\s]+(\s+has\s+been.*$)/$data->{$title}{$module}[2].$1/me; |
| 414 | } |
| 415 | |
| 416 | elsif ( $title eq 'updated' ) { |
| 417 | my ( $prev, $new ) = $content =~ /from\s+(?:version\s+)?(\d[^\s]+)\s+to\s+(?:version\s+)?(\d[^\s,]+?)(?=[\s,]|\.\s|\.$|$).*/s; |
| 418 | say "Could not parse old and new version for $module; line is:\n\t$content" and next |
| 419 | unless $prev and $new; |
| 420 | if ( $data->{$title}{$module}[1] ne $prev ) { |
| 421 | say "$module: previous version differs; version in pod: $prev; version in corelist: " . $data->{$title}{$module}[1]; |
| 422 | } |
| 423 | if ( $data->{$title}{$module}[2] ne $new ) { |
| 424 | say "$module: new version differs; version in pod: $new; version in corelist: " . $data->{$title}{$module}[2]; |
| 425 | } |
| 426 | $content =~ |
| 427 | s/(from\s+(?:version\s+)?)\d[^\s]+(\s+to\s+(?:version\s+)?)\d[^\s,]+?(?=[\s,]|\.\s|\.$|$)(.*)/$1.$data->{$title}{$module}[1].$2.$data->{$title}{$module}[2].$3/se; |
| 428 | } |
| 429 | |
| 430 | elsif ( $title eq 'removed' ) { |
| 431 | my ($prev) = $content =~ /^.*?was\s+(\d[^\s]+?)/m; |
| 432 | say "Could not parse old version for $module; line is:\n\t$content" and next unless $prev; |
| 433 | if ( $data->{$title}{$module}[1] ne $prev ) { |
| 434 | say "$module: previous version differs; $prev " . $data->{$title}{$module}[1]; |
| 435 | } |
| 436 | $content =~ s/(^.*?was\s+)\d[^\s]+?/$1.$data->{$title}{$module}[1]/me; |
| 437 | } |
| 438 | |
| 439 | delete $data->{$title}{$module}; |
| 440 | $item->{text} = $content; |
| 441 | } |
| 442 | return $section; |
| 443 | } |
| 444 | |
| 445 | # add modules and pragmata present in $data to the section |
| 446 | sub add_to_section { |
| 447 | my ( $section, $data, $title ) = @_; |
| 448 | |
| 449 | #undef is a valid version name in Module::CoreList so suppress warnings about concatenating undef values |
| 450 | no warnings 'uninitialized'; |
| 451 | for ( values %{ $data->{$title} } ) { |
| 452 | my ( $mod, $old_v, $new_v ) = @{$_}; |
| 453 | my ( $item, $text ); |
| 454 | |
| 455 | $item = { name => $mod, text => "=item *\n" }; |
| 456 | if ( $title eq 'new' ) { |
| 457 | $text = "L<$mod> $new_v has been added to the Perl core.\n"; |
| 458 | } |
| 459 | |
| 460 | elsif ( $title eq 'updated' ) { |
| 461 | $text = "L<$mod> has been upgraded from version $old_v to $new_v.\n"; |
| 462 | if ( $deprecated->{$mod} ) { |
| 463 | $text .= "NOTE: L<$mod> is deprecated and may be removed from a future version of Perl.\n"; |
| 464 | } |
| 465 | } |
| 466 | |
| 467 | elsif ( $title eq 'removed' ) { |
| 468 | $text = "C<$mod> has been removed from the Perl core. Prior version was $old_v.\n"; |
| 469 | } |
| 470 | |
| 471 | $item->{text} .= "\n$text\n"; |
| 472 | push @{ $section->{items} }, $item; |
| 473 | } |
| 474 | return $section; |
| 475 | } |
| 476 | |
| 477 | sub sort_items_in_section { |
| 478 | my ($section) = @_; |
| 479 | |
| 480 | # if we could not parse the module name, it will be uninitalized |
| 481 | # in sort. This is not a problem as it will just result in these |
| 482 | # sections being placed near the beginning of the section |
| 483 | no warnings 'uninitialized'; |
| 484 | $section->{items} = |
| 485 | [ sort { lc $a->{name} cmp lc $b->{name} } @{ $section->{items} } ]; |
| 486 | return $section; |
| 487 | } |
| 488 | |
| 489 | # given a hashref of the form returned by corelist_delta() |
| 490 | # and a hash structured as documented in transform_pod(), it returns |
| 491 | # a pod string representation of the sections, creating sections |
| 492 | # if necessary |
| 493 | sub sections_to_pod { |
| 494 | my ( $data, %sections ) = @_; |
| 495 | my $out = ''; |
| 496 | |
| 497 | for ( |
| 498 | ( |
| 499 | [ 'New Modules and Pragmata', 'new' ], |
| 500 | [ 'Updated Modules and Pragmata', 'updated' ], |
| 501 | [ 'Removed Modules and Pragmata', 'removed' ] |
| 502 | ) |
| 503 | ) |
| 504 | { |
| 505 | my ( $section_name, $title ) = @{$_}; |
| 506 | |
| 507 | my $section = $sections{$section_name} // { |
| 508 | name => $section_name, |
| 509 | preceding_text => "=head2 $_->[0]\n=over 4\n", |
| 510 | following_text => "=back\n", |
| 511 | items => [], |
| 512 | manual => 1 |
| 513 | }; |
| 514 | |
| 515 | $section = update_section( $section, $data, $title ); |
| 516 | $section = add_to_section( $section, $data, $title ); |
| 517 | $section = sort_items_in_section( $section ); |
| 518 | |
| 519 | next if $section->{manual} and scalar @{ $section->{items} } == 0; |
| 520 | |
| 521 | my $items = reduce { no warnings 'once'; $a . $b->{text} } |
| 522 | ( '', @{ $section->{items} } ); |
| 523 | $out .= |
| 524 | ( $section->{preceding_text} // '' ) |
| 525 | . $items |
| 526 | . ( $section->{following_text} // '' ); |
| 527 | } |
| 528 | return $out; |
| 529 | } |
| 530 | |
| 531 | # given a filename corresponding to an existing perldelta file |
| 532 | # and a hashref of the form returned by corelist_delta(), it |
| 533 | # returns a string of the resulting file after the module |
| 534 | # information has been added. |
| 535 | sub transform_pod { |
| 536 | my ( $existing, $data ) = @_; |
| 537 | |
| 538 | # will contain hashrefs corresponding to new, updated and removed |
| 539 | # modules and pragmata keyed by section name |
| 540 | # each section is hashref of the structure |
| 541 | # preceding_text => Text occurring before and including the over |
| 542 | # region containing the list of modules, |
| 543 | # items => [Arrayref of hashrefs corresponding to a module |
| 544 | # entry], |
| 545 | # an entry has the form: |
| 546 | # name => Module name or undef if the name could not be determined |
| 547 | # text => The text of the entry, including the item heading |
| 548 | # |
| 549 | # following_text => Any text not corresponding to a module |
| 550 | # that occurs after the first module |
| 551 | # |
| 552 | # the sections are converted to a pod string by calling sections_to_pod() |
| 553 | my %sections; |
| 554 | |
| 555 | # we are in the Modules_and_Pragmata's section |
| 556 | my $in_Modules_and_Pragmata; |
| 557 | |
| 558 | # we are the Modules_and_Pragmata's section but have not |
| 559 | # encountered any of the desired sections. We use this |
| 560 | # flag to determine whether we should append the text to $out |
| 561 | # or we need to delay appending until the module listings are |
| 562 | # processed and instead append to $append_to_out |
| 563 | my $in_Modules_and_Pragmata_preamble; |
| 564 | |
| 565 | my $done_processing_Modules_and_Pragmata; |
| 566 | |
| 567 | my $current_section; |
| 568 | |
| 569 | # $nested_element_level == 0 : not in an over region, treat lines as text |
| 570 | # $nested_element_level == 1 : presumably in the top over region that |
| 571 | # corresponds to the module listing. Treat |
| 572 | # each item as a module |
| 573 | # $nested_element_level > 1 : we only consider these values when we are in an item |
| 574 | # We treat lines as the text of the current item. |
| 575 | my $nested_element_level = 0; |
| 576 | |
| 577 | my $current_item; |
| 578 | my $need_to_parse_module_name; |
| 579 | |
| 580 | my $out = ''; |
| 581 | my $append_to_out = ''; |
| 582 | |
| 583 | open my $fh, '<', $existing or die "can't open perldelta file $existing: $!"; |
| 584 | binmode($fh); |
| 585 | |
| 586 | while (<$fh>) { |
| 587 | # treat the rest of the file as plain text |
| 588 | if ($done_processing_Modules_and_Pragmata) { |
| 589 | $out .= $_; |
| 590 | next; |
| 591 | } |
| 592 | |
| 593 | elsif ( !$in_Modules_and_Pragmata ) { |
| 594 | # entering Modules and Pragmata |
| 595 | if (/^=head1 Modules and Pragmata/) { |
| 596 | $in_Modules_and_Pragmata = 1; |
| 597 | $in_Modules_and_Pragmata_preamble = 1; |
| 598 | } |
| 599 | $out .= $_; |
| 600 | next; |
| 601 | } |
| 602 | |
| 603 | # leaving Modules and Pragmata |
| 604 | elsif (/^=head1/) { |
| 605 | if ($current_section) { |
| 606 | push @{ $current_section->{items} }, $current_item |
| 607 | if $current_item; |
| 608 | $sections{ $current_section->{name} } = $current_section; |
| 609 | } |
| 610 | $done_processing_Modules_and_Pragmata = 1; |
| 611 | $out .= |
| 612 | sections_to_pod( $data, %sections ) . $append_to_out . $_; |
| 613 | next; |
| 614 | } |
| 615 | |
| 616 | # new section in Modules and Pragmata |
| 617 | elsif (/^=head2 (.*?)$/) { |
| 618 | my $name = $1; |
| 619 | if ($current_section) { |
| 620 | push @{ $current_section->{items} }, $current_item |
| 621 | if $current_item; |
| 622 | $sections{ $current_section->{name} } = $current_section; |
| 623 | undef $current_section; |
| 624 | } |
| 625 | |
| 626 | if ( is_desired_section_name($name) ) { |
| 627 | undef $in_Modules_and_Pragmata_preamble; |
| 628 | if ( $nested_element_level > 0 ) { |
| 629 | die "Unexpected head2 at line no. $."; |
| 630 | } |
| 631 | my $title = get_section_name_from_heading($name); |
| 632 | if ( exists $sections{$title} ) { |
| 633 | die "$name occurred twice at line no. $."; |
| 634 | } |
| 635 | $current_section = {}; |
| 636 | $current_section->{name} = $title; |
| 637 | $current_section->{preceding_text} = $_; |
| 638 | $current_section->{items} = []; |
| 639 | $nested_element_level = 0; |
| 640 | next; |
| 641 | } |
| 642 | |
| 643 | # otherwise treat section as plain text |
| 644 | else { |
| 645 | if ($in_Modules_and_Pragmata_preamble) { |
| 646 | $out .= $_; |
| 647 | } |
| 648 | else { |
| 649 | $append_to_out .= $_; |
| 650 | } |
| 651 | next; |
| 652 | } |
| 653 | } |
| 654 | |
| 655 | elsif ($current_section) { |
| 656 | |
| 657 | # not in an over region |
| 658 | if ( $nested_element_level == 0 ) { |
| 659 | if (/^=over/) { |
| 660 | $nested_element_level++; |
| 661 | } |
| 662 | if ( scalar @{ $current_section->{items} } > 0 ) { |
| 663 | $current_section->{following_text} .= $_; |
| 664 | } |
| 665 | else { |
| 666 | $current_section->{preceding_text} .= $_; |
| 667 | } |
| 668 | next; |
| 669 | } |
| 670 | |
| 671 | if ($current_item) { |
| 672 | if ($need_to_parse_module_name) { |
| 673 | # the item may not have a parsable module name, which means that |
| 674 | # $current_item->{name} will never be defined. |
| 675 | if (/^(?:L|C)<(.+?)>/) { |
| 676 | $current_item->{name} = $1; |
| 677 | undef $need_to_parse_module_name; |
| 678 | } |
| 679 | # =item or =back signals the end of an item |
| 680 | # block, which we handle below |
| 681 | if ( !/^=(?:item|back)/ ) { |
| 682 | $current_item->{text} .= $_; |
| 683 | next; |
| 684 | } |
| 685 | } |
| 686 | # currently in an over region |
| 687 | # treat text inside region as plain text |
| 688 | if ( $nested_element_level > 1 ) { |
| 689 | if (/^=back/) { |
| 690 | $nested_element_level--; |
| 691 | } |
| 692 | elsif (/^=over/) { |
| 693 | $nested_element_level++; |
| 694 | } |
| 695 | $current_item->{text} .= $_; |
| 696 | next; |
| 697 | } |
| 698 | # entering over region |
| 699 | if (/^=over/) { |
| 700 | $nested_element_level++; |
| 701 | $current_item->{text} .= $_; |
| 702 | next; |
| 703 | } |
| 704 | # =item or =back signals the end of an item |
| 705 | # block, which we handle below |
| 706 | if ( !/^=(?:item|back)/ ) { |
| 707 | $current_item->{text} .= $_; |
| 708 | next; |
| 709 | } |
| 710 | } |
| 711 | |
| 712 | if (/^=item \*/) { |
| 713 | push @{ $current_section->{items} }, $current_item |
| 714 | if $current_item; |
| 715 | $current_item = { text => $_ }; |
| 716 | $need_to_parse_module_name = 1; |
| 717 | next; |
| 718 | } |
| 719 | |
| 720 | if (/^=back/) { |
| 721 | push @{ $current_section->{items} }, $current_item |
| 722 | if $current_item; |
| 723 | undef $current_item; |
| 724 | $nested_element_level--; |
| 725 | } |
| 726 | |
| 727 | if ( scalar @{ $current_section->{items} } == 0 ) { |
| 728 | $current_section->{preceding_text} .= $_; |
| 729 | } |
| 730 | else { |
| 731 | $current_section->{following_text} .= $_; |
| 732 | } |
| 733 | next; |
| 734 | } |
| 735 | |
| 736 | # text in Modules and Pragmata not in a head2 region |
| 737 | else { |
| 738 | if ($in_Modules_and_Pragmata_preamble) { |
| 739 | $out .= $_; |
| 740 | } |
| 741 | else { |
| 742 | $append_to_out .= $_; |
| 743 | } |
| 744 | next; |
| 745 | } |
| 746 | } |
| 747 | close $fh; |
| 748 | die 'Never saw Modules and Pragmata section' unless $in_Modules_and_Pragmata; |
| 749 | return $out; |
| 750 | } |
| 751 | |
| 752 | } |
| 753 | |
| 754 | { |
| 755 | package DeltaParser; |
| 756 | use Pod::Simple::SimpleTree; |
| 757 | |
| 758 | sub new { |
| 759 | my ($class, $input) = @_; |
| 760 | |
| 761 | my $self = bless {} => $class; |
| 762 | |
| 763 | my $parsed_pod = Pod::Simple::SimpleTree->new->parse_file($input)->root; |
| 764 | splice @{ $parsed_pod }, 0, 2; # we don't care about the document structure, |
| 765 | # just the nodes within it |
| 766 | |
| 767 | $self->_parse_delta($parsed_pod); |
| 768 | |
| 769 | return $self; |
| 770 | } |
| 771 | |
| 772 | # creates the accessor methods: |
| 773 | # new_modules |
| 774 | # updated_modules |
| 775 | # removed_modules |
| 776 | for my $k (keys %sections) { |
| 777 | no strict 'refs'; |
| 778 | my $m = "${k}_modules"; |
| 779 | *$m = sub { $_[0]->{$m} }; |
| 780 | } |
| 781 | |
| 782 | sub _parse_delta { |
| 783 | my ($self, $pod) = @_; |
| 784 | |
| 785 | my $new_section = $self->_look_for_section( $pod, $sections{new} ); |
| 786 | my $updated_section = $self->_look_for_section( $pod, $sections{updated} ); |
| 787 | my $removed_section = $self->_look_for_section( $pod, $sections{removed} ); |
| 788 | |
| 789 | $self->_parse_new_section($new_section); |
| 790 | $self->_parse_updated_section($updated_section); |
| 791 | $self->_parse_removed_section($removed_section); |
| 792 | |
| 793 | for (qw/new_modules updated_modules removed_modules/) { |
| 794 | $self->{$_} = |
| 795 | [ sort { lc $a->[0] cmp lc $b->[0] } @{ $self->{$_} } ]; |
| 796 | } |
| 797 | |
| 798 | return; |
| 799 | } |
| 800 | |
| 801 | sub _parse_new_section { |
| 802 | my ($self, $section) = @_; |
| 803 | |
| 804 | $self->{new_modules} = []; |
| 805 | return unless $section; |
| 806 | $self->{new_modules} = $self->_parse_section($section => sub { |
| 807 | my ($el) = @_; |
| 808 | |
| 809 | my ($first, $second) = @{ $el }[2, 3]; |
| 810 | my ($ver) = $second =~ /(\d[^\s]+)\s+has\s+been/; |
| 811 | |
| 812 | return [ $first->[2], undef, $ver ]; |
| 813 | }); |
| 814 | |
| 815 | return; |
| 816 | } |
| 817 | |
| 818 | sub _parse_updated_section { |
| 819 | my ($self, $section) = @_; |
| 820 | |
| 821 | $self->{updated_modules} = []; |
| 822 | return unless $section; |
| 823 | $self->{updated_modules} = $self->_parse_section($section => sub { |
| 824 | my ($el) = @_; |
| 825 | |
| 826 | my ($first, $second) = @{ $el }[2, 3]; |
| 827 | my $module = $first->[2]; |
| 828 | |
| 829 | # the regular expression matches the following: |
| 830 | # from VERSION_NUMBER to VERSION_NUMBER |
| 831 | # from VERSION_NUMBER to VERSION_NUMBER. |
| 832 | # from version VERSION_NUMBER to version VERSION_NUMBER. |
| 833 | # from VERSION_NUMBER to VERSION_NUMBER and MODULE from VERSION_NUMBER to VERSION_NUMBER |
| 834 | # from VERSION_NUMBER to VERSION_NUMBER, and MODULE from VERSION_NUMBER to VERSION_NUMBER |
| 835 | # |
| 836 | # some perldeltas contain more than one module listed in an entry, this only attempts to match the |
| 837 | # first module |
| 838 | my ($old, $new) = $second =~ |
| 839 | /from\s+(?:version\s+)?(\d[^\s]+)\s+to\s+(?:version\s+)?(\d[^\s,]+?)(?=[\s,]|\.\s|\.$|$).*/s; |
| 840 | |
| 841 | warn "Unable to extract old or new version of $module from perldelta" |
| 842 | if !defined $old || !defined $new; |
| 843 | |
| 844 | return [ $module, $old, $new ]; |
| 845 | }); |
| 846 | |
| 847 | return; |
| 848 | } |
| 849 | |
| 850 | sub _parse_removed_section { |
| 851 | my ($self, $section) = @_; |
| 852 | |
| 853 | $self->{removed_modules} = []; |
| 854 | return unless $section; |
| 855 | $self->{removed_modules} = $self->_parse_section($section => sub { |
| 856 | my ($el) = @_; |
| 857 | |
| 858 | my ($first, $second) = @{ $el }[2, 3]; |
| 859 | my ($old) = $second =~ /was\s+(\d[^\s]+?)\.?$/; |
| 860 | |
| 861 | return [ $first->[2], $old, undef ]; |
| 862 | }); |
| 863 | |
| 864 | return; |
| 865 | } |
| 866 | |
| 867 | sub _parse_section { |
| 868 | my ($self, $section, $parser) = @_; |
| 869 | |
| 870 | my $items = $self->_look_down($section => sub { |
| 871 | my ($el) = @_; |
| 872 | return unless ref $el && $el->[0] =~ /^item-/ |
| 873 | && @{ $el } > 2 && ref $el->[2]; |
| 874 | return unless $el->[2]->[0] =~ /C|L/; |
| 875 | |
| 876 | return 1; |
| 877 | }); |
| 878 | |
| 879 | return [map { $parser->($_) } @{ $items }]; |
| 880 | } |
| 881 | |
| 882 | sub _look_down { |
| 883 | my ($self, $pod, $predicate) = @_; |
| 884 | my @pod = @{ $pod }; |
| 885 | |
| 886 | my @l; |
| 887 | while (my $el = shift @pod) { |
| 888 | push @l, $el if $predicate->($el); |
| 889 | if (ref $el) { |
| 890 | my @el = @{ $el }; |
| 891 | splice @el, 0, 2; |
| 892 | unshift @pod, @el if @el; |
| 893 | } |
| 894 | } |
| 895 | |
| 896 | return @l ? \@l : undef; |
| 897 | } |
| 898 | |
| 899 | sub _look_for_section { |
| 900 | my ($self, $pod, $section) = @_; |
| 901 | |
| 902 | my $level; |
| 903 | $self->_look_for_range($pod, |
| 904 | sub { |
| 905 | my ($el) = @_; |
| 906 | my ($heading) = $el->[0] =~ /^head(\d)$/; |
| 907 | my $f = $heading && $el->[2] =~ /^$section/; |
| 908 | $level = $heading if $f && !$level; |
| 909 | return $f; |
| 910 | }, |
| 911 | sub { |
| 912 | my ($el) = @_; |
| 913 | $el->[0] =~ /^head(\d)$/ && $1 <= $level; |
| 914 | }, |
| 915 | ); |
| 916 | } |
| 917 | |
| 918 | sub _look_for_range { |
| 919 | my ($self, $pod, $start_predicate, $stop_predicate) = @_; |
| 920 | |
| 921 | my @l; |
| 922 | for my $el (@{ $pod }) { |
| 923 | if (@l) { |
| 924 | return \@l if $stop_predicate->($el); |
| 925 | } |
| 926 | else { |
| 927 | next unless $start_predicate->($el); |
| 928 | } |
| 929 | push @l, $el; |
| 930 | } |
| 931 | |
| 932 | return; |
| 933 | } |
| 934 | } |
| 935 | |
| 936 | run; |