Porting/corelist-perldelta.pl

   1 #!perl
   2 use 5.010;
   3 use strict;
   4 use warnings;
   5 use lib 'Porting';
   6 use Maintainers qw/%Modules/;
   7 use lib 'dist/Module-CoreList/lib';
   8 use Module::CoreList;
   9 use Getopt::Long;
  10
  11 =head1 USAGE
  12
  13   # generate the module changes for the Perl you are currently building
  14   ./perl -Ilib Porting/corelist-perldelta.pl
  15
  16   # update the module changes for the Perl you are currently building
  17   ./perl -Ilib Porting/corelist-perldelta.pl --mode=update pod/perldelta.pod
  18
  19   # generate a diff between the corelist sections of two perldelta* files:
  20   perl Porting/corelist-perldelta.pl --mode=check 5.017001 5.017002 <perl5172delta.pod
  21
  22 =head1 ABOUT
  23
  24 corelist-perldelta.pl is a bit schizophrenic. The part to generate the
  25 new Perldelta text does not need Algorithm::Diff, but wants to be
  26 run with the freshly built Perl.
  27
  28 The part to check the diff wants to be run with a Perl that has an up-to-date
  29 L<Module::CoreList>, but needs the outside L<Algorithm::Diff>.
  30
  31 Ideally, the program will be split into two separate programs, one
  32 to generate the text and one to show the diff between the
  33 corelist sections of the last perldelta and the next perldelta.
  34
  35 Currently no information about Removed Modules is displayed in any of the
  36 modes.
  37
  38 =cut
  39
  40 my %sections = (
  41   new     => qr/New Modules and Pragma(ta)?/,
  42   updated => qr/Updated Modules and Pragma(ta)?/,
  43   removed => qr/Removed Modules and Pragma(ta)?/,
  44 );
  45
  46 my %titles = (
  47   new     => 'New Modules and Pragmata',
  48   updated => 'Updated Modules and Pragmata',
  49   removed => 'Removed Modules and Pragmata',
  50 );
  51
  52 my $deprecated;
  53
  54 sub run {
  55   my %opt = (mode => 'generate');
  56
  57   GetOptions(\%opt,
  58     'mode|m:s', # 'generate', 'check', 'update'
  59   );
  60
  61   # by default, compare latest two version in CoreList;
  62   my ($old, $new) = latest_two_perl_versions();
  63
  64   # use the provided versions if present
  65   # @ARGV >=2 means [old_version] [new_version] [path/to/file]
  66   if ( @ARGV >= 2) {
  67     ($old, $new) = (shift @ARGV, shift @ARGV);
  68     die "$old is an invalid version\n" if not exists
  69       $Module::CoreList::version{$old};
  70     die "$new is an invalid version\n" if not exists
  71       $Module::CoreList::version{$new};
  72   }
  73
  74   if ( $opt{mode} eq 'generate' ) {
  75     do_generate($old => $new);
  76   }
  77   elsif ( $opt{mode} eq 'check' ) {
  78     do_check(\*ARGV, $old => $new);
  79   }
  80   elsif ( $opt{mode} eq 'update' ) {
  81     do_update_existing(shift @ARGV, $old => $new);
  82   }
  83   else {
  84     die "Unrecognized mode '$opt{mode}'\n";
  85   }
  86
  87   exit 0;
  88 }
  89
  90 sub latest_two_perl_versions {
  91
  92   my @versions = sort keys %Module::CoreList::version;
  93
  94   my $new = pop @versions;
  95
  96   # If a fully-padded version number ends in a zero (as in "5.019010"), that
  97   # version shows up in %Module::CoreList::version both with and without its
  98   # trailing zeros. So skip all versions that are numerically equal to $new.
  99   pop @versions while @versions && $versions[-1] == $new;
 100
 101   die "Too few distinct core versions in %Module::CoreList::version ?!\n"
 102     if !@versions;
 103
 104   return $versions[-1], $new;
 105 }
 106
 107 # Given two perl versions, it returns a list describing the core distributions that have changed.
 108 # The first three elements are hashrefs corresponding to new, updated, and removed modules
 109 # and are of the form (mostly, see the special remarks about removed):
 110 #   'Distribution Name' => ['Distribution Name', previous version number, current version number]
 111 # where the version number is undef if the distribution did not exist.
 112 # The fourth element is an arrayref of core distribution names of those distribution for which it
 113 # is unknown whether they have changed and therefore need to be manually checked.
 114 #
 115 # In most cases, the distribution name in %Modules corresponds to the module that is representative
 116 # of the distribution as listed in Module::CoreList. However, there are a few distribution names
 117 # that do not correspond to a module. %distToModules has been created which maps the distribution
 118 # name to a representative module. The representative module was chosen by either looking at the
 119 # Makefile of the distribution or by seeing which module the distribution has been traditionally
 120 # listed under in past perldeltas.
 121 #
 122 # There are a few distributions for which there is no single representative module (e.g. libnet).
 123 # These distributions are returned as the last element of the list.
 124 #
 125 # %Modules contains a final key, _PERLLIB, which contains a list of modules that are owned by p5p.
 126 # This list contains modules and pragmata that may also be present in Module::CoreList.
 127 # A list of modules are in the list @unclaimedModules, which were manually listed based on whether
 128 # they were independent modules and whether they have been listed in past perldeltas.
 129 # The pragmata were found by doing something like:
 130 #   say for sort grep { $_ eq lc $_ and !exists $Modules{$_}}
 131 #     keys %{$Module::CoreList::version{'5.019003'}}
 132 # and manually filtering out pragamata that were already covered.
 133 #
 134 # It is currently not possible to differentiate between a removed module and a removed
 135 # distribution. Therefore, the removed hashref contains every module that has been removed, even if
 136 # the module's corresponding distribution has not been removed.
 137
 138 sub corelist_delta {
 139   my ($old, $new) = @_;
 140   my $corelist = \%Module::CoreList::version;
 141   my %changes = Module::CoreList::changes_between( $old, $new );
 142   $deprecated = $Module::CoreList::deprecated{$new};
 143
 144   my $getModifyType = sub {
 145     my $data = shift;
 146     if ( exists $data->{left} and exists $data->{right} ) {
 147       return 'updated';
 148     }
 149     elsif ( !exists $data->{left} and exists $data->{right} ) {
 150       return 'new';
 151     }
 152     elsif ( exists $data->{left} and !exists $data->{right} ) {
 153       return 'removed';
 154     }
 155     return undef;
 156   };
 157
 158   my @unclaimedModules = qw/AnyDBM_File B B::Concise B::Deparse Benchmark Class::Struct Config::Extensions DB DBM_Filter Devel::Peek DirHandle DynaLoader English Errno ExtUtils::Embed ExtUtils::Miniperl ExtUtils::Typemaps ExtUtils::XSSymSet Fcntl File::Basename File::Compare File::Copy File::DosGlob File::Find File::Glob File::stat FileCache FileHandle FindBin GDBM_File Getopt::Std Hash::Util Hash::Util::FieldHash I18N::Langinfo IPC::Open3 NDBM_File ODBM_File Opcode PerlIO PerlIO::encoding PerlIO::mmap PerlIO::scalar PerlIO::via Pod::Functions Pod::Html POSIX SDBM_File SelectSaver Symbol Sys::Hostname Thread Tie::Array Tie::Handle Tie::Hash Tie::Hash::NamedCapture Tie::Memoize Tie::Scalar Tie::StdHandle Tie::SubstrHash Time::gmtime Time::localtime Time::tm Unicode::UCD UNIVERSAL User::grent User::pwent VMS::DCLsym VMS::Filespec VMS::Stdio XS::Typemap Win32CORE/;
 159   my @unclaimedPragmata = qw/arybase attributes blib bytes charnames deprecate diagnostics encoding feature fields filetest inc::latest integer less locale mro open ops overload overloading re sigtrap sort strict subs utf8 vars vmsish/;
 160   my @unclaimed = (@unclaimedModules, @unclaimedPragmata);
 161
 162   my %distToModules = (
 163     'IO-Compress' => [
 164       {
 165         'name' => 'IO-Compress',
 166         'modification' => $getModifyType->( $changes{'IO::Compress::Base'} ),
 167         'data' => $changes{'IO::Compress::Base'}
 168       }
 169     ],
 170     'Locale-Codes' => [
 171       {
 172         'name'         => 'Locale::Codes',
 173         'modification' => $getModifyType->( $changes{'Locale::Codes'} ),
 174         'data'         => $changes{'Locale::Codes'}
 175       }
 176     ],
 177     'PathTools' => [
 178       {
 179         'name'         => 'File::Spec',
 180         'modification' => $getModifyType->( $changes{'Cwd'} ),
 181         'data'         => $changes{'Cwd'}
 182       }
 183     ],
 184     'Scalar-List-Utils' => [
 185       {
 186         'name'         => 'List::Util',
 187         'modification' => $getModifyType->( $changes{'List::Util'} ),
 188         'data'         => $changes{'List::Util'}
 189       },
 190       {
 191         'name'         => 'Scalar::Util',
 192         'modification' => $getModifyType->( $changes{'Scalar::Util'} ),
 193         'data'         => $changes{'Scalar::Util'}
 194       }
 195     ],
 196     'Text-Tabs+Wrap' => [
 197       {
 198         'name'         => 'Text::Tabs',
 199         'modification' => $getModifyType->( $changes{'Text::Tabs'} ),
 200         'data'         => $changes{'Text::Tabs'}
 201       },
 202       {
 203         'name'         => 'Text::Wrap',
 204         'modification' => $getModifyType->( $changes{'Text::Wrap'} ),
 205         'data'         => $changes{'Text::Wrap'}
 206       }
 207     ],
 208   );
 209
 210   # structure is (new|removed|updated) => [ [ModuleName, previousVersion, newVersion] ]
 211   my $deltaGrouping = {};
 212
 213   # list of distributions listed in %Modules that need to be manually checked because there is no module that represents it
 214   my @manuallyCheck;
 215
 216   # %Modules defines what is currently in core
 217   for my $k ( keys %Modules ) {
 218     next if $k eq '_PERLLIB'; #these are taken care of by being listed in @unclaimed
 219     next if Module::CoreList::is_core($k) and !exists $changes{$k}; #modules that have not changed
 220
 221     my ( $distName, $modifyType, $data );
 222
 223     if ( exists $changes{$k} ) {
 224       $distName   = $k;
 225       $modifyType = $getModifyType->( $changes{$k} );
 226       $data       = $changes{$k};
 227     }
 228     elsif ( exists $distToModules{$k} ) {
 229       # modification will be undef if the distribution has not changed
 230       my @modules = grep { $_->{modification} } @{ $distToModules{$k} };
 231       for (@modules) {
 232         $deltaGrouping->{ $_->{modification} }->{ $_->{name} } = [ $_->{name}, $_->{data}->{left}, $_->{data}->{right} ];
 233       }
 234       next;
 235     }
 236     else {
 237       push @manuallyCheck, $k and next;
 238     }
 239
 240     $deltaGrouping->{$modifyType}->{$distName} = [ $distName, $data->{left}, $data->{right} ];
 241   }
 242
 243   for my $k (@unclaimed) {
 244     if ( exists $changes{$k} ) {
 245       $deltaGrouping->{ $getModifyType->( $changes{$k} ) }->{$k} =
 246         [ $k, $changes{$k}->{left}, $changes{$k}->{right} ];
 247     }
 248   }
 249
 250   # in old corelist, but not this one => removed
 251   # N.B. This is exhaustive -- not just what's in %Modules, so modules removed from
 252   # distributions will show up here, too.  Some person will have to review to see what's
 253   # important. That's the best we can do without a historical Maintainers.pl
 254   for my $k ( keys %{ $corelist->{$old} } ) {
 255     if ( ! exists $corelist->{$new}{$k} ) {
 256       $deltaGrouping->{'removed'}->{$k} = [ $k, $corelist->{$old}{$k}, undef ];
 257     }
 258   }
 259
 260   return (
 261     \%{ $deltaGrouping->{'new'} },
 262     \%{ $deltaGrouping->{'removed'} },
 263     \%{ $deltaGrouping->{'updated'} },
 264     \@manuallyCheck
 265   );
 266 }
 267
 268 # currently does not update the Removed Module section
 269 sub do_update_existing {
 270   my ( $existing, $old, $new ) = @_;
 271
 272   my ( $added, $removed, $updated, $manuallyCheck ) = corelist_delta( $old => $new );
 273   if ($manuallyCheck) {
 274     print "It cannot be determined whether the following distributions have changed.\n";
 275     print "Please check and list accordingly:\n";
 276     say "\t* $_" for sort @{$manuallyCheck};
 277     print "\n";
 278   }
 279
 280   my $data = {
 281     new      => $added,
 282     updated  => $updated,
 283     #removed => $removed, ignore removed for now
 284   };
 285
 286   my $text = DeltaUpdater::transform_pod( $existing, $data );
 287   open my $out, '>', $existing or die "can't open perldelta file $existing: $!";
 288   binmode($out);
 289   print $out $text;
 290   close $out;
 291   say "The New and Updated Modules and Pragamata sections in $existing have been updated";
 292   say "Please ensure the Removed Modules and Pragmata section is up-to-date";
 293 }
 294
 295 sub do_generate {
 296   my ($old, $new) = @_;
 297   my ($added, $removed, $updated, $manuallyCheck) = corelist_delta($old => $new);
 298
 299   if ($manuallyCheck) {
 300     print "\nXXXIt cannot be determined whether the following distributions have changed.\n";
 301     print "Please check and list accordingly:\n";
 302     say "\t$_" for @{$manuallyCheck};
 303     print "\n";
 304   }
 305
 306   my $data = {
 307     new      => $added,
 308     updated  => $updated,
 309     #removed => $removed, ignore removed for now
 310   };
 311
 312   say DeltaUpdater::sections_to_pod($data)
 313 }
 314
 315 sub do_check {
 316   my ($in, $old, $new) = @_;
 317
 318   my $delta = DeltaParser->new($in);
 319   my ($added, $removed, $updated) = corelist_delta($old => $new);
 320
 321   # because of the difficulty in identifying the distribution for removed modules
 322   # don't bother checking them
 323   for my $ck ([ 'new', $delta->new_modules, $added ],
 324               #[ 'removed', $delta->removed_modules, $removed ],
 325               [ 'updated', $delta->updated_modules, $updated ] ) {
 326     my @delta = @{ $ck->[1] };
 327     my @corelist = sort { lc $a->[0] cmp lc $b->[0] } values %{ $ck->[2] };
 328
 329     printf $ck->[0] . ":\n";
 330
 331     require Algorithm::Diff;
 332     my $diff = Algorithm::Diff->new(map {
 333       [map { join q{ } => grep defined, @{ $_ } } @{ $_ }]
 334     } \@delta, \@corelist);
 335
 336     while ($diff->Next) {
 337       next if $diff->Same;
 338       my $sep = '';
 339       if (!$diff->Items(2)) {
 340         printf "%d,%dd%d\n", $diff->Get(qw( Min1 Max1 Max2 ));
 341       } elsif(!$diff->Items(1)) {
 342         printf "%da%d,%d\n", $diff->Get(qw( Max1 Min2 Max2 ));
 343       } else {
 344         $sep = "---\n";
 345         printf "%d,%dc%d,%d\n", $diff->Get(qw( Min1 Max1 Min2 Max2 ));
 346       }
 347       print "Delta< $_\n" for $diff->Items(1);
 348       print $sep;
 349       print "Corelist> $_\n" for $diff->Items(2);
 350     }
 351
 352     print "\n";
 353   }
 354 }
 355
 356 {
 357
 358   package DeltaUpdater;
 359   use List::Util 'reduce';
 360
 361   sub get_section_name_from_heading {
 362     my $heading = shift;
 363     while (my ($key, $expression) = each %sections) {
 364       if ($heading =~ $expression) {
 365         return $titles{$key};
 366       }
 367     }
 368     die "$heading did not match any section";
 369   }
 370
 371   sub is_desired_section_name {
 372     for (values %sections) {
 373       return 1 if $_[0] =~ $_;
 374     }
 375     return 0;
 376   }
 377
 378   # verify the module and pragmata in the section, changing the stated version if necessary
 379   # this subroutine warns if the module name cannot be parsed or if it is not listed in
 380   # the results returned from corelist_delta()
 381   #
 382   # a side-effect of calling this function is that modules present in the section are
 383   # removed from $data, resulting in $data containing only those modules and pragmata
 384   # that were not listed in the perldelta file. This means we can then pass $data to
 385   # add_to_section() without worrying about filtering out duplicates
 386   sub update_section {
 387     my ( $section, $data, $title ) = @_;
 388     my @items = @{ $section->{items} };
 389
 390     for my $item (@items) {
 391
 392       my $content = $item->{text};
 393       my $module  = $item->{name};
 394
 395       #skip dummy items
 396       next if !$module and $content =~ /\s*xx*\s*/i;
 397
 398       say "Could not parse module name; line is:\n\t$content" and next unless $module;
 399
 400       if ( !$data->{$title}{$module} ) {
 401         print "$module is not listed as being $title in Module::CoreList.\n";
 402         print "Ensure Module::CoreList has been updated and\n";
 403         print "check to see that the distribution is not listed under another name.\n\n";
 404         next;
 405       }
 406
 407       if ( $title eq 'new' ) {
 408         my ($new) = $content =~ /(\d[^\s]+)\s+has\s+been.*$/m;
 409         say "Could not parse new version for $module; line is:\n\t$content" and next unless $new;
 410         if ( $data->{$title}{$module}[2] ne $new ) {
 411             say "$module: new version differs; version in pod: $new; version in corelist: " . $data->{$title}{$module}[2];
 412         }
 413         $content =~ s/\d[^\s]+(\s+has\s+been.*$)/$data->{$title}{$module}[2].$1/me;
 414       }
 415
 416       elsif ( $title eq 'updated' ) {
 417         my ( $prev, $new ) = $content =~ /from\s+(?:version\s+)?(\d[^\s]+)\s+to\s+(?:version\s+)?(\d[^\s,]+?)(?=[\s,]|\.\s|\.$|$).*/s;
 418         say "Could not parse old and new version for $module; line is:\n\t$content" and next
 419           unless $prev and $new;
 420         if ( $data->{$title}{$module}[1] ne $prev ) {
 421           say "$module: previous version differs; version in pod: $prev; version in corelist: " . $data->{$title}{$module}[1];
 422         }
 423         if ( $data->{$title}{$module}[2] ne $new ) {
 424           say "$module: new version differs; version in pod: $new; version in corelist: " . $data->{$title}{$module}[2];
 425         }
 426         $content =~
 427           s/(from\s+(?:version\s+)?)\d[^\s]+(\s+to\s+(?:version\s+)?)\d[^\s,]+?(?=[\s,]|\.\s|\.$|$)(.*)/$1.$data->{$title}{$module}[1].$2.$data->{$title}{$module}[2].$3/se;
 428       }
 429
 430       elsif ( $title eq 'removed' ) {
 431         my ($prev) = $content =~ /^.*?was\s+(\d[^\s]+?)/m;
 432         say "Could not parse old version for $module; line is:\n\t$content" and next unless $prev;
 433         if ( $data->{$title}{$module}[1] ne $prev ) {
 434           say "$module: previous version differs; $prev " . $data->{$title}{$module}[1];
 435         }
 436         $content =~ s/(^.*?was\s+)\d[^\s]+?/$1.$data->{$title}{$module}[1]/me;
 437       }
 438
 439       delete $data->{$title}{$module};
 440       $item->{text} = $content;
 441     }
 442     return $section;
 443   }
 444
 445   # add modules and pragmata present in $data to the section
 446   sub add_to_section {
 447     my ( $section, $data, $title ) = @_;
 448
 449     #undef is a valid version name in Module::CoreList so suppress warnings about concatenating undef values
 450     no warnings 'uninitialized';
 451     for ( values %{ $data->{$title} } ) {
 452       my ( $mod, $old_v, $new_v ) = @{$_};
 453       my ( $item, $text );
 454
 455       $item = { name => $mod, text => "=item *\n" };
 456       if ( $title eq 'new' ) {
 457         $text = "L<$mod> $new_v has been added to the Perl core.\n";
 458       }
 459
 460       elsif ( $title eq 'updated' ) {
 461         $text = "L<$mod> has been upgraded from version $old_v to $new_v.\n";
 462         if ( $deprecated->{$mod} ) {
 463           $text .= "NOTE: L<$mod> is deprecated and may be removed from a future version of Perl.\n";
 464         }
 465       }
 466
 467       elsif ( $title eq 'removed' ) {
 468         $text = "C<$mod> has been removed from the Perl core.  Prior version was $old_v.\n";
 469       }
 470
 471       $item->{text} .= "\n$text\n";
 472       push @{ $section->{items} }, $item;
 473     }
 474     return $section;
 475   }
 476
 477   sub sort_items_in_section {
 478     my ($section) = @_;
 479
 480     # if we could not parse the module name, it will be uninitalized
 481     # in sort. This is not a problem as it will just result in these
 482     # sections being placed near the beginning of the section
 483     no warnings 'uninitialized';
 484     $section->{items} =
 485       [ sort { lc $a->{name} cmp lc $b->{name} } @{ $section->{items} } ];
 486     return $section;
 487   }
 488
 489   # given a hashref of the form returned by corelist_delta()
 490   # and a hash structured as documented in transform_pod(), it returns
 491   # a pod string representation of the sections, creating sections
 492   # if necessary
 493   sub sections_to_pod {
 494     my ( $data, %sections ) = @_;
 495     my $out = '';
 496
 497     for (
 498         (
 499           [ 'New Modules and Pragmata',     'new' ],
 500           [ 'Updated Modules and Pragmata', 'updated' ],
 501           [ 'Removed Modules and Pragmata', 'removed' ]
 502         )
 503       )
 504     {
 505       my ( $section_name, $title ) = @{$_};
 506
 507       my $section = $sections{$section_name} // {
 508           name           => $section_name,
 509           preceding_text => "=head2 $_->[0]\n=over 4\n",
 510           following_text => "=back\n",
 511           items          => [],
 512           manual         => 1
 513       };
 514
 515       $section = update_section( $section, $data, $title );
 516       $section = add_to_section( $section, $data, $title );
 517       $section = sort_items_in_section( $section );
 518
 519       next if $section->{manual} and scalar @{ $section->{items} } == 0;
 520
 521       my $items = reduce { no warnings 'once'; $a . $b->{text} }
 522         ( '', @{ $section->{items} } );
 523       $out .=
 524         ( $section->{preceding_text} // '' )
 525         . $items
 526         . ( $section->{following_text} // '' );
 527     }
 528     return $out;
 529   }
 530
 531   # given a filename corresponding to an existing perldelta file
 532   # and a hashref of the form returned by corelist_delta(), it
 533   # returns a string of the resulting file after the module
 534   # information has been added.
 535   sub transform_pod {
 536     my ( $existing, $data ) = @_;
 537
 538     # will contain hashrefs corresponding to new, updated and removed
 539     # modules and pragmata keyed by section name
 540     # each section is hashref of the structure
 541     #   preceding_text => Text occurring before and including the over
 542     #                     region containing the list of modules,
 543     #   items          => [Arrayref of hashrefs corresponding to a module
 544     #                      entry],
 545     #     an entry has the form:
 546     #       name => Module name or undef if the name could not be determined
 547     #       text => The text of the entry, including the item heading
 548     #
 549     #   following_text => Any text not corresponding to a module
 550     #                     that occurs after the first module
 551     #
 552     # the sections are converted to a pod string by calling sections_to_pod()
 553     my %sections;
 554
 555     # we are in the Modules_and_Pragmata's section
 556     my $in_Modules_and_Pragmata;
 557
 558     # we are the Modules_and_Pragmata's section but have not
 559     # encountered any of the desired sections. We use this
 560     # flag to determine whether we should append the text to $out
 561     # or we need to delay appending until the module listings are
 562     # processed and instead append to $append_to_out
 563     my $in_Modules_and_Pragmata_preamble;
 564
 565     my $done_processing_Modules_and_Pragmata;
 566
 567     my $current_section;
 568
 569     # $nested_element_level == 0 : not in an over region, treat lines as text
 570     # $nested_element_level == 1 : presumably in the top over region that
 571     #                              corresponds to the module listing. Treat
 572     #                              each item as a module
 573     # $nested_element_level > 1  : we only consider these values when we are in an item
 574     #                              We treat lines as the text of the current item.
 575     my $nested_element_level = 0;
 576
 577     my $current_item;
 578     my $need_to_parse_module_name;
 579
 580     my $out = '';
 581     my $append_to_out = '';
 582
 583     open my $fh, '<', $existing or die "can't open perldelta file $existing: $!";
 584     binmode($fh);
 585
 586     while (<$fh>) {
 587       # treat the rest of the file as plain text
 588       if ($done_processing_Modules_and_Pragmata) {
 589         $out .= $_;
 590         next;
 591       }
 592
 593       elsif ( !$in_Modules_and_Pragmata ) {
 594         # entering Modules and Pragmata
 595         if (/^=head1 Modules and Pragmata/) {
 596           $in_Modules_and_Pragmata          = 1;
 597           $in_Modules_and_Pragmata_preamble = 1;
 598         }
 599         $out .= $_;
 600         next;
 601       }
 602
 603       # leaving Modules and Pragmata
 604       elsif (/^=head1/) {
 605         if ($current_section) {
 606           push @{ $current_section->{items} }, $current_item
 607             if $current_item;
 608           $sections{ $current_section->{name} } = $current_section;
 609         }
 610         $done_processing_Modules_and_Pragmata = 1;
 611         $out .=
 612           sections_to_pod( $data, %sections ) . $append_to_out . $_;
 613         next;
 614       }
 615
 616       # new section in Modules and Pragmata
 617       elsif (/^=head2 (.*?)$/) {
 618         my $name = $1;
 619         if ($current_section) {
 620           push @{ $current_section->{items} }, $current_item
 621             if $current_item;
 622           $sections{ $current_section->{name} } = $current_section;
 623           undef $current_section;
 624         }
 625
 626         if ( is_desired_section_name($name) ) {
 627           undef $in_Modules_and_Pragmata_preamble;
 628           if ( $nested_element_level > 0 ) {
 629             die "Unexpected head2 at line no. $.";
 630           }
 631           my $title = get_section_name_from_heading($name);
 632           if ( exists $sections{$title} ) {
 633             die "$name occurred twice at line no. $.";
 634           }
 635           $current_section                   = {};
 636           $current_section->{name}           = $title;
 637           $current_section->{preceding_text} = $_;
 638           $current_section->{items}          = [];
 639          $nested_element_level               = 0;
 640           next;
 641         }
 642
 643         # otherwise treat section as plain text
 644         else {
 645           if ($in_Modules_and_Pragmata_preamble) {
 646             $out .= $_;
 647           }
 648           else {
 649             $append_to_out .= $_;
 650           }
 651           next;
 652         }
 653       }
 654
 655       elsif ($current_section) {
 656
 657         # not in an over region
 658         if ( $nested_element_level == 0 ) {
 659           if (/^=over/) {
 660             $nested_element_level++;
 661           }
 662           if ( scalar @{ $current_section->{items} } > 0 ) {
 663             $current_section->{following_text} .= $_;
 664           }
 665           else {
 666             $current_section->{preceding_text} .= $_;
 667           }
 668           next;
 669         }
 670
 671         if ($current_item) {
 672           if ($need_to_parse_module_name) {
 673             # the item may not have a parsable module name, which means that
 674             # $current_item->{name} will never be defined.
 675             if (/^(?:L|C)<(.+?)>/) {
 676               $current_item->{name} = $1;
 677               undef $need_to_parse_module_name;
 678             }
 679             # =item or =back signals the end of an item
 680             # block, which we handle below
 681             if ( !/^=(?:item|back)/ ) {
 682               $current_item->{text} .= $_;
 683               next;
 684             }
 685           }
 686           # currently in an over region
 687           # treat text inside region as plain text
 688           if ( $nested_element_level > 1 ) {
 689             if (/^=back/) {
 690               $nested_element_level--;
 691             }
 692             elsif (/^=over/) {
 693               $nested_element_level++;
 694             }
 695             $current_item->{text} .= $_;
 696             next;
 697           }
 698           # entering over region
 699           if (/^=over/) {
 700             $nested_element_level++;
 701             $current_item->{text} .= $_;
 702             next;
 703           }
 704           # =item or =back signals the end of an item
 705           # block, which we handle below
 706           if ( !/^=(?:item|back)/ ) {
 707             $current_item->{text} .= $_;
 708             next;
 709           }
 710         }
 711
 712         if (/^=item \*/) {
 713           push @{ $current_section->{items} }, $current_item
 714             if $current_item;
 715           $current_item = { text => $_ };
 716           $need_to_parse_module_name = 1;
 717           next;
 718         }
 719
 720         if (/^=back/) {
 721           push @{ $current_section->{items} }, $current_item
 722             if $current_item;
 723           undef $current_item;
 724           $nested_element_level--;
 725         }
 726
 727         if ( scalar @{ $current_section->{items} } == 0 ) {
 728           $current_section->{preceding_text} .= $_;
 729         }
 730         else {
 731           $current_section->{following_text} .= $_;
 732         }
 733         next;
 734       }
 735
 736       # text in Modules and Pragmata not in a head2 region
 737       else {
 738         if ($in_Modules_and_Pragmata_preamble) {
 739           $out .= $_;
 740         }
 741         else {
 742           $append_to_out .= $_;
 743         }
 744         next;
 745       }
 746     }
 747     close $fh;
 748     die 'Never saw Modules and Pragmata section' unless $in_Modules_and_Pragmata;
 749     return $out;
 750   }
 751
 752 }
 753
 754 {
 755   package DeltaParser;
 756   use Pod::Simple::SimpleTree;
 757
 758   sub new {
 759     my ($class, $input) = @_;
 760
 761     my $self = bless {} => $class;
 762
 763     my $parsed_pod = Pod::Simple::SimpleTree->new->parse_file($input)->root;
 764     splice @{ $parsed_pod }, 0, 2; # we don't care about the document structure,
 765                                    # just the nodes within it
 766
 767     $self->_parse_delta($parsed_pod);
 768
 769     return $self;
 770   }
 771
 772   # creates the accessor methods:
 773   #   new_modules
 774   #   updated_modules
 775   #   removed_modules
 776   for my $k (keys %sections) {
 777     no strict 'refs';
 778     my $m = "${k}_modules";
 779     *$m = sub { $_[0]->{$m} };
 780   }
 781
 782   sub _parse_delta {
 783     my ($self, $pod) = @_;
 784
 785     my $new_section     = $self->_look_for_section( $pod, $sections{new} );
 786     my $updated_section = $self->_look_for_section( $pod, $sections{updated} );
 787     my $removed_section = $self->_look_for_section( $pod, $sections{removed} );
 788
 789     $self->_parse_new_section($new_section);
 790     $self->_parse_updated_section($updated_section);
 791     $self->_parse_removed_section($removed_section);
 792
 793     for (qw/new_modules updated_modules removed_modules/) {
 794       $self->{$_} =
 795         [ sort { lc $a->[0] cmp lc $b->[0] } @{ $self->{$_} } ];
 796     }
 797
 798     return;
 799   }
 800
 801   sub _parse_new_section {
 802     my ($self, $section) = @_;
 803
 804     $self->{new_modules} = [];
 805     return unless $section;
 806     $self->{new_modules} = $self->_parse_section($section => sub {
 807       my ($el) = @_;
 808
 809       my ($first, $second) = @{ $el }[2, 3];
 810       my ($ver) = $second =~ /(\d[^\s]+)\s+has\s+been/;
 811
 812       return [ $first->[2], undef, $ver ];
 813     });
 814
 815     return;
 816   }
 817
 818   sub _parse_updated_section {
 819     my ($self, $section) = @_;
 820
 821     $self->{updated_modules} = [];
 822     return unless $section;
 823     $self->{updated_modules} = $self->_parse_section($section => sub {
 824       my ($el) = @_;
 825
 826       my ($first, $second) = @{ $el }[2, 3];
 827       my $module = $first->[2];
 828
 829       # the regular expression matches the following:
 830       #   from VERSION_NUMBER to VERSION_NUMBER
 831       #   from VERSION_NUMBER to VERSION_NUMBER.
 832       #   from version VERSION_NUMBER to version VERSION_NUMBER.
 833       #   from VERSION_NUMBER to VERSION_NUMBER and MODULE from VERSION_NUMBER to VERSION_NUMBER
 834       #   from VERSION_NUMBER to VERSION_NUMBER, and MODULE from VERSION_NUMBER to VERSION_NUMBER
 835       #
 836       # some perldeltas contain more than one module listed in an entry, this only attempts to match the
 837       # first module
 838       my ($old, $new) = $second =~
 839           /from\s+(?:version\s+)?(\d[^\s]+)\s+to\s+(?:version\s+)?(\d[^\s,]+?)(?=[\s,]|\.\s|\.$|$).*/s;
 840
 841       warn "Unable to extract old or new version of $module from perldelta"
 842         if !defined $old || !defined $new;
 843
 844       return [ $module, $old, $new ];
 845     });
 846
 847     return;
 848   }
 849
 850   sub _parse_removed_section {
 851     my ($self, $section) = @_;
 852
 853     $self->{removed_modules} = [];
 854     return unless $section;
 855     $self->{removed_modules} = $self->_parse_section($section => sub {
 856       my ($el) = @_;
 857
 858       my ($first, $second) = @{ $el }[2, 3];
 859       my ($old) = $second =~ /was\s+(\d[^\s]+?)\.?$/;
 860
 861       return [ $first->[2], $old, undef ];
 862     });
 863
 864     return;
 865   }
 866
 867   sub _parse_section {
 868     my ($self, $section, $parser) = @_;
 869
 870     my $items = $self->_look_down($section => sub {
 871       my ($el) = @_;
 872       return unless ref $el && $el->[0] =~ /^item-/
 873           && @{ $el } > 2 && ref $el->[2];
 874       return unless $el->[2]->[0] =~ /C|L/;
 875
 876       return 1;
 877     });
 878
 879     return [map { $parser->($_) } @{ $items }];
 880   }
 881
 882   sub _look_down {
 883     my ($self, $pod, $predicate) = @_;
 884     my @pod = @{ $pod };
 885
 886     my @l;
 887     while (my $el = shift @pod) {
 888       push @l, $el if $predicate->($el);
 889       if (ref $el) {
 890         my @el = @{ $el };
 891         splice @el, 0, 2;
 892         unshift @pod, @el if @el;
 893       }
 894     }
 895
 896     return @l ? \@l : undef;
 897   }
 898
 899   sub _look_for_section {
 900     my ($self, $pod, $section) = @_;
 901
 902     my $level;
 903     $self->_look_for_range($pod,
 904       sub {
 905         my ($el) = @_;
 906         my ($heading) = $el->[0] =~ /^head(\d)$/;
 907         my $f = $heading && $el->[2] =~ /^$section/;
 908         $level = $heading if $f && !$level;
 909         return $f;
 910       },
 911       sub {
 912         my ($el) = @_;
 913         $el->[0] =~ /^head(\d)$/ && $1 <= $level;
 914       },
 915     );
 916   }
 917
 918   sub _look_for_range {
 919     my ($self, $pod, $start_predicate, $stop_predicate) = @_;
 920
 921     my @l;
 922     for my $el (@{ $pod }) {
 923       if (@l) {
 924         return \@l if $stop_predicate->($el);
 925       }
 926       else {
 927         next unless $start_predicate->($el);
 928       }
 929       push @l, $el;
 930     }
 931
 932     return;
 933   }
 934 }
 935
 936 run;