This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Porting/corelist-perldelta.pl - Use Unix newlines in perldelta
[perl5.git] / Porting / corelist-perldelta.pl
CommitLineData
603f4ea0
DG
1#!perl
2use 5.010;
3use strict;
4use warnings;
5use lib 'Porting';
6use Maintainers qw/%Modules/;
5e0e6da6 7use lib 'dist/Module-CoreList/lib';
603f4ea0 8use Module::CoreList;
2a19fd14 9use Getopt::Long;
e8097ff2
MM
10
11=head1 USAGE
12
13 # generate the module changes for the Perl you are currently building
67e92e89 14 ./perl -Ilib Porting/corelist-perldelta.pl
5c51ce30
AV
15
16 # update the module changes for the Perl you are currently building
17 perl Porting/corelist-perldelta.pl --mode=update Porting/perldelta.pod
18
e8097ff2 19 # generate a diff between the corelist sections of two perldelta* files:
80da4b39 20 perl Porting/corelist-perldelta.pl --mode=check 5.017001 5.017002 <perl5172delta.pod
e8097ff2
MM
21
22=head1 ABOUT
23
24corelist-perldelta.pl is a bit schizophrenic. The part to generate the
25new Perldelta text does not need Algorithm::Diff, but wants to be
26run with the freshly built Perl.
27
28The part to check the diff wants to be run with a Perl that has an up-to-date
29L<Module::CoreList>, but needs the outside L<Algorithm::Diff>.
30
31Ideally, the program will be split into two separate programs, one
b88937b3 32to generate the text and one to show the diff between the
e8097ff2
MM
33corelist sections of the last perldelta and the next perldelta.
34
35=cut
93b00636
FR
36
37my %sections = (
8d132c8e
MM
38 new => qr/New Modules and Pragma(ta)?/,
39 updated => qr/Updated Modules and Pragma(ta)?/,
40 removed => qr/Removed Modules and Pragma(ta)?/,
41);
42
43my %titles = (
caaa1415
MM
44 new => 'New Modules and Pragmata',
45 updated => 'Updated Modules and Pragmata',
46 removed => 'Removed Modules and Pragmata',
93b00636 47);
603f4ea0 48
88e14305
DG
49my $deprecated;
50
2a19fd14
FR
51sub run {
52 my %opt = (mode => 'generate');
53
54 GetOptions(\%opt,
5c51ce30 55 'mode|m:s', # 'generate', 'check', 'update'
2a19fd14
FR
56 );
57
93b00636
FR
58 # by default, compare latest two version in CoreList;
59 my @versions = sort keys %Module::CoreList::version;
5e1cf63b
AV
60 my $old = $versions[-2];
61 my $new = $versions[-1];
62
63 # use the provided versions if present
64 # @ARGV >=2 means [old_version] [new_version] [path/to/file]
65 if ( @ARGV >= 2) {
66 ($old, $new) = (shift @ARGV, shift @ARGV);
67 die "$old is an invalid version\n" if not exists
68 $Module::CoreList::version{$old};
69 die "$new is an invalid verison\n" if not exists
70 $Module::CoreList::version{$new};
71 }
2a19fd14 72
93b00636 73 if ( $opt{mode} eq 'generate' ) {
2a19fd14 74 do_generate($old => $new);
88e14305 75 }
93b00636
FR
76 elsif ( $opt{mode} eq 'check' ) {
77 do_check(\*ARGV, $old => $new);
78 }
5c51ce30
AV
79 elsif ( $opt{mode} eq 'update' ) {
80 do_update_existing(shift @ARGV, $old => $new);
81 }
88e14305 82 else {
2a19fd14
FR
83 die "Unrecognized mode '$opt{mode}'\n";
84 }
85
86 exit 0;
87}
88
f40724d5
AV
89# Given two perl versions, it returns a list describing the core distributions that have changed.
90# The first three elements are hashrefs corresponding to new, updated, and removed modules
91# and are of the form (mostly, see the special remarks about removed):
92# 'Distribution Name' => ['Distribution Name', previous version number, current version number]
93# where the version number is undef if the distribution did not exist the fourth element is
94# an arrayref of core distribution names of those distribution for which it is unknown whether
95# they have changed and therefore need to be manually checked.
96#
97# In most cases, the distribution name in %Modules corresponds to the module that is representative
98# of the distribution as listed in Module::CoreList. However, there are a few distribution names
99# that do not correspond to a module. %distToModules, has been created which maps the distribution
100# name to a representative module. The representative module was chosen by either looking at the
101# Makefile of the distribution or by seeing which module the distribution has been traditionally
102# listed under in past perldelta.
103#
104# There are a few distributions for which there is no single representative module (e.g. libnet).
105# These distributions are returned as the last element of the list.
106#
107# %Modules contains a final key, _PERLLIB, which contains a list of modules that are owned by p5p.
108# This list contains modules and pragmata that may also be present in Module::CoreList.
109# A list of modules are in the list @unclaimedModules, which were manually listed based on whether
110# they were independent modules and whether they have been listed in past perldelta.
111# The pragmata were found by doing something like:
112# say for sort grep { $_ eq lc $_ and !exists $Modules{$_}}
113# keys %{$Module::CoreList::version{'5.019003'}}
114# and manually filtering out pragamata that were already covered.
115#
116# It is currently not possible to differentiate between a removed module and a removed
117# distribution. Therefore, the removed hashref contains every module that has been removed, even if
118# the module's corresponding distribution has not been removed.
119
93b00636 120sub corelist_delta {
2a19fd14
FR
121 my ($old, $new) = @_;
122 my $corelist = \%Module::CoreList::version;
f40724d5 123 my %changes = Module::CoreList::changes_between( $old, $new );
2a19fd14
FR
124 $deprecated = $Module::CoreList::deprecated{$new};
125
f40724d5
AV
126 my $getModifyType = sub {
127 my $data = shift;
128 if ( exists $data->{left} and exists $data->{right} ) {
129 return 'updated';
130 }
131 elsif ( !exists $data->{left} and exists $data->{right} ) {
132 return 'new';
133 }
134 elsif ( exists $data->{left} and !exists $data->{right} ) {
135 return 'removed';
136 }
137 return undef;
138 };
139
140 my @unclaimedModules = qw/AnyDBM_File B B::Concise B::Deparse Benchmark Class::Struct Config::Extensions DB DBM_Filter Devel::Peek DirHandle DynaLoader English Errno ExtUtils::Embed ExtUtils::Miniperl ExtUtils::Typemaps ExtUtils::XSSymSet Fcntl File::Basename File::Compare File::Copy File::DosGlob File::Find File::Glob File::stat FileCache FileHandle FindBin GDBM_File Getopt::Std Hash::Util Hash::Util::FieldHash I18N::Langinfo IPC::Open3 NDBM_File ODBM_File Opcode PerlIO PerlIO::encoding PerlIO::mmap PerlIO::scalar PerlIO::via Pod::Functions Pod::Html POSIX SDBM_File SelectSaver Symbol Sys::Hostname Thread Tie::Array Tie::Handle Tie::Hash Tie::Hash::NamedCapture Tie::Memoize Tie::Scalar Tie::StdHandle Tie::SubstrHash Time::gmtime Time::localtime Time::tm Unicode::UCD UNIVERSAL User::grent User::pwent VMS::DCLsym VMS::Filespec VMS::Stdio XS::Typemap Win32CORE/;
141 my @unclaimedPragmata = qw/_charnames arybase attributes blib bytes charnames deprecate diagnostics encoding feature fields filetest inc::latest integer less locale mro open ops overload overloading re sigtrap sort strict subs utf8 vars vmsish/;
142 my @unclaimed = (@unclaimedModules, @unclaimedPragmata);
143
144 my %distToModules = (
145 'IO-Compress' => [
146 {
147 'name' => 'IO-Compress',
148 'modification' => $getModifyType->( $changes{'IO::Compress::Base'} ),
149 'data' => $changes{'IO::Compress::Base'}
150 }
151 ],
152 'Locale-Codes' => [
153 {
154 'name' => 'Locale::Codes',
155 'modification' => $getModifyType->( $changes{'Locale::Codes'} ),
156 'data' => $changes{'Locale::Codes'}
157 }
158 ],
159 'PathTools' => [
160 {
161 'name' => 'File::Spec',
162 'modification' => $getModifyType->( $changes{'Cwd'} ),
163 'data' => $changes{'Cwd'}
164 }
165 ],
166 'Scalar-List-Utils' => [
167 {
168 'name' => 'List::Util',
169 'modification' => $getModifyType->( $changes{'List::Util'} ),
170 'data' => $changes{'List::Util'}
171 },
172 {
173 'name' => 'Scalar::Util',
174 'modification' => $getModifyType->( $changes{'Scalar::Util'} ),
175 'data' => $changes{'Scalar::Util'}
176 }
177 ],
178 'Text-Tabs+Wrap' => [
179 {
180 'name' => 'Text::Tabs',
181 'modification' => $getModifyType->( $changes{'Text::Tabs'} ),
182 'data' => $changes{'Text::Tabs'}
183 },
184 {
185 'name' => 'Text::Wrap',
186 'modification' => $getModifyType->( $changes{'Text::Wrap'} ),
187 'data' => $changes{'Text::Wrap'}
188 }
189 ],
190 );
191
192 # structure is (new|removed|updated) => [ [ModuleName, previousVersion, newVersion] ]
193 my $deltaGrouping = {};
194
195 # list of distributions listed in %Modules that need to be manually checked because there is no module that represents it
196 my @manuallyCheck;
2a19fd14
FR
197
198 # %Modules defines what is currently in core
199 for my $k ( keys %Modules ) {
f40724d5
AV
200 next if $k eq '_PERLLIB'; #these are taken care of by being listed in @unclaimed
201 next if Module::CoreList::is_core($k) and !exists $changes{$k}; #modules that have not changed
202
203 my ( $distName, $modifyType, $data );
204
205 if ( exists $changes{$k} ) {
206 $distName = $k;
207 $modifyType = $getModifyType->( $changes{$k} );
208 $data = $changes{$k};
88e14305 209 }
f40724d5
AV
210 elsif ( exists $distToModules{$k} ) {
211 # modification will be undef if the distribution has not changed
212 my @modules = grep { $_->{modification} } @{ $distToModules{$k} };
213 for (@modules) {
214 $deltaGrouping->{ $_->{modification} }->{ $_->{name} } = [ $_->{name}, $_->{data}->{left}, $_->{data}->{right} ];
2a19fd14 215 }
f40724d5
AV
216 next;
217 }
218 else {
219 push @manuallyCheck, $k and next;
220 }
221
222 $deltaGrouping->{$modifyType}->{$distName} = [ $distName, $data->{left}, $data->{right} ];
223 }
224
225 for my $k (@unclaimed) {
226 if ( exists $changes{$k} ) {
227 $deltaGrouping->{ $getModifyType->( $changes{$k} ) }->{$k} =
228 [ $k, $changes{$k}->{left}, $changes{$k}->{right} ];
88e14305
DG
229 }
230 }
603f4ea0 231
2a19fd14
FR
232 # in old corelist, but not this one => removed
233 # N.B. This is exhaustive -- not just what's in %Modules, so modules removed from
234 # distributions will show up here, too. Some person will have to review to see what's
235 # important. That's the best we can do without a historical Maintainers.pl
236 for my $k ( keys %{ $corelist->{$old} } ) {
237 if ( ! exists $corelist->{$new}{$k} ) {
f40724d5 238 $deltaGrouping->{'removed'}->{$k} = [ $k, $corelist->{$old}{$k}, undef ];
2a19fd14 239 }
88e14305 240 }
88e14305 241
f40724d5
AV
242 return (
243 \%{ $deltaGrouping->{'new'} },
244 \%{ $deltaGrouping->{'removed'} },
245 \%{ $deltaGrouping->{'updated'} },
246 \@manuallyCheck
247 );
93b00636
FR
248}
249
5c51ce30
AV
250# currently does not update the Removed Module section
251sub do_update_existing {
252 my ( $existing, $old, $new ) = @_;
253
254 my ( $added, $removed, $updated, $manuallyCheck ) = corelist_delta( $old => $new );
255 if ($manuallyCheck) {
256 say "Please check whether the following distributions have been modified and list accordingly";
257 say "\t* $_" for sort @{$manuallyCheck};
258 }
259
260 my $data = {
261 new => $added,
262 updated => $updated,
263 #removed => $removed, ignore removed for now
264 };
265
266 my $text = DeltaUpdater::transform_pod( $existing, $data );
267 open my $out, '>', $existing or die "can't open perldelta file $existing: $!";
9c7cb461 268 binmode($out);
5c51ce30
AV
269 print $out $text;
270 close $out;
271}
272
93b00636
FR
273sub do_generate {
274 my ($old, $new) = @_;
f40724d5
AV
275 my ($added, $removed, $updated, $manuallyCheck) = corelist_delta($old => $new);
276
277 if ($manuallyCheck) {
278 say "\nXXXPlease check whether the following distributions have been modified and list accordingly";
279 say "\t$_" for @{$manuallyCheck};
280 }
93b00636 281
3bb80250
AV
282 my $data = {
283 new => $added,
284 updated => $updated,
285 #removed => $removed, ignore removed for now
286 };
287
288 say DeltaUpdater::sections_to_pod($data)
93b00636
FR
289}
290
291sub do_check {
292 my ($in, $old, $new) = @_;
293
294 my $delta = DeltaParser->new($in);
f40724d5 295 my ($added, $removed, $updated) = corelist_delta($old => $new);
93b00636 296
fc353283
AV
297 # because of the difficulty in identifying the distribution for removed modules
298 # don't bother checking them
f40724d5 299 for my $ck ([ 'new', $delta->new_modules, $added ],
fc353283 300 #[ 'removed', $delta->removed_modules, $removed ],
f40724d5 301 [ 'updated', $delta->updated_modules, $updated ] ) {
93b00636 302 my @delta = @{ $ck->[1] };
f40724d5 303 my @corelist = sort { lc $a->[0] cmp lc $b->[0] } values %{ $ck->[2] };
93b00636
FR
304
305 printf $ck->[0] . ":\n";
306
e8097ff2 307 require Algorithm::Diff;
93b00636
FR
308 my $diff = Algorithm::Diff->new(map {
309 [map { join q{ } => grep defined, @{ $_ } } @{ $_ }]
310 } \@delta, \@corelist);
311
312 while ($diff->Next) {
313 next if $diff->Same;
314 my $sep = '';
315 if (!$diff->Items(2)) {
316 printf "%d,%dd%d\n", $diff->Get(qw( Min1 Max1 Max2 ));
317 } elsif(!$diff->Items(1)) {
318 printf "%da%d,%d\n", $diff->Get(qw( Max1 Min2 Max2 ));
319 } else {
320 $sep = "---\n";
321 printf "%d,%dc%d,%d\n", $diff->Get(qw( Min1 Max1 Min2 Max2 ));
322 }
fc353283 323 print "Delta< $_\n" for $diff->Items(1);
93b00636 324 print $sep;
fc353283 325 print "Corelist> $_\n" for $diff->Items(2);
93b00636
FR
326 }
327
328 print "\n";
329 }
330}
331
332{
5c51ce30
AV
333
334 package DeltaUpdater;
335 use List::Util 'reduce';
336
337 sub get_section_name_from_heading {
338 my $heading = shift;
339 while (my ($key, $expression) = each %sections) {
340 if ($heading =~ $expression) {
341 return $titles{$key};
342 }
343 }
344 die "$heading did not match any section";
345 }
346
347 sub is_desired_section_name {
348 for (values %sections) {
349 return 1 if $_[0] =~ $_;
350 }
351 return 0;
352 }
353
354 # verify the module and pragmata in the section, changing the stated version if necessary
355 # this subroutine warns if the module name cannot be parsed or if it is not listed in
356 # the results returned from corelist_delta()
357 #
358 # a side-effect of calling this function is that modules present in the section are
359 # removed from $data, resulting in $data containing only those modules and pragmata
360 # that were not listed in the perldelta file. This means we can then pass $data to
361 # add_to_section() without worrying about filtering out duplicates
362 sub update_section {
363 my ( $section, $data, $title ) = @_;
364 my @items = @{ $section->{items} };
365
366 for my $item (@items) {
367
368 my $content = $item->{text};
369 my $module = $item->{name};
370
371 say "Could not parse module name; line is:\n\t$content" and next unless $module;
372 say "$module is not in Module::CoreList; check to see that it is not covered by another section" and next
373 unless $data->{$title}{$module};
374
375 if ( $title eq 'new' ) {
376 my ($new) = $content =~ /(\d[^\s]+)\s+has\s+been.*$/m;
377 say "Could not parse new version for $module; line is:\n\t$content" and next unless $new;
378 if ( $data->{$title}{$module}[2] ne $new ) {
379 say "$module: new version differs; version in pod: $new; version in corelist: " . $data->{$title}{$module}[2];
380 }
381 $content =~ s/\d[^\s]+(\s+has\s+been.*$)/$data->{$title}{$module}[2].$1/me;
382 }
383
384 elsif ( $title eq 'updated' ) {
385 my ( $prev, $new ) = $content =~ /from\s+(?:version\s+)?(\d[^\s]+)\s+to\s+(?:version\s+)?(\d[^\s,]+?)(?=[\s,]|\.\s|\.$|$).*/s;
386 say "Could not parse old and new version for $module; line is:\n\t$content" and next
387 unless $prev and $new;
388 if ( $data->{$title}{$module}[1] ne $prev ) {
389 say "$module: previous version differs; version in pod: $prev; version in corelist: " . $data->{$title}{$module}[1];
390 }
391 if ( $data->{$title}{$module}[2] ne $new ) {
392 say "$module: new version differs; version in pod: $new; version in corelist: " . $data->{$title}{$module}[2];
393 }
394 $content =~
395 s/(from\s+(?:version\s+)?)\d[^\s]+(\s+to\s+(?:version\s+)?)\d[^\s,]+?(?=[\s,]|\.\s|\.$|$)(.*)/$1.$data->{$title}{$module}[1].$2.$data->{$title}{$module}[2].$3/se;
396 }
397
398 elsif ( $title eq 'removed' ) {
399 my ($prev) = $content =~ /^.*?was\s+(\d[^\s]+?)/m;
400 say "Could not parse old version for $module; line is:\n\t$content" and next unless $prev;
401 if ( $data->{$title}{$module}[1] ne $prev ) {
402 say "$module: previous version differs; $prev " . $data->{$title}{$module}[1];
403 }
404 $content =~ s/(^.*?was\s+)\d[^\s]+?/$1.$data->{$title}{$module}[1]/me;
405 }
406
407 delete $data->{$title}{$module};
408 $item->{text} = $content;
409 }
410 return $section;
411 }
412
413 # add modules and pragmata present in $data to the section
414 sub add_to_section {
415 my ( $section, $data, $title ) = @_;
416
417 #undef is a valid version name in Module::CoreList so supress warnings about concatenating undef values
418 no warnings 'uninitialized';
419 for ( values %{ $data->{$title} } ) {
420 my ( $mod, $old_v, $new_v ) = @{$_};
421 my ( $item, $text );
422
423 $item = { name => $mod, text => "=item *\n" };
424 if ( $title eq 'new' ) {
425 $text = "L<$mod> $new_v has been added to the Perl core.\n";
426 }
427
428 elsif ( $title eq 'updated' ) {
429 $text = "L<$mod> has been upgraded from version $old_v to $new_v.\n";
430 if ( $deprecated->{$mod} ) {
431 $text .= "NOTE: L<$mod> is deprecated and may be removed from a future version of Perl.\n";
432 }
433 }
434
435 elsif ( $title eq 'removed' ) {
436 $text = "C<$mod> has been removed from the Perl core. Prior version was $old_v.\n";
437 }
438
439 $item->{text} .= "\n$text\n";
440 push @{ $section->{items} }, $item;
441 }
442 return $section;
443 }
444
445 sub sort_items_in_section {
446 my ($section) = @_;
447
448 # if we could not parse the module name, it will be uninitalized
449 # in sort. This is not a problem as it will just result in these
450 # sections being placed near the begining of the section
451 no warnings 'uninitialized';
452 $section->{items} =
453 [ sort { lc $a->{name} cmp lc $b->{name} } @{ $section->{items} } ];
454 return $section;
455 }
456
457 # given a hashref of the form returned by corelist_delta()
458 # and a hash structured as documented in transform_pod(), it returns
459 # a pod string representation of the sections, creating sections
460 # if necessary
461 sub sections_to_pod {
462 my ( $data, %sections ) = @_;
463 my $out = '';
464
465 for (
466 (
467 [ 'New Modules and Pragmata', 'new' ],
468 [ 'Updated Modules and Pragmata', 'updated' ],
469 [ 'Removed Modules and Pragmata', 'removed' ]
470 )
471 )
472 {
473 my ( $section_name, $title ) = @{$_};
474
475 my $section = $sections{$section_name} // {
476 name => $section_name,
477 preceeding_text => "=head2 $_->[0]\n=over 4\n",
478 following_text => "=back\n",
479 items => [],
480 manual => 1
481 };
482
483 $section = update_section( $section, $data, $title );
484 $section = add_to_section( $section, $data, $title );
485 $section = sort_items_in_section( $section );
486
487 next if $section->{manual} and scalar @{ $section->{items} } == 0;
488
489 my $items = reduce { no warnings 'once'; $a . $b->{text} }
490 ( '', @{ $section->{items} } );
491 $out .=
492 ( $section->{preceeding_text} // '' )
493 . $items
494 . ( $section->{following_text} // '' );
495 }
496 return $out;
497 }
498
499 # given a filename corresponding to an existing perldelta file
500 # and a hashref of the form returned by corelist_delta(), it
501 # returns a string of the resulting file after the module
502 # information has been added.
503 sub transform_pod {
504 my ( $existing, $data ) = @_;
505
506 # will contain hashrefs corresponding to new, updated and removed
507 # modules and pragmata keyed by section name
508 # each section is hashref of the structure
509 # preceeding_text => Text occuring before and including the over
510 # region containing the list of modules,
511 # items => [Arrayref of hashrefs corresponding to a module
512 # entry],
513 # an entry has the form:
514 # name => Module name or undef if the name could not be determined
515 # text => The text of the entry, including the item heading
516 #
517 # following_text => Any text not corresponding to a module
518 # that occurs after the first module
519 #
520 # the sections are converted to a pod string by calling sections_to_pod()
521 my %sections;
522
523 # we are in the Modules_and_Pragmata's section
524 my $in_Modules_and_Pragmata;
525 # we are the Modules_and_Pragmata's section but have not
526 # encountered any of the desired sections. We use this
527 # flag to determine whether we should append the text to $out
528 # or we need to delay appending until the module listings are
529 # processed and instead append to $append_to_out
530 my $in_Modules_and_Pragmata_preamble;
531 my $done_processing_Modules_and_Pragmata;
532
533 my $current_section;
534 # $nested_element_level == 0 : not in an over region, treat lines as text
535 # $nested_element_level == 1 : presumably in the top over region that
536 # corresponds to the module listing. Treat
537 # each item as a module
538 # $nested_element_level > 1 : we only consider these values when we are in an item
539 # We treat lines as the text of the current item.
540 my $nested_element_level = 0;
541 my $current_item;
542 my $need_to_parse_module_name;
543
544 my $out = '';
545 my $append_to_out = '';
546
547 open my $fh, '<', $existing or die "can't open perldelta file $existing: $!";
9c7cb461 548 binmode($fh);
5c51ce30
AV
549
550 while (<$fh>) {
551 # treat the rest of the file as plain text
552 if ($done_processing_Modules_and_Pragmata) {
553 $out .= $_;
554 next;
555 }
556
557 elsif ( !$in_Modules_and_Pragmata ) {
558 # entering Modules and Pragmata
559 if (/^=head1 Modules and Pragmata/) {
560 $in_Modules_and_Pragmata = 1;
561 $in_Modules_and_Pragmata_preamble = 1;
562 }
563 $out .= $_;
564 next;
565 }
566
567 # leaving Modules and Pragmata
568 elsif (/^=head1/) {
569 if ($current_section) {
570 push @{ $current_section->{items} }, $current_item
571 if $current_item;
572 $sections{ $current_section->{name} } = $current_section;
573 }
574 $done_processing_Modules_and_Pragmata = 1;
575 $out .=
576 sections_to_pod( $data, %sections ) . $append_to_out . $_;
577 next;
578 }
579
580 # new section in Modules and Pragmata
581 elsif (/^=head2 (.*?)$/) {
582 my $name = $1;
583 if ($current_section) {
584 push @{ $current_section->{items} }, $current_item
585 if $current_item;
586 $sections{ $current_section->{name} } = $current_section;
587 undef $current_section;
588 }
589
590 if ( is_desired_section_name($name) ) {
591 undef $in_Modules_and_Pragmata_preamble;
592 if ( $nested_element_level > 0 ) {
593 die "Unexpected head2 at line no. $.";
594 }
595 my $title = get_section_name_from_heading($name);
596 if ( exists $sections{$title} ) {
597 die "$name occured twice at line no. $.";
598 }
599 $current_section = {};
600 $current_section->{name} = $title;
601 $current_section->{preceeding_text} = $_;
602 $current_section->{items} = [];
603 $nested_element_level = 0;
604 next;
605 }
606
607 # otherwise treat section as plain text
608 else {
609 if ($in_Modules_and_Pragmata_preamble) {
610 $out .= $_;
611 }
612 else {
613 $append_to_out .= $_;
614 }
615 next;
616 }
617 }
618
619 elsif ($current_section) {
620
621 # not in an over region
622 if ( $nested_element_level == 0 ) {
623 if (/^=over/) {
624 $nested_element_level++;
625 }
626 if ( scalar @{ $current_section->{items} } > 0 ) {
627 $current_section->{following_text} .= $_;
628 }
629 else {
630 $current_section->{preceeding_text} .= $_;
631 }
632 next;
633 }
634
635 if ($current_item) {
636 if ($need_to_parse_module_name) {
637 # the item may not have a parsable module name, which means that
638 # $current_item->{name} will never be defined.
639 if (/^(?:L|C)<(.+?)>/) {
640 $current_item->{name} = $1;
641 undef $need_to_parse_module_name;
642 }
643 # =item or =back signals the end of an item
644 # block, which we handle below
645 if ( !/^=(?:item|back)/ ) {
646 $current_item->{text} .= $_;
647 next;
648 }
649 }
650 # currently in an over region
651 # treat text inside region as plain text
652 if ( $nested_element_level > 1 ) {
653 if (/^=back/) {
654 $nested_element_level--;
655 }
656 elsif (/^=over/) {
657 $nested_element_level++;
658 }
659 $current_item->{text} .= $_;
660 next;
661 }
662 # entering over region
663 if (/^=over/) {
664 $nested_element_level++;
665 $current_item->{text} .= $_;
666 next;
667 }
668 # =item or =back signals the end of an item
669 # block, which we handle below
670 if ( !/^=(?:item|back)/ ) {
671 $current_item->{text} .= $_;
672 next;
673 }
674 }
675
676 if (/^=item \*/) {
677 push @{ $current_section->{items} }, $current_item
678 if $current_item;
679 $current_item = { text => $_ };
680 $need_to_parse_module_name = 1;
681 next;
682 }
683
684 if (/^=back/) {
685 push @{ $current_section->{items} }, $current_item
686 if $current_item;
687 undef $current_item;
688 $nested_element_level--;
689 }
690
691 if ( scalar @{ $current_section->{items} } == 0 ) {
692 $current_section->{preceeding_text} .= $_;
693 }
694 else {
695 $current_section->{following_text} .= $_;
696 }
697 next;
698 }
699
700 # text in Modules and Pragmata not in a head2 region
701 else {
702 if ($in_Modules_and_Pragmata_preamble) {
703 $out .= $_;
704 }
705 else {
706 $append_to_out .= $_;
707 }
708 next;
709 }
710 }
711 close $fh;
712 die 'Never saw Modules and Pragmata section' unless $in_Modules_and_Pragmata;
713 return $out;
714 }
715
716}
717
718{
93b00636
FR
719 package DeltaParser;
720 use Pod::Simple::SimpleTree;
721
722 sub new {
723 my ($class, $input) = @_;
724
725 my $self = bless {} => $class;
726
727 my $parsed_pod = Pod::Simple::SimpleTree->new->parse_file($input)->root;
728 splice @{ $parsed_pod }, 0, 2; # we don't care about the document structure,
8d132c8e 729 # just the nodes within it
93b00636
FR
730
731 $self->_parse_delta($parsed_pod);
732
733 return $self;
734 }
735
c014dfad
AV
736 # creates the accessor methods:
737 # new_modules
738 # updated_modules
739 # removed_modules
93b00636
FR
740 for my $k (keys %sections) {
741 no strict 'refs';
742 my $m = "${k}_modules";
743 *$m = sub { $_[0]->{$m} };
744 }
745
746 sub _parse_delta {
747 my ($self, $pod) = @_;
748
c014dfad
AV
749 my $new_section = $self->_look_for_section( $pod, $sections{new} );
750 my $updated_section = $self->_look_for_section( $pod, $sections{updated} );
751 my $removed_section = $self->_look_for_section( $pod, $sections{removed} );
752
753 $self->_parse_new_section($new_section);
754 $self->_parse_updated_section($updated_section);
755 $self->_parse_removed_section($removed_section);
756
757 for (qw/new_modules updated_modules removed_modules/) {
758 $self->{$_} =
759 [ sort { lc $a->[0] cmp lc $b->[0] } @{ $self->{$_} } ];
93b00636
FR
760 }
761
762 return;
763 }
764
765 sub _parse_new_section {
766 my ($self, $section) = @_;
767
c014dfad
AV
768 $self->{new_modules} = [];
769 return unless $section;
93b00636
FR
770 $self->{new_modules} = $self->_parse_section($section => sub {
771 my ($el) = @_;
772
773 my ($first, $second) = @{ $el }[2, 3];
774 my ($ver) = $second =~ /(\d[^\s]+)\s+has\s+been/;
775
776 return [ $first->[2], undef, $ver ];
777 });
778
779 return;
780 }
781
782 sub _parse_updated_section {
783 my ($self, $section) = @_;
784
3668a9e5
AV
785 $self->{updated_modules} = [];
786 return unless $section;
93b00636
FR
787 $self->{updated_modules} = $self->_parse_section($section => sub {
788 my ($el) = @_;
789
790 my ($first, $second) = @{ $el }[2, 3];
791 my $module = $first->[2];
3668a9e5
AV
792
793 # the regular expression matches the following:
794 # from VERSION_NUMBER to VERSION_NUMBER
795 # from VERSION_NUMBER to VERSION_NUMBER.
796 # from version VERSION_NUMBER to version VERSION_NUMBER.
797 # from VERSION_NUMBER to VERSION_NUMBER and MODULE from VERSION_NUMBER to VERSION_NUMBER
798 # from VERSION_NUMBER to VERSION_NUMBER, and MODULE from VERSION_NUMBER to VERSION_NUMBER
799 #
800 # some perldelta contain more than one module listed in an entry, this only attempts to match the
801 # first module
93b00636 802 my ($old, $new) = $second =~
3668a9e5 803 /from\s+(?:version\s+)?(\d[^\s]+)\s+to\s+(?:version\s+)?(\d[^\s,]+?)(?=[\s,]|\.\s|\.$|$).*/s;
93b00636
FR
804
805 warn "Unable to extract old or new version of $module from perldelta"
806 if !defined $old || !defined $new;
807
808 return [ $module, $old, $new ];
809 });
810
811 return;
812 }
813
814 sub _parse_removed_section {
815 my ($self, $section) = @_;
c014dfad
AV
816
817 $self->{removed_modules} = [];
818 return unless $section;
93b00636
FR
819 $self->{removed_modules} = $self->_parse_section($section => sub {
820 my ($el) = @_;
821
822 my ($first, $second) = @{ $el }[2, 3];
823 my ($old) = $second =~ /was\s+(\d[^\s]+?)\.?$/;
824
825 return [ $first->[2], $old, undef ];
826 });
827
828 return;
829 }
830
831 sub _parse_section {
832 my ($self, $section, $parser) = @_;
833
834 my $items = $self->_look_down($section => sub {
835 my ($el) = @_;
836 return unless ref $el && $el->[0] =~ /^item-/
837 && @{ $el } > 2 && ref $el->[2];
5c404e2c 838 return unless $el->[2]->[0] =~ /C|L/;
93b00636
FR
839
840 return 1;
841 });
842
843 return [map { $parser->($_) } @{ $items }];
844 }
845
846 sub _look_down {
847 my ($self, $pod, $predicate) = @_;
848 my @pod = @{ $pod };
849
850 my @l;
851 while (my $el = shift @pod) {
852 push @l, $el if $predicate->($el);
853 if (ref $el) {
854 my @el = @{ $el };
855 splice @el, 0, 2;
856 unshift @pod, @el if @el;
857 }
858 }
859
860 return @l ? \@l : undef;
861 }
862
863 sub _look_for_section {
864 my ($self, $pod, $section) = @_;
865
866 my $level;
867 $self->_look_for_range($pod,
868 sub {
869 my ($el) = @_;
86c08a2c 870 my ($heading) = $el->[0] =~ /^head(\d)$/;
b88937b3 871 my $f = $heading && $el->[2] =~ /^$section/;
86c08a2c 872 $level = $heading if $f && !$level;
93b00636
FR
873 return $f;
874 },
875 sub {
876 my ($el) = @_;
877 $el->[0] =~ /^head(\d)$/ && $1 <= $level;
878 },
879 );
880 }
881
882 sub _look_for_range {
883 my ($self, $pod, $start_predicate, $stop_predicate) = @_;
884
885 my @l;
886 for my $el (@{ $pod }) {
887 if (@l) {
888 return \@l if $stop_predicate->($el);
889 }
890 else {
891 next unless $start_predicate->($el);
892 }
893 push @l, $el;
894 }
895
896 return;
897 }
2a19fd14 898}
88e14305 899
2a19fd14 900run;