This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Faster feature checks
[perl5.git] / Porting / corelist-perldelta.pl
CommitLineData
603f4ea0
DG
1#!perl
2use 5.010;
3use strict;
4use warnings;
5use lib 'Porting';
6use Maintainers qw/%Modules/;
5e0e6da6 7use lib 'dist/Module-CoreList/lib';
603f4ea0 8use Module::CoreList;
2a19fd14 9use Getopt::Long;
e8097ff2
MM
10
11=head1 USAGE
12
13 # generate the module changes for the Perl you are currently building
67e92e89 14 ./perl -Ilib Porting/corelist-perldelta.pl
5c51ce30
AV
15
16 # update the module changes for the Perl you are currently building
59d2de25 17 ./perl -Ilib Porting/corelist-perldelta.pl --mode=update pod/perldelta.pod
5c51ce30 18
e8097ff2 19 # generate a diff between the corelist sections of two perldelta* files:
80da4b39 20 perl Porting/corelist-perldelta.pl --mode=check 5.017001 5.017002 <perl5172delta.pod
e8097ff2
MM
21
22=head1 ABOUT
23
24corelist-perldelta.pl is a bit schizophrenic. The part to generate the
25new Perldelta text does not need Algorithm::Diff, but wants to be
26run with the freshly built Perl.
27
28The part to check the diff wants to be run with a Perl that has an up-to-date
29L<Module::CoreList>, but needs the outside L<Algorithm::Diff>.
30
31Ideally, the program will be split into two separate programs, one
b88937b3 32to generate the text and one to show the diff between the
e8097ff2
MM
33corelist sections of the last perldelta and the next perldelta.
34
59d2de25
AV
35Currently no information about Removed Modules is displayed in any of the
36modes.
54e78c5e 37
e8097ff2 38=cut
93b00636
FR
39
40my %sections = (
8d132c8e
MM
41 new => qr/New Modules and Pragma(ta)?/,
42 updated => qr/Updated Modules and Pragma(ta)?/,
43 removed => qr/Removed Modules and Pragma(ta)?/,
44);
45
46my %titles = (
caaa1415
MM
47 new => 'New Modules and Pragmata',
48 updated => 'Updated Modules and Pragmata',
49 removed => 'Removed Modules and Pragmata',
93b00636 50);
603f4ea0 51
88e14305
DG
52my $deprecated;
53
2a19fd14
FR
54sub run {
55 my %opt = (mode => 'generate');
56
57 GetOptions(\%opt,
5c51ce30 58 'mode|m:s', # 'generate', 'check', 'update'
2a19fd14
FR
59 );
60
93b00636 61 # by default, compare latest two version in CoreList;
c27d84d9 62 my ($old, $new) = latest_two_perl_versions();
5e1cf63b
AV
63
64 # use the provided versions if present
65 # @ARGV >=2 means [old_version] [new_version] [path/to/file]
66 if ( @ARGV >= 2) {
67 ($old, $new) = (shift @ARGV, shift @ARGV);
68 die "$old is an invalid version\n" if not exists
69 $Module::CoreList::version{$old};
29900347 70 die "$new is an invalid version\n" if not exists
5e1cf63b
AV
71 $Module::CoreList::version{$new};
72 }
2a19fd14 73
93b00636 74 if ( $opt{mode} eq 'generate' ) {
2a19fd14 75 do_generate($old => $new);
88e14305 76 }
93b00636
FR
77 elsif ( $opt{mode} eq 'check' ) {
78 do_check(\*ARGV, $old => $new);
79 }
5c51ce30
AV
80 elsif ( $opt{mode} eq 'update' ) {
81 do_update_existing(shift @ARGV, $old => $new);
82 }
88e14305 83 else {
2a19fd14
FR
84 die "Unrecognized mode '$opt{mode}'\n";
85 }
86
87 exit 0;
88}
89
c27d84d9
AC
90sub latest_two_perl_versions {
91
92 my @versions = sort keys %Module::CoreList::version;
93
94 my $new = pop @versions;
95
96 # If a fully-padded version number ends in a zero (as in "5.019010"), that
97 # version shows up in %Module::CoreList::version both with and without its
98 # trailing zeros. So skip all versions that are numerically equal to $new.
99 pop @versions while @versions && $versions[-1] == $new;
100
101 die "Too few distinct core versions in %Module::CoreList::version ?!\n"
102 if !@versions;
103
104 return $versions[-1], $new;
105}
106
f40724d5
AV
107# Given two perl versions, it returns a list describing the core distributions that have changed.
108# The first three elements are hashrefs corresponding to new, updated, and removed modules
109# and are of the form (mostly, see the special remarks about removed):
110# 'Distribution Name' => ['Distribution Name', previous version number, current version number]
3c7fa354
SH
111# where the version number is undef if the distribution did not exist.
112# The fourth element is an arrayref of core distribution names of those distribution for which it
113# is unknown whether they have changed and therefore need to be manually checked.
f40724d5
AV
114#
115# In most cases, the distribution name in %Modules corresponds to the module that is representative
116# of the distribution as listed in Module::CoreList. However, there are a few distribution names
3c7fa354 117# that do not correspond to a module. %distToModules has been created which maps the distribution
f40724d5
AV
118# name to a representative module. The representative module was chosen by either looking at the
119# Makefile of the distribution or by seeing which module the distribution has been traditionally
3c7fa354 120# listed under in past perldeltas.
f40724d5
AV
121#
122# There are a few distributions for which there is no single representative module (e.g. libnet).
123# These distributions are returned as the last element of the list.
124#
125# %Modules contains a final key, _PERLLIB, which contains a list of modules that are owned by p5p.
126# This list contains modules and pragmata that may also be present in Module::CoreList.
127# A list of modules are in the list @unclaimedModules, which were manually listed based on whether
3c7fa354 128# they were independent modules and whether they have been listed in past perldeltas.
f40724d5
AV
129# The pragmata were found by doing something like:
130# say for sort grep { $_ eq lc $_ and !exists $Modules{$_}}
131# keys %{$Module::CoreList::version{'5.019003'}}
132# and manually filtering out pragamata that were already covered.
133#
134# It is currently not possible to differentiate between a removed module and a removed
135# distribution. Therefore, the removed hashref contains every module that has been removed, even if
136# the module's corresponding distribution has not been removed.
137
93b00636 138sub corelist_delta {
2a19fd14
FR
139 my ($old, $new) = @_;
140 my $corelist = \%Module::CoreList::version;
f40724d5 141 my %changes = Module::CoreList::changes_between( $old, $new );
2a19fd14
FR
142 $deprecated = $Module::CoreList::deprecated{$new};
143
f40724d5
AV
144 my $getModifyType = sub {
145 my $data = shift;
146 if ( exists $data->{left} and exists $data->{right} ) {
147 return 'updated';
148 }
149 elsif ( !exists $data->{left} and exists $data->{right} ) {
150 return 'new';
151 }
152 elsif ( exists $data->{left} and !exists $data->{right} ) {
153 return 'removed';
154 }
155 return undef;
156 };
157
236bdd01
N
158 my @unclaimedModules = qw/AnyDBM_File B B::Concise B::Deparse Benchmark Class::Struct Config::Extensions DB
159 DBM_Filter Devel::Peek DirHandle DynaLoader English Errno ExtUtils::Embed ExtUtils::Miniperl
160 ExtUtils::Typemaps ExtUtils::XSSymSet Fcntl File::Basename File::Compare File::Copy File::DosGlob
161 File::Find File::Glob File::stat FileCache FileHandle FindBin GDBM_File Getopt::Std Hash::Util Hash::Util::FieldHash
162 I18N::Langinfo IPC::Open3 NDBM_File ODBM_File Opcode PerlIO PerlIO::encoding PerlIO::mmap PerlIO::scalar PerlIO::via
163 Pod::Functions Pod::Html POSIX SDBM_File SelectSaver Symbol Sys::Hostname Thread Tie::Array Tie::Handle Tie::Hash
164 Tie::Hash::NamedCapture Tie::Memoize Tie::Scalar Tie::StdHandle Tie::SubstrHash Time::gmtime Time::localtime Time::tm
165 Unicode::UCD UNIVERSAL User::grent User::pwent VMS::DCLsym VMS::Filespec VMS::Stdio XS::Typemap XS::APItest Win32CORE/;
92ebde38 166 my @unclaimedPragmata = qw/arybase attributes blib bytes charnames deprecate diagnostics encoding feature fields filetest inc::latest integer less locale mro open ops overload overloading re sigtrap sort strict subs utf8 vars vmsish/;
f40724d5
AV
167 my @unclaimed = (@unclaimedModules, @unclaimedPragmata);
168
169 my %distToModules = (
170 'IO-Compress' => [
171 {
d8c6b69e 172 'name' => 'IO-Compress',
f40724d5 173 'modification' => $getModifyType->( $changes{'IO::Compress::Base'} ),
d8c6b69e
SH
174 'data' => $changes{'IO::Compress::Base'}
175 }
176 ],
177 'libnet' => [
178 {
179 'name' => 'libnet',
180 'modification' => $getModifyType->( $changes{'Net::Cmd'} ),
181 'data' => $changes{'Net::Cmd'}
f40724d5
AV
182 }
183 ],
f40724d5
AV
184 'PathTools' => [
185 {
186 'name' => 'File::Spec',
187 'modification' => $getModifyType->( $changes{'Cwd'} ),
188 'data' => $changes{'Cwd'}
189 }
190 ],
d8c6b69e
SH
191 'podlators' => [
192 {
193 'name' => 'podlators',
194 'modification' => $getModifyType->( $changes{'Pod::Text'} ),
195 'data' => $changes{'Pod::Text'}
196 }
197 ],
f40724d5
AV
198 'Scalar-List-Utils' => [
199 {
200 'name' => 'List::Util',
201 'modification' => $getModifyType->( $changes{'List::Util'} ),
202 'data' => $changes{'List::Util'}
203 },
204 {
205 'name' => 'Scalar::Util',
206 'modification' => $getModifyType->( $changes{'Scalar::Util'} ),
207 'data' => $changes{'Scalar::Util'}
d8c6b69e
SH
208 },
209 {
210 'name' => 'Sub::Util',
211 'modification' => $getModifyType->( $changes{'Sub::Util'} ),
212 'data' => $changes{'Sub::Util'}
f40724d5
AV
213 }
214 ],
215 'Text-Tabs+Wrap' => [
216 {
217 'name' => 'Text::Tabs',
218 'modification' => $getModifyType->( $changes{'Text::Tabs'} ),
219 'data' => $changes{'Text::Tabs'}
220 },
221 {
222 'name' => 'Text::Wrap',
223 'modification' => $getModifyType->( $changes{'Text::Wrap'} ),
224 'data' => $changes{'Text::Wrap'}
225 }
226 ],
227 );
228
229 # structure is (new|removed|updated) => [ [ModuleName, previousVersion, newVersion] ]
230 my $deltaGrouping = {};
231
232 # list of distributions listed in %Modules that need to be manually checked because there is no module that represents it
233 my @manuallyCheck;
2a19fd14
FR
234
235 # %Modules defines what is currently in core
236 for my $k ( keys %Modules ) {
f40724d5
AV
237 next if $k eq '_PERLLIB'; #these are taken care of by being listed in @unclaimed
238 next if Module::CoreList::is_core($k) and !exists $changes{$k}; #modules that have not changed
239
240 my ( $distName, $modifyType, $data );
241
242 if ( exists $changes{$k} ) {
243 $distName = $k;
244 $modifyType = $getModifyType->( $changes{$k} );
245 $data = $changes{$k};
88e14305 246 }
f40724d5
AV
247 elsif ( exists $distToModules{$k} ) {
248 # modification will be undef if the distribution has not changed
249 my @modules = grep { $_->{modification} } @{ $distToModules{$k} };
250 for (@modules) {
251 $deltaGrouping->{ $_->{modification} }->{ $_->{name} } = [ $_->{name}, $_->{data}->{left}, $_->{data}->{right} ];
2a19fd14 252 }
f40724d5
AV
253 next;
254 }
255 else {
256 push @manuallyCheck, $k and next;
257 }
258
259 $deltaGrouping->{$modifyType}->{$distName} = [ $distName, $data->{left}, $data->{right} ];
260 }
261
262 for my $k (@unclaimed) {
263 if ( exists $changes{$k} ) {
264 $deltaGrouping->{ $getModifyType->( $changes{$k} ) }->{$k} =
265 [ $k, $changes{$k}->{left}, $changes{$k}->{right} ];
88e14305
DG
266 }
267 }
603f4ea0 268
2a19fd14
FR
269 # in old corelist, but not this one => removed
270 # N.B. This is exhaustive -- not just what's in %Modules, so modules removed from
271 # distributions will show up here, too. Some person will have to review to see what's
272 # important. That's the best we can do without a historical Maintainers.pl
273 for my $k ( keys %{ $corelist->{$old} } ) {
274 if ( ! exists $corelist->{$new}{$k} ) {
f40724d5 275 $deltaGrouping->{'removed'}->{$k} = [ $k, $corelist->{$old}{$k}, undef ];
2a19fd14 276 }
88e14305 277 }
88e14305 278
f40724d5
AV
279 return (
280 \%{ $deltaGrouping->{'new'} },
281 \%{ $deltaGrouping->{'removed'} },
282 \%{ $deltaGrouping->{'updated'} },
283 \@manuallyCheck
284 );
93b00636
FR
285}
286
5c51ce30
AV
287# currently does not update the Removed Module section
288sub do_update_existing {
289 my ( $existing, $old, $new ) = @_;
290
291 my ( $added, $removed, $updated, $manuallyCheck ) = corelist_delta( $old => $new );
c687a5cd 292 if (@{$manuallyCheck}) {
59d2de25
AV
293 print "It cannot be determined whether the following distributions have changed.\n";
294 print "Please check and list accordingly:\n";
5c51ce30 295 say "\t* $_" for sort @{$manuallyCheck};
59d2de25 296 print "\n";
5c51ce30
AV
297 }
298
299 my $data = {
300 new => $added,
301 updated => $updated,
302 #removed => $removed, ignore removed for now
303 };
304
305 my $text = DeltaUpdater::transform_pod( $existing, $data );
306 open my $out, '>', $existing or die "can't open perldelta file $existing: $!";
9c7cb461 307 binmode($out);
5c51ce30
AV
308 print $out $text;
309 close $out;
59d2de25
AV
310 say "The New and Updated Modules and Pragamata sections in $existing have been updated";
311 say "Please ensure the Removed Modules and Pragmata section is up-to-date";
5c51ce30
AV
312}
313
93b00636
FR
314sub do_generate {
315 my ($old, $new) = @_;
f40724d5
AV
316 my ($added, $removed, $updated, $manuallyCheck) = corelist_delta($old => $new);
317
318 if ($manuallyCheck) {
59d2de25
AV
319 print "\nXXXIt cannot be determined whether the following distributions have changed.\n";
320 print "Please check and list accordingly:\n";
f40724d5 321 say "\t$_" for @{$manuallyCheck};
59d2de25 322 print "\n";
f40724d5 323 }
93b00636 324
3bb80250
AV
325 my $data = {
326 new => $added,
327 updated => $updated,
328 #removed => $removed, ignore removed for now
329 };
330
331 say DeltaUpdater::sections_to_pod($data)
93b00636
FR
332}
333
334sub do_check {
335 my ($in, $old, $new) = @_;
336
337 my $delta = DeltaParser->new($in);
f40724d5 338 my ($added, $removed, $updated) = corelist_delta($old => $new);
93b00636 339
fc353283
AV
340 # because of the difficulty in identifying the distribution for removed modules
341 # don't bother checking them
f40724d5 342 for my $ck ([ 'new', $delta->new_modules, $added ],
fc353283 343 #[ 'removed', $delta->removed_modules, $removed ],
f40724d5 344 [ 'updated', $delta->updated_modules, $updated ] ) {
93b00636 345 my @delta = @{ $ck->[1] };
f40724d5 346 my @corelist = sort { lc $a->[0] cmp lc $b->[0] } values %{ $ck->[2] };
93b00636
FR
347
348 printf $ck->[0] . ":\n";
349
e8097ff2 350 require Algorithm::Diff;
93b00636
FR
351 my $diff = Algorithm::Diff->new(map {
352 [map { join q{ } => grep defined, @{ $_ } } @{ $_ }]
353 } \@delta, \@corelist);
354
355 while ($diff->Next) {
356 next if $diff->Same;
357 my $sep = '';
358 if (!$diff->Items(2)) {
359 printf "%d,%dd%d\n", $diff->Get(qw( Min1 Max1 Max2 ));
360 } elsif(!$diff->Items(1)) {
361 printf "%da%d,%d\n", $diff->Get(qw( Max1 Min2 Max2 ));
362 } else {
363 $sep = "---\n";
364 printf "%d,%dc%d,%d\n", $diff->Get(qw( Min1 Max1 Min2 Max2 ));
365 }
fc353283 366 print "Delta< $_\n" for $diff->Items(1);
93b00636 367 print $sep;
fc353283 368 print "Corelist> $_\n" for $diff->Items(2);
93b00636
FR
369 }
370
371 print "\n";
372 }
373}
374
375{
5c51ce30
AV
376
377 package DeltaUpdater;
378 use List::Util 'reduce';
379
380 sub get_section_name_from_heading {
381 my $heading = shift;
382 while (my ($key, $expression) = each %sections) {
383 if ($heading =~ $expression) {
384 return $titles{$key};
385 }
386 }
387 die "$heading did not match any section";
388 }
389
390 sub is_desired_section_name {
391 for (values %sections) {
392 return 1 if $_[0] =~ $_;
393 }
394 return 0;
395 }
396
397 # verify the module and pragmata in the section, changing the stated version if necessary
398 # this subroutine warns if the module name cannot be parsed or if it is not listed in
399 # the results returned from corelist_delta()
400 #
401 # a side-effect of calling this function is that modules present in the section are
402 # removed from $data, resulting in $data containing only those modules and pragmata
403 # that were not listed in the perldelta file. This means we can then pass $data to
404 # add_to_section() without worrying about filtering out duplicates
405 sub update_section {
406 my ( $section, $data, $title ) = @_;
407 my @items = @{ $section->{items} };
408
409 for my $item (@items) {
410
411 my $content = $item->{text};
412 my $module = $item->{name};
413
318148ea
AV
414 #skip dummy items
415 next if !$module and $content =~ /\s*xx*\s*/i;
416
5c51ce30 417 say "Could not parse module name; line is:\n\t$content" and next unless $module;
59d2de25
AV
418
419 if ( !$data->{$title}{$module} ) {
420 print "$module is not listed as being $title in Module::CoreList.\n";
421 print "Ensure Module::CoreList has been updated and\n";
422 print "check to see that the distribution is not listed under another name.\n\n";
423 next;
424 }
5c51ce30
AV
425
426 if ( $title eq 'new' ) {
427 my ($new) = $content =~ /(\d[^\s]+)\s+has\s+been.*$/m;
428 say "Could not parse new version for $module; line is:\n\t$content" and next unless $new;
429 if ( $data->{$title}{$module}[2] ne $new ) {
430 say "$module: new version differs; version in pod: $new; version in corelist: " . $data->{$title}{$module}[2];
431 }
432 $content =~ s/\d[^\s]+(\s+has\s+been.*$)/$data->{$title}{$module}[2].$1/me;
433 }
434
435 elsif ( $title eq 'updated' ) {
436 my ( $prev, $new ) = $content =~ /from\s+(?:version\s+)?(\d[^\s]+)\s+to\s+(?:version\s+)?(\d[^\s,]+?)(?=[\s,]|\.\s|\.$|$).*/s;
437 say "Could not parse old and new version for $module; line is:\n\t$content" and next
438 unless $prev and $new;
439 if ( $data->{$title}{$module}[1] ne $prev ) {
440 say "$module: previous version differs; version in pod: $prev; version in corelist: " . $data->{$title}{$module}[1];
441 }
442 if ( $data->{$title}{$module}[2] ne $new ) {
443 say "$module: new version differs; version in pod: $new; version in corelist: " . $data->{$title}{$module}[2];
444 }
445 $content =~
446 s/(from\s+(?:version\s+)?)\d[^\s]+(\s+to\s+(?:version\s+)?)\d[^\s,]+?(?=[\s,]|\.\s|\.$|$)(.*)/$1.$data->{$title}{$module}[1].$2.$data->{$title}{$module}[2].$3/se;
447 }
448
449 elsif ( $title eq 'removed' ) {
450 my ($prev) = $content =~ /^.*?was\s+(\d[^\s]+?)/m;
451 say "Could not parse old version for $module; line is:\n\t$content" and next unless $prev;
452 if ( $data->{$title}{$module}[1] ne $prev ) {
453 say "$module: previous version differs; $prev " . $data->{$title}{$module}[1];
454 }
455 $content =~ s/(^.*?was\s+)\d[^\s]+?/$1.$data->{$title}{$module}[1]/me;
456 }
457
458 delete $data->{$title}{$module};
459 $item->{text} = $content;
460 }
461 return $section;
462 }
463
464 # add modules and pragmata present in $data to the section
465 sub add_to_section {
466 my ( $section, $data, $title ) = @_;
467
8b591357 468 #undef is a valid version name in Module::CoreList so suppress warnings about concatenating undef values
5c51ce30
AV
469 no warnings 'uninitialized';
470 for ( values %{ $data->{$title} } ) {
471 my ( $mod, $old_v, $new_v ) = @{$_};
472 my ( $item, $text );
473
474 $item = { name => $mod, text => "=item *\n" };
475 if ( $title eq 'new' ) {
476 $text = "L<$mod> $new_v has been added to the Perl core.\n";
477 }
478
479 elsif ( $title eq 'updated' ) {
480 $text = "L<$mod> has been upgraded from version $old_v to $new_v.\n";
481 if ( $deprecated->{$mod} ) {
482 $text .= "NOTE: L<$mod> is deprecated and may be removed from a future version of Perl.\n";
483 }
484 }
485
486 elsif ( $title eq 'removed' ) {
487 $text = "C<$mod> has been removed from the Perl core. Prior version was $old_v.\n";
488 }
489
490 $item->{text} .= "\n$text\n";
491 push @{ $section->{items} }, $item;
492 }
493 return $section;
494 }
495
496 sub sort_items_in_section {
497 my ($section) = @_;
498
499 # if we could not parse the module name, it will be uninitalized
500 # in sort. This is not a problem as it will just result in these
8b591357 501 # sections being placed near the beginning of the section
5c51ce30
AV
502 no warnings 'uninitialized';
503 $section->{items} =
504 [ sort { lc $a->{name} cmp lc $b->{name} } @{ $section->{items} } ];
505 return $section;
506 }
507
508 # given a hashref of the form returned by corelist_delta()
509 # and a hash structured as documented in transform_pod(), it returns
510 # a pod string representation of the sections, creating sections
511 # if necessary
512 sub sections_to_pod {
513 my ( $data, %sections ) = @_;
514 my $out = '';
515
516 for (
517 (
518 [ 'New Modules and Pragmata', 'new' ],
519 [ 'Updated Modules and Pragmata', 'updated' ],
520 [ 'Removed Modules and Pragmata', 'removed' ]
521 )
522 )
523 {
524 my ( $section_name, $title ) = @{$_};
525
526 my $section = $sections{$section_name} // {
3c7fa354
SH
527 name => $section_name,
528 preceding_text => "=head2 $_->[0]\n=over 4\n",
529 following_text => "=back\n",
530 items => [],
531 manual => 1
5c51ce30
AV
532 };
533
534 $section = update_section( $section, $data, $title );
535 $section = add_to_section( $section, $data, $title );
536 $section = sort_items_in_section( $section );
537
538 next if $section->{manual} and scalar @{ $section->{items} } == 0;
539
540 my $items = reduce { no warnings 'once'; $a . $b->{text} }
541 ( '', @{ $section->{items} } );
542 $out .=
3c7fa354 543 ( $section->{preceding_text} // '' )
5c51ce30
AV
544 . $items
545 . ( $section->{following_text} // '' );
546 }
547 return $out;
548 }
549
550 # given a filename corresponding to an existing perldelta file
551 # and a hashref of the form returned by corelist_delta(), it
552 # returns a string of the resulting file after the module
553 # information has been added.
554 sub transform_pod {
555 my ( $existing, $data ) = @_;
556
557 # will contain hashrefs corresponding to new, updated and removed
558 # modules and pragmata keyed by section name
559 # each section is hashref of the structure
3c7fa354
SH
560 # preceding_text => Text occurring before and including the over
561 # region containing the list of modules,
562 # items => [Arrayref of hashrefs corresponding to a module
563 # entry],
5c51ce30
AV
564 # an entry has the form:
565 # name => Module name or undef if the name could not be determined
566 # text => The text of the entry, including the item heading
567 #
3c7fa354
SH
568 # following_text => Any text not corresponding to a module
569 # that occurs after the first module
5c51ce30
AV
570 #
571 # the sections are converted to a pod string by calling sections_to_pod()
572 my %sections;
573
574 # we are in the Modules_and_Pragmata's section
575 my $in_Modules_and_Pragmata;
3c7fa354 576
5c51ce30
AV
577 # we are the Modules_and_Pragmata's section but have not
578 # encountered any of the desired sections. We use this
579 # flag to determine whether we should append the text to $out
580 # or we need to delay appending until the module listings are
581 # processed and instead append to $append_to_out
582 my $in_Modules_and_Pragmata_preamble;
3c7fa354 583
5c51ce30
AV
584 my $done_processing_Modules_and_Pragmata;
585
586 my $current_section;
3c7fa354 587
5c51ce30
AV
588 # $nested_element_level == 0 : not in an over region, treat lines as text
589 # $nested_element_level == 1 : presumably in the top over region that
590 # corresponds to the module listing. Treat
591 # each item as a module
592 # $nested_element_level > 1 : we only consider these values when we are in an item
593 # We treat lines as the text of the current item.
594 my $nested_element_level = 0;
3c7fa354 595
5c51ce30
AV
596 my $current_item;
597 my $need_to_parse_module_name;
598
599 my $out = '';
600 my $append_to_out = '';
601
602 open my $fh, '<', $existing or die "can't open perldelta file $existing: $!";
9c7cb461 603 binmode($fh);
5c51ce30
AV
604
605 while (<$fh>) {
606 # treat the rest of the file as plain text
607 if ($done_processing_Modules_and_Pragmata) {
608 $out .= $_;
609 next;
610 }
611
612 elsif ( !$in_Modules_and_Pragmata ) {
613 # entering Modules and Pragmata
614 if (/^=head1 Modules and Pragmata/) {
615 $in_Modules_and_Pragmata = 1;
616 $in_Modules_and_Pragmata_preamble = 1;
617 }
618 $out .= $_;
619 next;
620 }
621
622 # leaving Modules and Pragmata
623 elsif (/^=head1/) {
624 if ($current_section) {
625 push @{ $current_section->{items} }, $current_item
626 if $current_item;
627 $sections{ $current_section->{name} } = $current_section;
628 }
629 $done_processing_Modules_and_Pragmata = 1;
630 $out .=
631 sections_to_pod( $data, %sections ) . $append_to_out . $_;
632 next;
633 }
634
635 # new section in Modules and Pragmata
636 elsif (/^=head2 (.*?)$/) {
637 my $name = $1;
638 if ($current_section) {
639 push @{ $current_section->{items} }, $current_item
640 if $current_item;
641 $sections{ $current_section->{name} } = $current_section;
642 undef $current_section;
643 }
644
645 if ( is_desired_section_name($name) ) {
646 undef $in_Modules_and_Pragmata_preamble;
647 if ( $nested_element_level > 0 ) {
648 die "Unexpected head2 at line no. $.";
649 }
650 my $title = get_section_name_from_heading($name);
651 if ( exists $sections{$title} ) {
3c7fa354 652 die "$name occurred twice at line no. $.";
5c51ce30 653 }
3c7fa354
SH
654 $current_section = {};
655 $current_section->{name} = $title;
656 $current_section->{preceding_text} = $_;
657 $current_section->{items} = [];
658 $nested_element_level = 0;
5c51ce30
AV
659 next;
660 }
661
662 # otherwise treat section as plain text
663 else {
664 if ($in_Modules_and_Pragmata_preamble) {
665 $out .= $_;
666 }
667 else {
668 $append_to_out .= $_;
669 }
670 next;
671 }
672 }
673
674 elsif ($current_section) {
675
676 # not in an over region
677 if ( $nested_element_level == 0 ) {
678 if (/^=over/) {
679 $nested_element_level++;
680 }
681 if ( scalar @{ $current_section->{items} } > 0 ) {
682 $current_section->{following_text} .= $_;
683 }
684 else {
3c7fa354 685 $current_section->{preceding_text} .= $_;
5c51ce30
AV
686 }
687 next;
688 }
689
690 if ($current_item) {
691 if ($need_to_parse_module_name) {
692 # the item may not have a parsable module name, which means that
693 # $current_item->{name} will never be defined.
694 if (/^(?:L|C)<(.+?)>/) {
695 $current_item->{name} = $1;
696 undef $need_to_parse_module_name;
697 }
698 # =item or =back signals the end of an item
699 # block, which we handle below
700 if ( !/^=(?:item|back)/ ) {
701 $current_item->{text} .= $_;
702 next;
703 }
704 }
705 # currently in an over region
706 # treat text inside region as plain text
707 if ( $nested_element_level > 1 ) {
708 if (/^=back/) {
709 $nested_element_level--;
710 }
711 elsif (/^=over/) {
712 $nested_element_level++;
713 }
714 $current_item->{text} .= $_;
715 next;
716 }
717 # entering over region
718 if (/^=over/) {
719 $nested_element_level++;
720 $current_item->{text} .= $_;
721 next;
722 }
723 # =item or =back signals the end of an item
724 # block, which we handle below
725 if ( !/^=(?:item|back)/ ) {
726 $current_item->{text} .= $_;
727 next;
728 }
729 }
730
731 if (/^=item \*/) {
732 push @{ $current_section->{items} }, $current_item
733 if $current_item;
734 $current_item = { text => $_ };
735 $need_to_parse_module_name = 1;
736 next;
737 }
738
739 if (/^=back/) {
740 push @{ $current_section->{items} }, $current_item
741 if $current_item;
742 undef $current_item;
743 $nested_element_level--;
744 }
745
746 if ( scalar @{ $current_section->{items} } == 0 ) {
3c7fa354 747 $current_section->{preceding_text} .= $_;
5c51ce30
AV
748 }
749 else {
750 $current_section->{following_text} .= $_;
751 }
752 next;
753 }
754
755 # text in Modules and Pragmata not in a head2 region
756 else {
757 if ($in_Modules_and_Pragmata_preamble) {
758 $out .= $_;
759 }
760 else {
761 $append_to_out .= $_;
762 }
763 next;
764 }
765 }
766 close $fh;
767 die 'Never saw Modules and Pragmata section' unless $in_Modules_and_Pragmata;
768 return $out;
769 }
770
771}
772
773{
93b00636
FR
774 package DeltaParser;
775 use Pod::Simple::SimpleTree;
776
777 sub new {
778 my ($class, $input) = @_;
779
780 my $self = bless {} => $class;
781
782 my $parsed_pod = Pod::Simple::SimpleTree->new->parse_file($input)->root;
783 splice @{ $parsed_pod }, 0, 2; # we don't care about the document structure,
8d132c8e 784 # just the nodes within it
93b00636
FR
785
786 $self->_parse_delta($parsed_pod);
787
788 return $self;
789 }
790
c014dfad
AV
791 # creates the accessor methods:
792 # new_modules
793 # updated_modules
794 # removed_modules
93b00636
FR
795 for my $k (keys %sections) {
796 no strict 'refs';
797 my $m = "${k}_modules";
798 *$m = sub { $_[0]->{$m} };
799 }
800
801 sub _parse_delta {
802 my ($self, $pod) = @_;
803
c014dfad
AV
804 my $new_section = $self->_look_for_section( $pod, $sections{new} );
805 my $updated_section = $self->_look_for_section( $pod, $sections{updated} );
806 my $removed_section = $self->_look_for_section( $pod, $sections{removed} );
807
808 $self->_parse_new_section($new_section);
809 $self->_parse_updated_section($updated_section);
810 $self->_parse_removed_section($removed_section);
811
812 for (qw/new_modules updated_modules removed_modules/) {
813 $self->{$_} =
814 [ sort { lc $a->[0] cmp lc $b->[0] } @{ $self->{$_} } ];
93b00636
FR
815 }
816
817 return;
818 }
819
820 sub _parse_new_section {
821 my ($self, $section) = @_;
822
c014dfad
AV
823 $self->{new_modules} = [];
824 return unless $section;
93b00636
FR
825 $self->{new_modules} = $self->_parse_section($section => sub {
826 my ($el) = @_;
827
828 my ($first, $second) = @{ $el }[2, 3];
829 my ($ver) = $second =~ /(\d[^\s]+)\s+has\s+been/;
830
831 return [ $first->[2], undef, $ver ];
832 });
833
834 return;
835 }
836
837 sub _parse_updated_section {
838 my ($self, $section) = @_;
839
3668a9e5
AV
840 $self->{updated_modules} = [];
841 return unless $section;
93b00636
FR
842 $self->{updated_modules} = $self->_parse_section($section => sub {
843 my ($el) = @_;
844
845 my ($first, $second) = @{ $el }[2, 3];
846 my $module = $first->[2];
3668a9e5
AV
847
848 # the regular expression matches the following:
849 # from VERSION_NUMBER to VERSION_NUMBER
850 # from VERSION_NUMBER to VERSION_NUMBER.
851 # from version VERSION_NUMBER to version VERSION_NUMBER.
852 # from VERSION_NUMBER to VERSION_NUMBER and MODULE from VERSION_NUMBER to VERSION_NUMBER
853 # from VERSION_NUMBER to VERSION_NUMBER, and MODULE from VERSION_NUMBER to VERSION_NUMBER
854 #
3c7fa354 855 # some perldeltas contain more than one module listed in an entry, this only attempts to match the
3668a9e5 856 # first module
93b00636 857 my ($old, $new) = $second =~
3668a9e5 858 /from\s+(?:version\s+)?(\d[^\s]+)\s+to\s+(?:version\s+)?(\d[^\s,]+?)(?=[\s,]|\.\s|\.$|$).*/s;
93b00636
FR
859
860 warn "Unable to extract old or new version of $module from perldelta"
861 if !defined $old || !defined $new;
862
863 return [ $module, $old, $new ];
864 });
865
866 return;
867 }
868
869 sub _parse_removed_section {
870 my ($self, $section) = @_;
c014dfad
AV
871
872 $self->{removed_modules} = [];
873 return unless $section;
93b00636
FR
874 $self->{removed_modules} = $self->_parse_section($section => sub {
875 my ($el) = @_;
876
877 my ($first, $second) = @{ $el }[2, 3];
878 my ($old) = $second =~ /was\s+(\d[^\s]+?)\.?$/;
879
880 return [ $first->[2], $old, undef ];
881 });
882
883 return;
884 }
885
886 sub _parse_section {
887 my ($self, $section, $parser) = @_;
888
889 my $items = $self->_look_down($section => sub {
890 my ($el) = @_;
891 return unless ref $el && $el->[0] =~ /^item-/
892 && @{ $el } > 2 && ref $el->[2];
5c404e2c 893 return unless $el->[2]->[0] =~ /C|L/;
93b00636
FR
894
895 return 1;
896 });
897
898 return [map { $parser->($_) } @{ $items }];
899 }
900
901 sub _look_down {
902 my ($self, $pod, $predicate) = @_;
903 my @pod = @{ $pod };
904
905 my @l;
906 while (my $el = shift @pod) {
907 push @l, $el if $predicate->($el);
908 if (ref $el) {
909 my @el = @{ $el };
910 splice @el, 0, 2;
911 unshift @pod, @el if @el;
912 }
913 }
914
915 return @l ? \@l : undef;
916 }
917
918 sub _look_for_section {
919 my ($self, $pod, $section) = @_;
920
921 my $level;
922 $self->_look_for_range($pod,
923 sub {
924 my ($el) = @_;
86c08a2c 925 my ($heading) = $el->[0] =~ /^head(\d)$/;
b88937b3 926 my $f = $heading && $el->[2] =~ /^$section/;
86c08a2c 927 $level = $heading if $f && !$level;
93b00636
FR
928 return $f;
929 },
930 sub {
931 my ($el) = @_;
932 $el->[0] =~ /^head(\d)$/ && $1 <= $level;
933 },
934 );
935 }
936
937 sub _look_for_range {
938 my ($self, $pod, $start_predicate, $stop_predicate) = @_;
939
940 my @l;
941 for my $el (@{ $pod }) {
942 if (@l) {
943 return \@l if $stop_predicate->($el);
944 }
945 else {
946 next unless $start_predicate->($el);
947 }
948 push @l, $el;
949 }
950
951 return;
952 }
2a19fd14 953}
88e14305 954
2a19fd14 955run;