This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
PATCH: [perl #121292] wrong perlunicode BOM claims
[perl5.git] / Porting / corelist-perldelta.pl
CommitLineData
603f4ea0
DG
1#!perl
2use 5.010;
3use strict;
4use warnings;
5use lib 'Porting';
6use Maintainers qw/%Modules/;
5e0e6da6 7use lib 'dist/Module-CoreList/lib';
603f4ea0 8use Module::CoreList;
2a19fd14 9use Getopt::Long;
e8097ff2
MM
10
11=head1 USAGE
12
13 # generate the module changes for the Perl you are currently building
67e92e89 14 ./perl -Ilib Porting/corelist-perldelta.pl
5c51ce30
AV
15
16 # update the module changes for the Perl you are currently building
59d2de25 17 ./perl -Ilib Porting/corelist-perldelta.pl --mode=update pod/perldelta.pod
5c51ce30 18
e8097ff2 19 # generate a diff between the corelist sections of two perldelta* files:
80da4b39 20 perl Porting/corelist-perldelta.pl --mode=check 5.017001 5.017002 <perl5172delta.pod
e8097ff2
MM
21
22=head1 ABOUT
23
24corelist-perldelta.pl is a bit schizophrenic. The part to generate the
25new Perldelta text does not need Algorithm::Diff, but wants to be
26run with the freshly built Perl.
27
28The part to check the diff wants to be run with a Perl that has an up-to-date
29L<Module::CoreList>, but needs the outside L<Algorithm::Diff>.
30
31Ideally, the program will be split into two separate programs, one
b88937b3 32to generate the text and one to show the diff between the
e8097ff2
MM
33corelist sections of the last perldelta and the next perldelta.
34
59d2de25
AV
35Currently no information about Removed Modules is displayed in any of the
36modes.
54e78c5e 37
e8097ff2 38=cut
93b00636
FR
39
40my %sections = (
8d132c8e
MM
41 new => qr/New Modules and Pragma(ta)?/,
42 updated => qr/Updated Modules and Pragma(ta)?/,
43 removed => qr/Removed Modules and Pragma(ta)?/,
44);
45
46my %titles = (
caaa1415
MM
47 new => 'New Modules and Pragmata',
48 updated => 'Updated Modules and Pragmata',
49 removed => 'Removed Modules and Pragmata',
93b00636 50);
603f4ea0 51
88e14305
DG
52my $deprecated;
53
2a19fd14
FR
54sub run {
55 my %opt = (mode => 'generate');
56
57 GetOptions(\%opt,
5c51ce30 58 'mode|m:s', # 'generate', 'check', 'update'
2a19fd14
FR
59 );
60
93b00636 61 # by default, compare latest two version in CoreList;
c27d84d9 62 my ($old, $new) = latest_two_perl_versions();
5e1cf63b
AV
63
64 # use the provided versions if present
65 # @ARGV >=2 means [old_version] [new_version] [path/to/file]
66 if ( @ARGV >= 2) {
67 ($old, $new) = (shift @ARGV, shift @ARGV);
68 die "$old is an invalid version\n" if not exists
69 $Module::CoreList::version{$old};
29900347 70 die "$new is an invalid version\n" if not exists
5e1cf63b
AV
71 $Module::CoreList::version{$new};
72 }
2a19fd14 73
93b00636 74 if ( $opt{mode} eq 'generate' ) {
2a19fd14 75 do_generate($old => $new);
88e14305 76 }
93b00636
FR
77 elsif ( $opt{mode} eq 'check' ) {
78 do_check(\*ARGV, $old => $new);
79 }
5c51ce30
AV
80 elsif ( $opt{mode} eq 'update' ) {
81 do_update_existing(shift @ARGV, $old => $new);
82 }
88e14305 83 else {
2a19fd14
FR
84 die "Unrecognized mode '$opt{mode}'\n";
85 }
86
87 exit 0;
88}
89
c27d84d9
AC
90sub latest_two_perl_versions {
91
92 my @versions = sort keys %Module::CoreList::version;
93
94 my $new = pop @versions;
95
96 # If a fully-padded version number ends in a zero (as in "5.019010"), that
97 # version shows up in %Module::CoreList::version both with and without its
98 # trailing zeros. So skip all versions that are numerically equal to $new.
99 pop @versions while @versions && $versions[-1] == $new;
100
101 die "Too few distinct core versions in %Module::CoreList::version ?!\n"
102 if !@versions;
103
104 return $versions[-1], $new;
105}
106
f40724d5
AV
107# Given two perl versions, it returns a list describing the core distributions that have changed.
108# The first three elements are hashrefs corresponding to new, updated, and removed modules
109# and are of the form (mostly, see the special remarks about removed):
110# 'Distribution Name' => ['Distribution Name', previous version number, current version number]
3c7fa354
SH
111# where the version number is undef if the distribution did not exist.
112# The fourth element is an arrayref of core distribution names of those distribution for which it
113# is unknown whether they have changed and therefore need to be manually checked.
f40724d5
AV
114#
115# In most cases, the distribution name in %Modules corresponds to the module that is representative
116# of the distribution as listed in Module::CoreList. However, there are a few distribution names
3c7fa354 117# that do not correspond to a module. %distToModules has been created which maps the distribution
f40724d5
AV
118# name to a representative module. The representative module was chosen by either looking at the
119# Makefile of the distribution or by seeing which module the distribution has been traditionally
3c7fa354 120# listed under in past perldeltas.
f40724d5
AV
121#
122# There are a few distributions for which there is no single representative module (e.g. libnet).
123# These distributions are returned as the last element of the list.
124#
125# %Modules contains a final key, _PERLLIB, which contains a list of modules that are owned by p5p.
126# This list contains modules and pragmata that may also be present in Module::CoreList.
127# A list of modules are in the list @unclaimedModules, which were manually listed based on whether
3c7fa354 128# they were independent modules and whether they have been listed in past perldeltas.
f40724d5
AV
129# The pragmata were found by doing something like:
130# say for sort grep { $_ eq lc $_ and !exists $Modules{$_}}
131# keys %{$Module::CoreList::version{'5.019003'}}
132# and manually filtering out pragamata that were already covered.
133#
134# It is currently not possible to differentiate between a removed module and a removed
135# distribution. Therefore, the removed hashref contains every module that has been removed, even if
136# the module's corresponding distribution has not been removed.
137
93b00636 138sub corelist_delta {
2a19fd14
FR
139 my ($old, $new) = @_;
140 my $corelist = \%Module::CoreList::version;
f40724d5 141 my %changes = Module::CoreList::changes_between( $old, $new );
2a19fd14
FR
142 $deprecated = $Module::CoreList::deprecated{$new};
143
f40724d5
AV
144 my $getModifyType = sub {
145 my $data = shift;
146 if ( exists $data->{left} and exists $data->{right} ) {
147 return 'updated';
148 }
149 elsif ( !exists $data->{left} and exists $data->{right} ) {
150 return 'new';
151 }
152 elsif ( exists $data->{left} and !exists $data->{right} ) {
153 return 'removed';
154 }
155 return undef;
156 };
157
158 my @unclaimedModules = qw/AnyDBM_File B B::Concise B::Deparse Benchmark Class::Struct Config::Extensions DB DBM_Filter Devel::Peek DirHandle DynaLoader English Errno ExtUtils::Embed ExtUtils::Miniperl ExtUtils::Typemaps ExtUtils::XSSymSet Fcntl File::Basename File::Compare File::Copy File::DosGlob File::Find File::Glob File::stat FileCache FileHandle FindBin GDBM_File Getopt::Std Hash::Util Hash::Util::FieldHash I18N::Langinfo IPC::Open3 NDBM_File ODBM_File Opcode PerlIO PerlIO::encoding PerlIO::mmap PerlIO::scalar PerlIO::via Pod::Functions Pod::Html POSIX SDBM_File SelectSaver Symbol Sys::Hostname Thread Tie::Array Tie::Handle Tie::Hash Tie::Hash::NamedCapture Tie::Memoize Tie::Scalar Tie::StdHandle Tie::SubstrHash Time::gmtime Time::localtime Time::tm Unicode::UCD UNIVERSAL User::grent User::pwent VMS::DCLsym VMS::Filespec VMS::Stdio XS::Typemap Win32CORE/;
92ebde38 159 my @unclaimedPragmata = qw/arybase attributes blib bytes charnames deprecate diagnostics encoding feature fields filetest inc::latest integer less locale mro open ops overload overloading re sigtrap sort strict subs utf8 vars vmsish/;
f40724d5
AV
160 my @unclaimed = (@unclaimedModules, @unclaimedPragmata);
161
162 my %distToModules = (
163 'IO-Compress' => [
164 {
165 'name' => 'IO-Compress',
166 'modification' => $getModifyType->( $changes{'IO::Compress::Base'} ),
167 'data' => $changes{'IO::Compress::Base'}
168 }
169 ],
170 'Locale-Codes' => [
171 {
172 'name' => 'Locale::Codes',
173 'modification' => $getModifyType->( $changes{'Locale::Codes'} ),
174 'data' => $changes{'Locale::Codes'}
175 }
176 ],
177 'PathTools' => [
178 {
179 'name' => 'File::Spec',
180 'modification' => $getModifyType->( $changes{'Cwd'} ),
181 'data' => $changes{'Cwd'}
182 }
183 ],
184 'Scalar-List-Utils' => [
185 {
186 'name' => 'List::Util',
187 'modification' => $getModifyType->( $changes{'List::Util'} ),
188 'data' => $changes{'List::Util'}
189 },
190 {
191 'name' => 'Scalar::Util',
192 'modification' => $getModifyType->( $changes{'Scalar::Util'} ),
193 'data' => $changes{'Scalar::Util'}
194 }
195 ],
196 'Text-Tabs+Wrap' => [
197 {
198 'name' => 'Text::Tabs',
199 'modification' => $getModifyType->( $changes{'Text::Tabs'} ),
200 'data' => $changes{'Text::Tabs'}
201 },
202 {
203 'name' => 'Text::Wrap',
204 'modification' => $getModifyType->( $changes{'Text::Wrap'} ),
205 'data' => $changes{'Text::Wrap'}
206 }
207 ],
208 );
209
210 # structure is (new|removed|updated) => [ [ModuleName, previousVersion, newVersion] ]
211 my $deltaGrouping = {};
212
213 # list of distributions listed in %Modules that need to be manually checked because there is no module that represents it
214 my @manuallyCheck;
2a19fd14
FR
215
216 # %Modules defines what is currently in core
217 for my $k ( keys %Modules ) {
f40724d5
AV
218 next if $k eq '_PERLLIB'; #these are taken care of by being listed in @unclaimed
219 next if Module::CoreList::is_core($k) and !exists $changes{$k}; #modules that have not changed
220
221 my ( $distName, $modifyType, $data );
222
223 if ( exists $changes{$k} ) {
224 $distName = $k;
225 $modifyType = $getModifyType->( $changes{$k} );
226 $data = $changes{$k};
88e14305 227 }
f40724d5
AV
228 elsif ( exists $distToModules{$k} ) {
229 # modification will be undef if the distribution has not changed
230 my @modules = grep { $_->{modification} } @{ $distToModules{$k} };
231 for (@modules) {
232 $deltaGrouping->{ $_->{modification} }->{ $_->{name} } = [ $_->{name}, $_->{data}->{left}, $_->{data}->{right} ];
2a19fd14 233 }
f40724d5
AV
234 next;
235 }
236 else {
237 push @manuallyCheck, $k and next;
238 }
239
240 $deltaGrouping->{$modifyType}->{$distName} = [ $distName, $data->{left}, $data->{right} ];
241 }
242
243 for my $k (@unclaimed) {
244 if ( exists $changes{$k} ) {
245 $deltaGrouping->{ $getModifyType->( $changes{$k} ) }->{$k} =
246 [ $k, $changes{$k}->{left}, $changes{$k}->{right} ];
88e14305
DG
247 }
248 }
603f4ea0 249
2a19fd14
FR
250 # in old corelist, but not this one => removed
251 # N.B. This is exhaustive -- not just what's in %Modules, so modules removed from
252 # distributions will show up here, too. Some person will have to review to see what's
253 # important. That's the best we can do without a historical Maintainers.pl
254 for my $k ( keys %{ $corelist->{$old} } ) {
255 if ( ! exists $corelist->{$new}{$k} ) {
f40724d5 256 $deltaGrouping->{'removed'}->{$k} = [ $k, $corelist->{$old}{$k}, undef ];
2a19fd14 257 }
88e14305 258 }
88e14305 259
f40724d5
AV
260 return (
261 \%{ $deltaGrouping->{'new'} },
262 \%{ $deltaGrouping->{'removed'} },
263 \%{ $deltaGrouping->{'updated'} },
264 \@manuallyCheck
265 );
93b00636
FR
266}
267
5c51ce30
AV
268# currently does not update the Removed Module section
269sub do_update_existing {
270 my ( $existing, $old, $new ) = @_;
271
272 my ( $added, $removed, $updated, $manuallyCheck ) = corelist_delta( $old => $new );
273 if ($manuallyCheck) {
59d2de25
AV
274 print "It cannot be determined whether the following distributions have changed.\n";
275 print "Please check and list accordingly:\n";
5c51ce30 276 say "\t* $_" for sort @{$manuallyCheck};
59d2de25 277 print "\n";
5c51ce30
AV
278 }
279
280 my $data = {
281 new => $added,
282 updated => $updated,
283 #removed => $removed, ignore removed for now
284 };
285
286 my $text = DeltaUpdater::transform_pod( $existing, $data );
287 open my $out, '>', $existing or die "can't open perldelta file $existing: $!";
9c7cb461 288 binmode($out);
5c51ce30
AV
289 print $out $text;
290 close $out;
59d2de25
AV
291 say "The New and Updated Modules and Pragamata sections in $existing have been updated";
292 say "Please ensure the Removed Modules and Pragmata section is up-to-date";
5c51ce30
AV
293}
294
93b00636
FR
295sub do_generate {
296 my ($old, $new) = @_;
f40724d5
AV
297 my ($added, $removed, $updated, $manuallyCheck) = corelist_delta($old => $new);
298
299 if ($manuallyCheck) {
59d2de25
AV
300 print "\nXXXIt cannot be determined whether the following distributions have changed.\n";
301 print "Please check and list accordingly:\n";
f40724d5 302 say "\t$_" for @{$manuallyCheck};
59d2de25 303 print "\n";
f40724d5 304 }
93b00636 305
3bb80250
AV
306 my $data = {
307 new => $added,
308 updated => $updated,
309 #removed => $removed, ignore removed for now
310 };
311
312 say DeltaUpdater::sections_to_pod($data)
93b00636
FR
313}
314
315sub do_check {
316 my ($in, $old, $new) = @_;
317
318 my $delta = DeltaParser->new($in);
f40724d5 319 my ($added, $removed, $updated) = corelist_delta($old => $new);
93b00636 320
fc353283
AV
321 # because of the difficulty in identifying the distribution for removed modules
322 # don't bother checking them
f40724d5 323 for my $ck ([ 'new', $delta->new_modules, $added ],
fc353283 324 #[ 'removed', $delta->removed_modules, $removed ],
f40724d5 325 [ 'updated', $delta->updated_modules, $updated ] ) {
93b00636 326 my @delta = @{ $ck->[1] };
f40724d5 327 my @corelist = sort { lc $a->[0] cmp lc $b->[0] } values %{ $ck->[2] };
93b00636
FR
328
329 printf $ck->[0] . ":\n";
330
e8097ff2 331 require Algorithm::Diff;
93b00636
FR
332 my $diff = Algorithm::Diff->new(map {
333 [map { join q{ } => grep defined, @{ $_ } } @{ $_ }]
334 } \@delta, \@corelist);
335
336 while ($diff->Next) {
337 next if $diff->Same;
338 my $sep = '';
339 if (!$diff->Items(2)) {
340 printf "%d,%dd%d\n", $diff->Get(qw( Min1 Max1 Max2 ));
341 } elsif(!$diff->Items(1)) {
342 printf "%da%d,%d\n", $diff->Get(qw( Max1 Min2 Max2 ));
343 } else {
344 $sep = "---\n";
345 printf "%d,%dc%d,%d\n", $diff->Get(qw( Min1 Max1 Min2 Max2 ));
346 }
fc353283 347 print "Delta< $_\n" for $diff->Items(1);
93b00636 348 print $sep;
fc353283 349 print "Corelist> $_\n" for $diff->Items(2);
93b00636
FR
350 }
351
352 print "\n";
353 }
354}
355
356{
5c51ce30
AV
357
358 package DeltaUpdater;
359 use List::Util 'reduce';
360
361 sub get_section_name_from_heading {
362 my $heading = shift;
363 while (my ($key, $expression) = each %sections) {
364 if ($heading =~ $expression) {
365 return $titles{$key};
366 }
367 }
368 die "$heading did not match any section";
369 }
370
371 sub is_desired_section_name {
372 for (values %sections) {
373 return 1 if $_[0] =~ $_;
374 }
375 return 0;
376 }
377
378 # verify the module and pragmata in the section, changing the stated version if necessary
379 # this subroutine warns if the module name cannot be parsed or if it is not listed in
380 # the results returned from corelist_delta()
381 #
382 # a side-effect of calling this function is that modules present in the section are
383 # removed from $data, resulting in $data containing only those modules and pragmata
384 # that were not listed in the perldelta file. This means we can then pass $data to
385 # add_to_section() without worrying about filtering out duplicates
386 sub update_section {
387 my ( $section, $data, $title ) = @_;
388 my @items = @{ $section->{items} };
389
390 for my $item (@items) {
391
392 my $content = $item->{text};
393 my $module = $item->{name};
394
318148ea
AV
395 #skip dummy items
396 next if !$module and $content =~ /\s*xx*\s*/i;
397
5c51ce30 398 say "Could not parse module name; line is:\n\t$content" and next unless $module;
59d2de25
AV
399
400 if ( !$data->{$title}{$module} ) {
401 print "$module is not listed as being $title in Module::CoreList.\n";
402 print "Ensure Module::CoreList has been updated and\n";
403 print "check to see that the distribution is not listed under another name.\n\n";
404 next;
405 }
5c51ce30
AV
406
407 if ( $title eq 'new' ) {
408 my ($new) = $content =~ /(\d[^\s]+)\s+has\s+been.*$/m;
409 say "Could not parse new version for $module; line is:\n\t$content" and next unless $new;
410 if ( $data->{$title}{$module}[2] ne $new ) {
411 say "$module: new version differs; version in pod: $new; version in corelist: " . $data->{$title}{$module}[2];
412 }
413 $content =~ s/\d[^\s]+(\s+has\s+been.*$)/$data->{$title}{$module}[2].$1/me;
414 }
415
416 elsif ( $title eq 'updated' ) {
417 my ( $prev, $new ) = $content =~ /from\s+(?:version\s+)?(\d[^\s]+)\s+to\s+(?:version\s+)?(\d[^\s,]+?)(?=[\s,]|\.\s|\.$|$).*/s;
418 say "Could not parse old and new version for $module; line is:\n\t$content" and next
419 unless $prev and $new;
420 if ( $data->{$title}{$module}[1] ne $prev ) {
421 say "$module: previous version differs; version in pod: $prev; version in corelist: " . $data->{$title}{$module}[1];
422 }
423 if ( $data->{$title}{$module}[2] ne $new ) {
424 say "$module: new version differs; version in pod: $new; version in corelist: " . $data->{$title}{$module}[2];
425 }
426 $content =~
427 s/(from\s+(?:version\s+)?)\d[^\s]+(\s+to\s+(?:version\s+)?)\d[^\s,]+?(?=[\s,]|\.\s|\.$|$)(.*)/$1.$data->{$title}{$module}[1].$2.$data->{$title}{$module}[2].$3/se;
428 }
429
430 elsif ( $title eq 'removed' ) {
431 my ($prev) = $content =~ /^.*?was\s+(\d[^\s]+?)/m;
432 say "Could not parse old version for $module; line is:\n\t$content" and next unless $prev;
433 if ( $data->{$title}{$module}[1] ne $prev ) {
434 say "$module: previous version differs; $prev " . $data->{$title}{$module}[1];
435 }
436 $content =~ s/(^.*?was\s+)\d[^\s]+?/$1.$data->{$title}{$module}[1]/me;
437 }
438
439 delete $data->{$title}{$module};
440 $item->{text} = $content;
441 }
442 return $section;
443 }
444
445 # add modules and pragmata present in $data to the section
446 sub add_to_section {
447 my ( $section, $data, $title ) = @_;
448
8b591357 449 #undef is a valid version name in Module::CoreList so suppress warnings about concatenating undef values
5c51ce30
AV
450 no warnings 'uninitialized';
451 for ( values %{ $data->{$title} } ) {
452 my ( $mod, $old_v, $new_v ) = @{$_};
453 my ( $item, $text );
454
455 $item = { name => $mod, text => "=item *\n" };
456 if ( $title eq 'new' ) {
457 $text = "L<$mod> $new_v has been added to the Perl core.\n";
458 }
459
460 elsif ( $title eq 'updated' ) {
461 $text = "L<$mod> has been upgraded from version $old_v to $new_v.\n";
462 if ( $deprecated->{$mod} ) {
463 $text .= "NOTE: L<$mod> is deprecated and may be removed from a future version of Perl.\n";
464 }
465 }
466
467 elsif ( $title eq 'removed' ) {
468 $text = "C<$mod> has been removed from the Perl core. Prior version was $old_v.\n";
469 }
470
471 $item->{text} .= "\n$text\n";
472 push @{ $section->{items} }, $item;
473 }
474 return $section;
475 }
476
477 sub sort_items_in_section {
478 my ($section) = @_;
479
480 # if we could not parse the module name, it will be uninitalized
481 # in sort. This is not a problem as it will just result in these
8b591357 482 # sections being placed near the beginning of the section
5c51ce30
AV
483 no warnings 'uninitialized';
484 $section->{items} =
485 [ sort { lc $a->{name} cmp lc $b->{name} } @{ $section->{items} } ];
486 return $section;
487 }
488
489 # given a hashref of the form returned by corelist_delta()
490 # and a hash structured as documented in transform_pod(), it returns
491 # a pod string representation of the sections, creating sections
492 # if necessary
493 sub sections_to_pod {
494 my ( $data, %sections ) = @_;
495 my $out = '';
496
497 for (
498 (
499 [ 'New Modules and Pragmata', 'new' ],
500 [ 'Updated Modules and Pragmata', 'updated' ],
501 [ 'Removed Modules and Pragmata', 'removed' ]
502 )
503 )
504 {
505 my ( $section_name, $title ) = @{$_};
506
507 my $section = $sections{$section_name} // {
3c7fa354
SH
508 name => $section_name,
509 preceding_text => "=head2 $_->[0]\n=over 4\n",
510 following_text => "=back\n",
511 items => [],
512 manual => 1
5c51ce30
AV
513 };
514
515 $section = update_section( $section, $data, $title );
516 $section = add_to_section( $section, $data, $title );
517 $section = sort_items_in_section( $section );
518
519 next if $section->{manual} and scalar @{ $section->{items} } == 0;
520
521 my $items = reduce { no warnings 'once'; $a . $b->{text} }
522 ( '', @{ $section->{items} } );
523 $out .=
3c7fa354 524 ( $section->{preceding_text} // '' )
5c51ce30
AV
525 . $items
526 . ( $section->{following_text} // '' );
527 }
528 return $out;
529 }
530
531 # given a filename corresponding to an existing perldelta file
532 # and a hashref of the form returned by corelist_delta(), it
533 # returns a string of the resulting file after the module
534 # information has been added.
535 sub transform_pod {
536 my ( $existing, $data ) = @_;
537
538 # will contain hashrefs corresponding to new, updated and removed
539 # modules and pragmata keyed by section name
540 # each section is hashref of the structure
3c7fa354
SH
541 # preceding_text => Text occurring before and including the over
542 # region containing the list of modules,
543 # items => [Arrayref of hashrefs corresponding to a module
544 # entry],
5c51ce30
AV
545 # an entry has the form:
546 # name => Module name or undef if the name could not be determined
547 # text => The text of the entry, including the item heading
548 #
3c7fa354
SH
549 # following_text => Any text not corresponding to a module
550 # that occurs after the first module
5c51ce30
AV
551 #
552 # the sections are converted to a pod string by calling sections_to_pod()
553 my %sections;
554
555 # we are in the Modules_and_Pragmata's section
556 my $in_Modules_and_Pragmata;
3c7fa354 557
5c51ce30
AV
558 # we are the Modules_and_Pragmata's section but have not
559 # encountered any of the desired sections. We use this
560 # flag to determine whether we should append the text to $out
561 # or we need to delay appending until the module listings are
562 # processed and instead append to $append_to_out
563 my $in_Modules_and_Pragmata_preamble;
3c7fa354 564
5c51ce30
AV
565 my $done_processing_Modules_and_Pragmata;
566
567 my $current_section;
3c7fa354 568
5c51ce30
AV
569 # $nested_element_level == 0 : not in an over region, treat lines as text
570 # $nested_element_level == 1 : presumably in the top over region that
571 # corresponds to the module listing. Treat
572 # each item as a module
573 # $nested_element_level > 1 : we only consider these values when we are in an item
574 # We treat lines as the text of the current item.
575 my $nested_element_level = 0;
3c7fa354 576
5c51ce30
AV
577 my $current_item;
578 my $need_to_parse_module_name;
579
580 my $out = '';
581 my $append_to_out = '';
582
583 open my $fh, '<', $existing or die "can't open perldelta file $existing: $!";
9c7cb461 584 binmode($fh);
5c51ce30
AV
585
586 while (<$fh>) {
587 # treat the rest of the file as plain text
588 if ($done_processing_Modules_and_Pragmata) {
589 $out .= $_;
590 next;
591 }
592
593 elsif ( !$in_Modules_and_Pragmata ) {
594 # entering Modules and Pragmata
595 if (/^=head1 Modules and Pragmata/) {
596 $in_Modules_and_Pragmata = 1;
597 $in_Modules_and_Pragmata_preamble = 1;
598 }
599 $out .= $_;
600 next;
601 }
602
603 # leaving Modules and Pragmata
604 elsif (/^=head1/) {
605 if ($current_section) {
606 push @{ $current_section->{items} }, $current_item
607 if $current_item;
608 $sections{ $current_section->{name} } = $current_section;
609 }
610 $done_processing_Modules_and_Pragmata = 1;
611 $out .=
612 sections_to_pod( $data, %sections ) . $append_to_out . $_;
613 next;
614 }
615
616 # new section in Modules and Pragmata
617 elsif (/^=head2 (.*?)$/) {
618 my $name = $1;
619 if ($current_section) {
620 push @{ $current_section->{items} }, $current_item
621 if $current_item;
622 $sections{ $current_section->{name} } = $current_section;
623 undef $current_section;
624 }
625
626 if ( is_desired_section_name($name) ) {
627 undef $in_Modules_and_Pragmata_preamble;
628 if ( $nested_element_level > 0 ) {
629 die "Unexpected head2 at line no. $.";
630 }
631 my $title = get_section_name_from_heading($name);
632 if ( exists $sections{$title} ) {
3c7fa354 633 die "$name occurred twice at line no. $.";
5c51ce30 634 }
3c7fa354
SH
635 $current_section = {};
636 $current_section->{name} = $title;
637 $current_section->{preceding_text} = $_;
638 $current_section->{items} = [];
639 $nested_element_level = 0;
5c51ce30
AV
640 next;
641 }
642
643 # otherwise treat section as plain text
644 else {
645 if ($in_Modules_and_Pragmata_preamble) {
646 $out .= $_;
647 }
648 else {
649 $append_to_out .= $_;
650 }
651 next;
652 }
653 }
654
655 elsif ($current_section) {
656
657 # not in an over region
658 if ( $nested_element_level == 0 ) {
659 if (/^=over/) {
660 $nested_element_level++;
661 }
662 if ( scalar @{ $current_section->{items} } > 0 ) {
663 $current_section->{following_text} .= $_;
664 }
665 else {
3c7fa354 666 $current_section->{preceding_text} .= $_;
5c51ce30
AV
667 }
668 next;
669 }
670
671 if ($current_item) {
672 if ($need_to_parse_module_name) {
673 # the item may not have a parsable module name, which means that
674 # $current_item->{name} will never be defined.
675 if (/^(?:L|C)<(.+?)>/) {
676 $current_item->{name} = $1;
677 undef $need_to_parse_module_name;
678 }
679 # =item or =back signals the end of an item
680 # block, which we handle below
681 if ( !/^=(?:item|back)/ ) {
682 $current_item->{text} .= $_;
683 next;
684 }
685 }
686 # currently in an over region
687 # treat text inside region as plain text
688 if ( $nested_element_level > 1 ) {
689 if (/^=back/) {
690 $nested_element_level--;
691 }
692 elsif (/^=over/) {
693 $nested_element_level++;
694 }
695 $current_item->{text} .= $_;
696 next;
697 }
698 # entering over region
699 if (/^=over/) {
700 $nested_element_level++;
701 $current_item->{text} .= $_;
702 next;
703 }
704 # =item or =back signals the end of an item
705 # block, which we handle below
706 if ( !/^=(?:item|back)/ ) {
707 $current_item->{text} .= $_;
708 next;
709 }
710 }
711
712 if (/^=item \*/) {
713 push @{ $current_section->{items} }, $current_item
714 if $current_item;
715 $current_item = { text => $_ };
716 $need_to_parse_module_name = 1;
717 next;
718 }
719
720 if (/^=back/) {
721 push @{ $current_section->{items} }, $current_item
722 if $current_item;
723 undef $current_item;
724 $nested_element_level--;
725 }
726
727 if ( scalar @{ $current_section->{items} } == 0 ) {
3c7fa354 728 $current_section->{preceding_text} .= $_;
5c51ce30
AV
729 }
730 else {
731 $current_section->{following_text} .= $_;
732 }
733 next;
734 }
735
736 # text in Modules and Pragmata not in a head2 region
737 else {
738 if ($in_Modules_and_Pragmata_preamble) {
739 $out .= $_;
740 }
741 else {
742 $append_to_out .= $_;
743 }
744 next;
745 }
746 }
747 close $fh;
748 die 'Never saw Modules and Pragmata section' unless $in_Modules_and_Pragmata;
749 return $out;
750 }
751
752}
753
754{
93b00636
FR
755 package DeltaParser;
756 use Pod::Simple::SimpleTree;
757
758 sub new {
759 my ($class, $input) = @_;
760
761 my $self = bless {} => $class;
762
763 my $parsed_pod = Pod::Simple::SimpleTree->new->parse_file($input)->root;
764 splice @{ $parsed_pod }, 0, 2; # we don't care about the document structure,
8d132c8e 765 # just the nodes within it
93b00636
FR
766
767 $self->_parse_delta($parsed_pod);
768
769 return $self;
770 }
771
c014dfad
AV
772 # creates the accessor methods:
773 # new_modules
774 # updated_modules
775 # removed_modules
93b00636
FR
776 for my $k (keys %sections) {
777 no strict 'refs';
778 my $m = "${k}_modules";
779 *$m = sub { $_[0]->{$m} };
780 }
781
782 sub _parse_delta {
783 my ($self, $pod) = @_;
784
c014dfad
AV
785 my $new_section = $self->_look_for_section( $pod, $sections{new} );
786 my $updated_section = $self->_look_for_section( $pod, $sections{updated} );
787 my $removed_section = $self->_look_for_section( $pod, $sections{removed} );
788
789 $self->_parse_new_section($new_section);
790 $self->_parse_updated_section($updated_section);
791 $self->_parse_removed_section($removed_section);
792
793 for (qw/new_modules updated_modules removed_modules/) {
794 $self->{$_} =
795 [ sort { lc $a->[0] cmp lc $b->[0] } @{ $self->{$_} } ];
93b00636
FR
796 }
797
798 return;
799 }
800
801 sub _parse_new_section {
802 my ($self, $section) = @_;
803
c014dfad
AV
804 $self->{new_modules} = [];
805 return unless $section;
93b00636
FR
806 $self->{new_modules} = $self->_parse_section($section => sub {
807 my ($el) = @_;
808
809 my ($first, $second) = @{ $el }[2, 3];
810 my ($ver) = $second =~ /(\d[^\s]+)\s+has\s+been/;
811
812 return [ $first->[2], undef, $ver ];
813 });
814
815 return;
816 }
817
818 sub _parse_updated_section {
819 my ($self, $section) = @_;
820
3668a9e5
AV
821 $self->{updated_modules} = [];
822 return unless $section;
93b00636
FR
823 $self->{updated_modules} = $self->_parse_section($section => sub {
824 my ($el) = @_;
825
826 my ($first, $second) = @{ $el }[2, 3];
827 my $module = $first->[2];
3668a9e5
AV
828
829 # the regular expression matches the following:
830 # from VERSION_NUMBER to VERSION_NUMBER
831 # from VERSION_NUMBER to VERSION_NUMBER.
832 # from version VERSION_NUMBER to version VERSION_NUMBER.
833 # from VERSION_NUMBER to VERSION_NUMBER and MODULE from VERSION_NUMBER to VERSION_NUMBER
834 # from VERSION_NUMBER to VERSION_NUMBER, and MODULE from VERSION_NUMBER to VERSION_NUMBER
835 #
3c7fa354 836 # some perldeltas contain more than one module listed in an entry, this only attempts to match the
3668a9e5 837 # first module
93b00636 838 my ($old, $new) = $second =~
3668a9e5 839 /from\s+(?:version\s+)?(\d[^\s]+)\s+to\s+(?:version\s+)?(\d[^\s,]+?)(?=[\s,]|\.\s|\.$|$).*/s;
93b00636
FR
840
841 warn "Unable to extract old or new version of $module from perldelta"
842 if !defined $old || !defined $new;
843
844 return [ $module, $old, $new ];
845 });
846
847 return;
848 }
849
850 sub _parse_removed_section {
851 my ($self, $section) = @_;
c014dfad
AV
852
853 $self->{removed_modules} = [];
854 return unless $section;
93b00636
FR
855 $self->{removed_modules} = $self->_parse_section($section => sub {
856 my ($el) = @_;
857
858 my ($first, $second) = @{ $el }[2, 3];
859 my ($old) = $second =~ /was\s+(\d[^\s]+?)\.?$/;
860
861 return [ $first->[2], $old, undef ];
862 });
863
864 return;
865 }
866
867 sub _parse_section {
868 my ($self, $section, $parser) = @_;
869
870 my $items = $self->_look_down($section => sub {
871 my ($el) = @_;
872 return unless ref $el && $el->[0] =~ /^item-/
873 && @{ $el } > 2 && ref $el->[2];
5c404e2c 874 return unless $el->[2]->[0] =~ /C|L/;
93b00636
FR
875
876 return 1;
877 });
878
879 return [map { $parser->($_) } @{ $items }];
880 }
881
882 sub _look_down {
883 my ($self, $pod, $predicate) = @_;
884 my @pod = @{ $pod };
885
886 my @l;
887 while (my $el = shift @pod) {
888 push @l, $el if $predicate->($el);
889 if (ref $el) {
890 my @el = @{ $el };
891 splice @el, 0, 2;
892 unshift @pod, @el if @el;
893 }
894 }
895
896 return @l ? \@l : undef;
897 }
898
899 sub _look_for_section {
900 my ($self, $pod, $section) = @_;
901
902 my $level;
903 $self->_look_for_range($pod,
904 sub {
905 my ($el) = @_;
86c08a2c 906 my ($heading) = $el->[0] =~ /^head(\d)$/;
b88937b3 907 my $f = $heading && $el->[2] =~ /^$section/;
86c08a2c 908 $level = $heading if $f && !$level;
93b00636
FR
909 return $f;
910 },
911 sub {
912 my ($el) = @_;
913 $el->[0] =~ /^head(\d)$/ && $1 <= $level;
914 },
915 );
916 }
917
918 sub _look_for_range {
919 my ($self, $pod, $start_predicate, $stop_predicate) = @_;
920
921 my @l;
922 for my $el (@{ $pod }) {
923 if (@l) {
924 return \@l if $stop_predicate->($el);
925 }
926 else {
927 next unless $start_predicate->($el);
928 }
929 push @l, $el;
930 }
931
932 return;
933 }
2a19fd14 934}
88e14305 935
2a19fd14 936run;