This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
perldelta for 257518b902
[perl5.git] / Porting / corelist-perldelta.pl
CommitLineData
603f4ea0
DG
1#!perl
2use 5.010;
3use strict;
4use warnings;
5use lib 'Porting';
6use Maintainers qw/%Modules/;
5e0e6da6 7use lib 'dist/Module-CoreList/lib';
603f4ea0 8use Module::CoreList;
2a19fd14 9use Getopt::Long;
e8097ff2
MM
10
11=head1 USAGE
12
13 # generate the module changes for the Perl you are currently building
67e92e89 14 ./perl -Ilib Porting/corelist-perldelta.pl
5c51ce30
AV
15
16 # update the module changes for the Perl you are currently building
59d2de25 17 ./perl -Ilib Porting/corelist-perldelta.pl --mode=update pod/perldelta.pod
5c51ce30 18
e8097ff2 19 # generate a diff between the corelist sections of two perldelta* files:
80da4b39 20 perl Porting/corelist-perldelta.pl --mode=check 5.017001 5.017002 <perl5172delta.pod
e8097ff2
MM
21
22=head1 ABOUT
23
24corelist-perldelta.pl is a bit schizophrenic. The part to generate the
25new Perldelta text does not need Algorithm::Diff, but wants to be
26run with the freshly built Perl.
27
28The part to check the diff wants to be run with a Perl that has an up-to-date
29L<Module::CoreList>, but needs the outside L<Algorithm::Diff>.
30
31Ideally, the program will be split into two separate programs, one
b88937b3 32to generate the text and one to show the diff between the
e8097ff2
MM
33corelist sections of the last perldelta and the next perldelta.
34
59d2de25
AV
35Currently no information about Removed Modules is displayed in any of the
36modes.
e8097ff2 37=cut
93b00636
FR
38
39my %sections = (
8d132c8e
MM
40 new => qr/New Modules and Pragma(ta)?/,
41 updated => qr/Updated Modules and Pragma(ta)?/,
42 removed => qr/Removed Modules and Pragma(ta)?/,
43);
44
45my %titles = (
caaa1415
MM
46 new => 'New Modules and Pragmata',
47 updated => 'Updated Modules and Pragmata',
48 removed => 'Removed Modules and Pragmata',
93b00636 49);
603f4ea0 50
88e14305
DG
51my $deprecated;
52
2a19fd14
FR
53sub run {
54 my %opt = (mode => 'generate');
55
56 GetOptions(\%opt,
5c51ce30 57 'mode|m:s', # 'generate', 'check', 'update'
2a19fd14
FR
58 );
59
93b00636
FR
60 # by default, compare latest two version in CoreList;
61 my @versions = sort keys %Module::CoreList::version;
5e1cf63b
AV
62 my $old = $versions[-2];
63 my $new = $versions[-1];
64
65 # use the provided versions if present
66 # @ARGV >=2 means [old_version] [new_version] [path/to/file]
67 if ( @ARGV >= 2) {
68 ($old, $new) = (shift @ARGV, shift @ARGV);
69 die "$old is an invalid version\n" if not exists
70 $Module::CoreList::version{$old};
71 die "$new is an invalid verison\n" if not exists
72 $Module::CoreList::version{$new};
73 }
2a19fd14 74
93b00636 75 if ( $opt{mode} eq 'generate' ) {
2a19fd14 76 do_generate($old => $new);
88e14305 77 }
93b00636
FR
78 elsif ( $opt{mode} eq 'check' ) {
79 do_check(\*ARGV, $old => $new);
80 }
5c51ce30
AV
81 elsif ( $opt{mode} eq 'update' ) {
82 do_update_existing(shift @ARGV, $old => $new);
83 }
88e14305 84 else {
2a19fd14
FR
85 die "Unrecognized mode '$opt{mode}'\n";
86 }
87
88 exit 0;
89}
90
f40724d5
AV
91# Given two perl versions, it returns a list describing the core distributions that have changed.
92# The first three elements are hashrefs corresponding to new, updated, and removed modules
93# and are of the form (mostly, see the special remarks about removed):
94# 'Distribution Name' => ['Distribution Name', previous version number, current version number]
3c7fa354
SH
95# where the version number is undef if the distribution did not exist.
96# The fourth element is an arrayref of core distribution names of those distribution for which it
97# is unknown whether they have changed and therefore need to be manually checked.
f40724d5
AV
98#
99# In most cases, the distribution name in %Modules corresponds to the module that is representative
100# of the distribution as listed in Module::CoreList. However, there are a few distribution names
3c7fa354 101# that do not correspond to a module. %distToModules has been created which maps the distribution
f40724d5
AV
102# name to a representative module. The representative module was chosen by either looking at the
103# Makefile of the distribution or by seeing which module the distribution has been traditionally
3c7fa354 104# listed under in past perldeltas.
f40724d5
AV
105#
106# There are a few distributions for which there is no single representative module (e.g. libnet).
107# These distributions are returned as the last element of the list.
108#
109# %Modules contains a final key, _PERLLIB, which contains a list of modules that are owned by p5p.
110# This list contains modules and pragmata that may also be present in Module::CoreList.
111# A list of modules are in the list @unclaimedModules, which were manually listed based on whether
3c7fa354 112# they were independent modules and whether they have been listed in past perldeltas.
f40724d5
AV
113# The pragmata were found by doing something like:
114# say for sort grep { $_ eq lc $_ and !exists $Modules{$_}}
115# keys %{$Module::CoreList::version{'5.019003'}}
116# and manually filtering out pragamata that were already covered.
117#
118# It is currently not possible to differentiate between a removed module and a removed
119# distribution. Therefore, the removed hashref contains every module that has been removed, even if
120# the module's corresponding distribution has not been removed.
121
93b00636 122sub corelist_delta {
2a19fd14
FR
123 my ($old, $new) = @_;
124 my $corelist = \%Module::CoreList::version;
f40724d5 125 my %changes = Module::CoreList::changes_between( $old, $new );
2a19fd14
FR
126 $deprecated = $Module::CoreList::deprecated{$new};
127
f40724d5
AV
128 my $getModifyType = sub {
129 my $data = shift;
130 if ( exists $data->{left} and exists $data->{right} ) {
131 return 'updated';
132 }
133 elsif ( !exists $data->{left} and exists $data->{right} ) {
134 return 'new';
135 }
136 elsif ( exists $data->{left} and !exists $data->{right} ) {
137 return 'removed';
138 }
139 return undef;
140 };
141
142 my @unclaimedModules = qw/AnyDBM_File B B::Concise B::Deparse Benchmark Class::Struct Config::Extensions DB DBM_Filter Devel::Peek DirHandle DynaLoader English Errno ExtUtils::Embed ExtUtils::Miniperl ExtUtils::Typemaps ExtUtils::XSSymSet Fcntl File::Basename File::Compare File::Copy File::DosGlob File::Find File::Glob File::stat FileCache FileHandle FindBin GDBM_File Getopt::Std Hash::Util Hash::Util::FieldHash I18N::Langinfo IPC::Open3 NDBM_File ODBM_File Opcode PerlIO PerlIO::encoding PerlIO::mmap PerlIO::scalar PerlIO::via Pod::Functions Pod::Html POSIX SDBM_File SelectSaver Symbol Sys::Hostname Thread Tie::Array Tie::Handle Tie::Hash Tie::Hash::NamedCapture Tie::Memoize Tie::Scalar Tie::StdHandle Tie::SubstrHash Time::gmtime Time::localtime Time::tm Unicode::UCD UNIVERSAL User::grent User::pwent VMS::DCLsym VMS::Filespec VMS::Stdio XS::Typemap Win32CORE/;
143 my @unclaimedPragmata = qw/_charnames arybase attributes blib bytes charnames deprecate diagnostics encoding feature fields filetest inc::latest integer less locale mro open ops overload overloading re sigtrap sort strict subs utf8 vars vmsish/;
144 my @unclaimed = (@unclaimedModules, @unclaimedPragmata);
145
146 my %distToModules = (
147 'IO-Compress' => [
148 {
149 'name' => 'IO-Compress',
150 'modification' => $getModifyType->( $changes{'IO::Compress::Base'} ),
151 'data' => $changes{'IO::Compress::Base'}
152 }
153 ],
154 'Locale-Codes' => [
155 {
156 'name' => 'Locale::Codes',
157 'modification' => $getModifyType->( $changes{'Locale::Codes'} ),
158 'data' => $changes{'Locale::Codes'}
159 }
160 ],
161 'PathTools' => [
162 {
163 'name' => 'File::Spec',
164 'modification' => $getModifyType->( $changes{'Cwd'} ),
165 'data' => $changes{'Cwd'}
166 }
167 ],
168 'Scalar-List-Utils' => [
169 {
170 'name' => 'List::Util',
171 'modification' => $getModifyType->( $changes{'List::Util'} ),
172 'data' => $changes{'List::Util'}
173 },
174 {
175 'name' => 'Scalar::Util',
176 'modification' => $getModifyType->( $changes{'Scalar::Util'} ),
177 'data' => $changes{'Scalar::Util'}
178 }
179 ],
180 'Text-Tabs+Wrap' => [
181 {
182 'name' => 'Text::Tabs',
183 'modification' => $getModifyType->( $changes{'Text::Tabs'} ),
184 'data' => $changes{'Text::Tabs'}
185 },
186 {
187 'name' => 'Text::Wrap',
188 'modification' => $getModifyType->( $changes{'Text::Wrap'} ),
189 'data' => $changes{'Text::Wrap'}
190 }
191 ],
192 );
193
194 # structure is (new|removed|updated) => [ [ModuleName, previousVersion, newVersion] ]
195 my $deltaGrouping = {};
196
197 # list of distributions listed in %Modules that need to be manually checked because there is no module that represents it
198 my @manuallyCheck;
2a19fd14
FR
199
200 # %Modules defines what is currently in core
201 for my $k ( keys %Modules ) {
f40724d5
AV
202 next if $k eq '_PERLLIB'; #these are taken care of by being listed in @unclaimed
203 next if Module::CoreList::is_core($k) and !exists $changes{$k}; #modules that have not changed
204
205 my ( $distName, $modifyType, $data );
206
207 if ( exists $changes{$k} ) {
208 $distName = $k;
209 $modifyType = $getModifyType->( $changes{$k} );
210 $data = $changes{$k};
88e14305 211 }
f40724d5
AV
212 elsif ( exists $distToModules{$k} ) {
213 # modification will be undef if the distribution has not changed
214 my @modules = grep { $_->{modification} } @{ $distToModules{$k} };
215 for (@modules) {
216 $deltaGrouping->{ $_->{modification} }->{ $_->{name} } = [ $_->{name}, $_->{data}->{left}, $_->{data}->{right} ];
2a19fd14 217 }
f40724d5
AV
218 next;
219 }
220 else {
221 push @manuallyCheck, $k and next;
222 }
223
224 $deltaGrouping->{$modifyType}->{$distName} = [ $distName, $data->{left}, $data->{right} ];
225 }
226
227 for my $k (@unclaimed) {
228 if ( exists $changes{$k} ) {
229 $deltaGrouping->{ $getModifyType->( $changes{$k} ) }->{$k} =
230 [ $k, $changes{$k}->{left}, $changes{$k}->{right} ];
88e14305
DG
231 }
232 }
603f4ea0 233
2a19fd14
FR
234 # in old corelist, but not this one => removed
235 # N.B. This is exhaustive -- not just what's in %Modules, so modules removed from
236 # distributions will show up here, too. Some person will have to review to see what's
237 # important. That's the best we can do without a historical Maintainers.pl
238 for my $k ( keys %{ $corelist->{$old} } ) {
239 if ( ! exists $corelist->{$new}{$k} ) {
f40724d5 240 $deltaGrouping->{'removed'}->{$k} = [ $k, $corelist->{$old}{$k}, undef ];
2a19fd14 241 }
88e14305 242 }
88e14305 243
f40724d5
AV
244 return (
245 \%{ $deltaGrouping->{'new'} },
246 \%{ $deltaGrouping->{'removed'} },
247 \%{ $deltaGrouping->{'updated'} },
248 \@manuallyCheck
249 );
93b00636
FR
250}
251
5c51ce30
AV
252# currently does not update the Removed Module section
253sub do_update_existing {
254 my ( $existing, $old, $new ) = @_;
255
256 my ( $added, $removed, $updated, $manuallyCheck ) = corelist_delta( $old => $new );
257 if ($manuallyCheck) {
59d2de25
AV
258 print "It cannot be determined whether the following distributions have changed.\n";
259 print "Please check and list accordingly:\n";
5c51ce30 260 say "\t* $_" for sort @{$manuallyCheck};
59d2de25 261 print "\n";
5c51ce30
AV
262 }
263
264 my $data = {
265 new => $added,
266 updated => $updated,
267 #removed => $removed, ignore removed for now
268 };
269
270 my $text = DeltaUpdater::transform_pod( $existing, $data );
271 open my $out, '>', $existing or die "can't open perldelta file $existing: $!";
9c7cb461 272 binmode($out);
5c51ce30
AV
273 print $out $text;
274 close $out;
59d2de25
AV
275 say "The New and Updated Modules and Pragamata sections in $existing have been updated";
276 say "Please ensure the Removed Modules and Pragmata section is up-to-date";
5c51ce30
AV
277}
278
93b00636
FR
279sub do_generate {
280 my ($old, $new) = @_;
f40724d5
AV
281 my ($added, $removed, $updated, $manuallyCheck) = corelist_delta($old => $new);
282
283 if ($manuallyCheck) {
59d2de25
AV
284 print "\nXXXIt cannot be determined whether the following distributions have changed.\n";
285 print "Please check and list accordingly:\n";
f40724d5 286 say "\t$_" for @{$manuallyCheck};
59d2de25 287 print "\n";
f40724d5 288 }
93b00636 289
3bb80250
AV
290 my $data = {
291 new => $added,
292 updated => $updated,
293 #removed => $removed, ignore removed for now
294 };
295
296 say DeltaUpdater::sections_to_pod($data)
93b00636
FR
297}
298
299sub do_check {
300 my ($in, $old, $new) = @_;
301
302 my $delta = DeltaParser->new($in);
f40724d5 303 my ($added, $removed, $updated) = corelist_delta($old => $new);
93b00636 304
fc353283
AV
305 # because of the difficulty in identifying the distribution for removed modules
306 # don't bother checking them
f40724d5 307 for my $ck ([ 'new', $delta->new_modules, $added ],
fc353283 308 #[ 'removed', $delta->removed_modules, $removed ],
f40724d5 309 [ 'updated', $delta->updated_modules, $updated ] ) {
93b00636 310 my @delta = @{ $ck->[1] };
f40724d5 311 my @corelist = sort { lc $a->[0] cmp lc $b->[0] } values %{ $ck->[2] };
93b00636
FR
312
313 printf $ck->[0] . ":\n";
314
e8097ff2 315 require Algorithm::Diff;
93b00636
FR
316 my $diff = Algorithm::Diff->new(map {
317 [map { join q{ } => grep defined, @{ $_ } } @{ $_ }]
318 } \@delta, \@corelist);
319
320 while ($diff->Next) {
321 next if $diff->Same;
322 my $sep = '';
323 if (!$diff->Items(2)) {
324 printf "%d,%dd%d\n", $diff->Get(qw( Min1 Max1 Max2 ));
325 } elsif(!$diff->Items(1)) {
326 printf "%da%d,%d\n", $diff->Get(qw( Max1 Min2 Max2 ));
327 } else {
328 $sep = "---\n";
329 printf "%d,%dc%d,%d\n", $diff->Get(qw( Min1 Max1 Min2 Max2 ));
330 }
fc353283 331 print "Delta< $_\n" for $diff->Items(1);
93b00636 332 print $sep;
fc353283 333 print "Corelist> $_\n" for $diff->Items(2);
93b00636
FR
334 }
335
336 print "\n";
337 }
338}
339
340{
5c51ce30
AV
341
342 package DeltaUpdater;
343 use List::Util 'reduce';
344
345 sub get_section_name_from_heading {
346 my $heading = shift;
347 while (my ($key, $expression) = each %sections) {
348 if ($heading =~ $expression) {
349 return $titles{$key};
350 }
351 }
352 die "$heading did not match any section";
353 }
354
355 sub is_desired_section_name {
356 for (values %sections) {
357 return 1 if $_[0] =~ $_;
358 }
359 return 0;
360 }
361
362 # verify the module and pragmata in the section, changing the stated version if necessary
363 # this subroutine warns if the module name cannot be parsed or if it is not listed in
364 # the results returned from corelist_delta()
365 #
366 # a side-effect of calling this function is that modules present in the section are
367 # removed from $data, resulting in $data containing only those modules and pragmata
368 # that were not listed in the perldelta file. This means we can then pass $data to
369 # add_to_section() without worrying about filtering out duplicates
370 sub update_section {
371 my ( $section, $data, $title ) = @_;
372 my @items = @{ $section->{items} };
373
374 for my $item (@items) {
375
376 my $content = $item->{text};
377 my $module = $item->{name};
378
318148ea
AV
379 #skip dummy items
380 next if !$module and $content =~ /\s*xx*\s*/i;
381
5c51ce30 382 say "Could not parse module name; line is:\n\t$content" and next unless $module;
59d2de25
AV
383
384 if ( !$data->{$title}{$module} ) {
385 print "$module is not listed as being $title in Module::CoreList.\n";
386 print "Ensure Module::CoreList has been updated and\n";
387 print "check to see that the distribution is not listed under another name.\n\n";
388 next;
389 }
5c51ce30
AV
390
391 if ( $title eq 'new' ) {
392 my ($new) = $content =~ /(\d[^\s]+)\s+has\s+been.*$/m;
393 say "Could not parse new version for $module; line is:\n\t$content" and next unless $new;
394 if ( $data->{$title}{$module}[2] ne $new ) {
395 say "$module: new version differs; version in pod: $new; version in corelist: " . $data->{$title}{$module}[2];
396 }
397 $content =~ s/\d[^\s]+(\s+has\s+been.*$)/$data->{$title}{$module}[2].$1/me;
398 }
399
400 elsif ( $title eq 'updated' ) {
401 my ( $prev, $new ) = $content =~ /from\s+(?:version\s+)?(\d[^\s]+)\s+to\s+(?:version\s+)?(\d[^\s,]+?)(?=[\s,]|\.\s|\.$|$).*/s;
402 say "Could not parse old and new version for $module; line is:\n\t$content" and next
403 unless $prev and $new;
404 if ( $data->{$title}{$module}[1] ne $prev ) {
405 say "$module: previous version differs; version in pod: $prev; version in corelist: " . $data->{$title}{$module}[1];
406 }
407 if ( $data->{$title}{$module}[2] ne $new ) {
408 say "$module: new version differs; version in pod: $new; version in corelist: " . $data->{$title}{$module}[2];
409 }
410 $content =~
411 s/(from\s+(?:version\s+)?)\d[^\s]+(\s+to\s+(?:version\s+)?)\d[^\s,]+?(?=[\s,]|\.\s|\.$|$)(.*)/$1.$data->{$title}{$module}[1].$2.$data->{$title}{$module}[2].$3/se;
412 }
413
414 elsif ( $title eq 'removed' ) {
415 my ($prev) = $content =~ /^.*?was\s+(\d[^\s]+?)/m;
416 say "Could not parse old version for $module; line is:\n\t$content" and next unless $prev;
417 if ( $data->{$title}{$module}[1] ne $prev ) {
418 say "$module: previous version differs; $prev " . $data->{$title}{$module}[1];
419 }
420 $content =~ s/(^.*?was\s+)\d[^\s]+?/$1.$data->{$title}{$module}[1]/me;
421 }
422
423 delete $data->{$title}{$module};
424 $item->{text} = $content;
425 }
426 return $section;
427 }
428
429 # add modules and pragmata present in $data to the section
430 sub add_to_section {
431 my ( $section, $data, $title ) = @_;
432
433 #undef is a valid version name in Module::CoreList so supress warnings about concatenating undef values
434 no warnings 'uninitialized';
435 for ( values %{ $data->{$title} } ) {
436 my ( $mod, $old_v, $new_v ) = @{$_};
437 my ( $item, $text );
438
439 $item = { name => $mod, text => "=item *\n" };
440 if ( $title eq 'new' ) {
441 $text = "L<$mod> $new_v has been added to the Perl core.\n";
442 }
443
444 elsif ( $title eq 'updated' ) {
445 $text = "L<$mod> has been upgraded from version $old_v to $new_v.\n";
446 if ( $deprecated->{$mod} ) {
447 $text .= "NOTE: L<$mod> is deprecated and may be removed from a future version of Perl.\n";
448 }
449 }
450
451 elsif ( $title eq 'removed' ) {
452 $text = "C<$mod> has been removed from the Perl core. Prior version was $old_v.\n";
453 }
454
455 $item->{text} .= "\n$text\n";
456 push @{ $section->{items} }, $item;
457 }
458 return $section;
459 }
460
461 sub sort_items_in_section {
462 my ($section) = @_;
463
464 # if we could not parse the module name, it will be uninitalized
465 # in sort. This is not a problem as it will just result in these
466 # sections being placed near the begining of the section
467 no warnings 'uninitialized';
468 $section->{items} =
469 [ sort { lc $a->{name} cmp lc $b->{name} } @{ $section->{items} } ];
470 return $section;
471 }
472
473 # given a hashref of the form returned by corelist_delta()
474 # and a hash structured as documented in transform_pod(), it returns
475 # a pod string representation of the sections, creating sections
476 # if necessary
477 sub sections_to_pod {
478 my ( $data, %sections ) = @_;
479 my $out = '';
480
481 for (
482 (
483 [ 'New Modules and Pragmata', 'new' ],
484 [ 'Updated Modules and Pragmata', 'updated' ],
485 [ 'Removed Modules and Pragmata', 'removed' ]
486 )
487 )
488 {
489 my ( $section_name, $title ) = @{$_};
490
491 my $section = $sections{$section_name} // {
3c7fa354
SH
492 name => $section_name,
493 preceding_text => "=head2 $_->[0]\n=over 4\n",
494 following_text => "=back\n",
495 items => [],
496 manual => 1
5c51ce30
AV
497 };
498
499 $section = update_section( $section, $data, $title );
500 $section = add_to_section( $section, $data, $title );
501 $section = sort_items_in_section( $section );
502
503 next if $section->{manual} and scalar @{ $section->{items} } == 0;
504
505 my $items = reduce { no warnings 'once'; $a . $b->{text} }
506 ( '', @{ $section->{items} } );
507 $out .=
3c7fa354 508 ( $section->{preceding_text} // '' )
5c51ce30
AV
509 . $items
510 . ( $section->{following_text} // '' );
511 }
512 return $out;
513 }
514
515 # given a filename corresponding to an existing perldelta file
516 # and a hashref of the form returned by corelist_delta(), it
517 # returns a string of the resulting file after the module
518 # information has been added.
519 sub transform_pod {
520 my ( $existing, $data ) = @_;
521
522 # will contain hashrefs corresponding to new, updated and removed
523 # modules and pragmata keyed by section name
524 # each section is hashref of the structure
3c7fa354
SH
525 # preceding_text => Text occurring before and including the over
526 # region containing the list of modules,
527 # items => [Arrayref of hashrefs corresponding to a module
528 # entry],
5c51ce30
AV
529 # an entry has the form:
530 # name => Module name or undef if the name could not be determined
531 # text => The text of the entry, including the item heading
532 #
3c7fa354
SH
533 # following_text => Any text not corresponding to a module
534 # that occurs after the first module
5c51ce30
AV
535 #
536 # the sections are converted to a pod string by calling sections_to_pod()
537 my %sections;
538
539 # we are in the Modules_and_Pragmata's section
540 my $in_Modules_and_Pragmata;
3c7fa354 541
5c51ce30
AV
542 # we are the Modules_and_Pragmata's section but have not
543 # encountered any of the desired sections. We use this
544 # flag to determine whether we should append the text to $out
545 # or we need to delay appending until the module listings are
546 # processed and instead append to $append_to_out
547 my $in_Modules_and_Pragmata_preamble;
3c7fa354 548
5c51ce30
AV
549 my $done_processing_Modules_and_Pragmata;
550
551 my $current_section;
3c7fa354 552
5c51ce30
AV
553 # $nested_element_level == 0 : not in an over region, treat lines as text
554 # $nested_element_level == 1 : presumably in the top over region that
555 # corresponds to the module listing. Treat
556 # each item as a module
557 # $nested_element_level > 1 : we only consider these values when we are in an item
558 # We treat lines as the text of the current item.
559 my $nested_element_level = 0;
3c7fa354 560
5c51ce30
AV
561 my $current_item;
562 my $need_to_parse_module_name;
563
564 my $out = '';
565 my $append_to_out = '';
566
567 open my $fh, '<', $existing or die "can't open perldelta file $existing: $!";
9c7cb461 568 binmode($fh);
5c51ce30
AV
569
570 while (<$fh>) {
571 # treat the rest of the file as plain text
572 if ($done_processing_Modules_and_Pragmata) {
573 $out .= $_;
574 next;
575 }
576
577 elsif ( !$in_Modules_and_Pragmata ) {
578 # entering Modules and Pragmata
579 if (/^=head1 Modules and Pragmata/) {
580 $in_Modules_and_Pragmata = 1;
581 $in_Modules_and_Pragmata_preamble = 1;
582 }
583 $out .= $_;
584 next;
585 }
586
587 # leaving Modules and Pragmata
588 elsif (/^=head1/) {
589 if ($current_section) {
590 push @{ $current_section->{items} }, $current_item
591 if $current_item;
592 $sections{ $current_section->{name} } = $current_section;
593 }
594 $done_processing_Modules_and_Pragmata = 1;
595 $out .=
596 sections_to_pod( $data, %sections ) . $append_to_out . $_;
597 next;
598 }
599
600 # new section in Modules and Pragmata
601 elsif (/^=head2 (.*?)$/) {
602 my $name = $1;
603 if ($current_section) {
604 push @{ $current_section->{items} }, $current_item
605 if $current_item;
606 $sections{ $current_section->{name} } = $current_section;
607 undef $current_section;
608 }
609
610 if ( is_desired_section_name($name) ) {
611 undef $in_Modules_and_Pragmata_preamble;
612 if ( $nested_element_level > 0 ) {
613 die "Unexpected head2 at line no. $.";
614 }
615 my $title = get_section_name_from_heading($name);
616 if ( exists $sections{$title} ) {
3c7fa354 617 die "$name occurred twice at line no. $.";
5c51ce30 618 }
3c7fa354
SH
619 $current_section = {};
620 $current_section->{name} = $title;
621 $current_section->{preceding_text} = $_;
622 $current_section->{items} = [];
623 $nested_element_level = 0;
5c51ce30
AV
624 next;
625 }
626
627 # otherwise treat section as plain text
628 else {
629 if ($in_Modules_and_Pragmata_preamble) {
630 $out .= $_;
631 }
632 else {
633 $append_to_out .= $_;
634 }
635 next;
636 }
637 }
638
639 elsif ($current_section) {
640
641 # not in an over region
642 if ( $nested_element_level == 0 ) {
643 if (/^=over/) {
644 $nested_element_level++;
645 }
646 if ( scalar @{ $current_section->{items} } > 0 ) {
647 $current_section->{following_text} .= $_;
648 }
649 else {
3c7fa354 650 $current_section->{preceding_text} .= $_;
5c51ce30
AV
651 }
652 next;
653 }
654
655 if ($current_item) {
656 if ($need_to_parse_module_name) {
657 # the item may not have a parsable module name, which means that
658 # $current_item->{name} will never be defined.
659 if (/^(?:L|C)<(.+?)>/) {
660 $current_item->{name} = $1;
661 undef $need_to_parse_module_name;
662 }
663 # =item or =back signals the end of an item
664 # block, which we handle below
665 if ( !/^=(?:item|back)/ ) {
666 $current_item->{text} .= $_;
667 next;
668 }
669 }
670 # currently in an over region
671 # treat text inside region as plain text
672 if ( $nested_element_level > 1 ) {
673 if (/^=back/) {
674 $nested_element_level--;
675 }
676 elsif (/^=over/) {
677 $nested_element_level++;
678 }
679 $current_item->{text} .= $_;
680 next;
681 }
682 # entering over region
683 if (/^=over/) {
684 $nested_element_level++;
685 $current_item->{text} .= $_;
686 next;
687 }
688 # =item or =back signals the end of an item
689 # block, which we handle below
690 if ( !/^=(?:item|back)/ ) {
691 $current_item->{text} .= $_;
692 next;
693 }
694 }
695
696 if (/^=item \*/) {
697 push @{ $current_section->{items} }, $current_item
698 if $current_item;
699 $current_item = { text => $_ };
700 $need_to_parse_module_name = 1;
701 next;
702 }
703
704 if (/^=back/) {
705 push @{ $current_section->{items} }, $current_item
706 if $current_item;
707 undef $current_item;
708 $nested_element_level--;
709 }
710
711 if ( scalar @{ $current_section->{items} } == 0 ) {
3c7fa354 712 $current_section->{preceding_text} .= $_;
5c51ce30
AV
713 }
714 else {
715 $current_section->{following_text} .= $_;
716 }
717 next;
718 }
719
720 # text in Modules and Pragmata not in a head2 region
721 else {
722 if ($in_Modules_and_Pragmata_preamble) {
723 $out .= $_;
724 }
725 else {
726 $append_to_out .= $_;
727 }
728 next;
729 }
730 }
731 close $fh;
732 die 'Never saw Modules and Pragmata section' unless $in_Modules_and_Pragmata;
733 return $out;
734 }
735
736}
737
738{
93b00636
FR
739 package DeltaParser;
740 use Pod::Simple::SimpleTree;
741
742 sub new {
743 my ($class, $input) = @_;
744
745 my $self = bless {} => $class;
746
747 my $parsed_pod = Pod::Simple::SimpleTree->new->parse_file($input)->root;
748 splice @{ $parsed_pod }, 0, 2; # we don't care about the document structure,
8d132c8e 749 # just the nodes within it
93b00636
FR
750
751 $self->_parse_delta($parsed_pod);
752
753 return $self;
754 }
755
c014dfad
AV
756 # creates the accessor methods:
757 # new_modules
758 # updated_modules
759 # removed_modules
93b00636
FR
760 for my $k (keys %sections) {
761 no strict 'refs';
762 my $m = "${k}_modules";
763 *$m = sub { $_[0]->{$m} };
764 }
765
766 sub _parse_delta {
767 my ($self, $pod) = @_;
768
c014dfad
AV
769 my $new_section = $self->_look_for_section( $pod, $sections{new} );
770 my $updated_section = $self->_look_for_section( $pod, $sections{updated} );
771 my $removed_section = $self->_look_for_section( $pod, $sections{removed} );
772
773 $self->_parse_new_section($new_section);
774 $self->_parse_updated_section($updated_section);
775 $self->_parse_removed_section($removed_section);
776
777 for (qw/new_modules updated_modules removed_modules/) {
778 $self->{$_} =
779 [ sort { lc $a->[0] cmp lc $b->[0] } @{ $self->{$_} } ];
93b00636
FR
780 }
781
782 return;
783 }
784
785 sub _parse_new_section {
786 my ($self, $section) = @_;
787
c014dfad
AV
788 $self->{new_modules} = [];
789 return unless $section;
93b00636
FR
790 $self->{new_modules} = $self->_parse_section($section => sub {
791 my ($el) = @_;
792
793 my ($first, $second) = @{ $el }[2, 3];
794 my ($ver) = $second =~ /(\d[^\s]+)\s+has\s+been/;
795
796 return [ $first->[2], undef, $ver ];
797 });
798
799 return;
800 }
801
802 sub _parse_updated_section {
803 my ($self, $section) = @_;
804
3668a9e5
AV
805 $self->{updated_modules} = [];
806 return unless $section;
93b00636
FR
807 $self->{updated_modules} = $self->_parse_section($section => sub {
808 my ($el) = @_;
809
810 my ($first, $second) = @{ $el }[2, 3];
811 my $module = $first->[2];
3668a9e5
AV
812
813 # the regular expression matches the following:
814 # from VERSION_NUMBER to VERSION_NUMBER
815 # from VERSION_NUMBER to VERSION_NUMBER.
816 # from version VERSION_NUMBER to version VERSION_NUMBER.
817 # from VERSION_NUMBER to VERSION_NUMBER and MODULE from VERSION_NUMBER to VERSION_NUMBER
818 # from VERSION_NUMBER to VERSION_NUMBER, and MODULE from VERSION_NUMBER to VERSION_NUMBER
819 #
3c7fa354 820 # some perldeltas contain more than one module listed in an entry, this only attempts to match the
3668a9e5 821 # first module
93b00636 822 my ($old, $new) = $second =~
3668a9e5 823 /from\s+(?:version\s+)?(\d[^\s]+)\s+to\s+(?:version\s+)?(\d[^\s,]+?)(?=[\s,]|\.\s|\.$|$).*/s;
93b00636
FR
824
825 warn "Unable to extract old or new version of $module from perldelta"
826 if !defined $old || !defined $new;
827
828 return [ $module, $old, $new ];
829 });
830
831 return;
832 }
833
834 sub _parse_removed_section {
835 my ($self, $section) = @_;
c014dfad
AV
836
837 $self->{removed_modules} = [];
838 return unless $section;
93b00636
FR
839 $self->{removed_modules} = $self->_parse_section($section => sub {
840 my ($el) = @_;
841
842 my ($first, $second) = @{ $el }[2, 3];
843 my ($old) = $second =~ /was\s+(\d[^\s]+?)\.?$/;
844
845 return [ $first->[2], $old, undef ];
846 });
847
848 return;
849 }
850
851 sub _parse_section {
852 my ($self, $section, $parser) = @_;
853
854 my $items = $self->_look_down($section => sub {
855 my ($el) = @_;
856 return unless ref $el && $el->[0] =~ /^item-/
857 && @{ $el } > 2 && ref $el->[2];
5c404e2c 858 return unless $el->[2]->[0] =~ /C|L/;
93b00636
FR
859
860 return 1;
861 });
862
863 return [map { $parser->($_) } @{ $items }];
864 }
865
866 sub _look_down {
867 my ($self, $pod, $predicate) = @_;
868 my @pod = @{ $pod };
869
870 my @l;
871 while (my $el = shift @pod) {
872 push @l, $el if $predicate->($el);
873 if (ref $el) {
874 my @el = @{ $el };
875 splice @el, 0, 2;
876 unshift @pod, @el if @el;
877 }
878 }
879
880 return @l ? \@l : undef;
881 }
882
883 sub _look_for_section {
884 my ($self, $pod, $section) = @_;
885
886 my $level;
887 $self->_look_for_range($pod,
888 sub {
889 my ($el) = @_;
86c08a2c 890 my ($heading) = $el->[0] =~ /^head(\d)$/;
b88937b3 891 my $f = $heading && $el->[2] =~ /^$section/;
86c08a2c 892 $level = $heading if $f && !$level;
93b00636
FR
893 return $f;
894 },
895 sub {
896 my ($el) = @_;
897 $el->[0] =~ /^head(\d)$/ && $1 <= $level;
898 },
899 );
900 }
901
902 sub _look_for_range {
903 my ($self, $pod, $start_predicate, $stop_predicate) = @_;
904
905 my @l;
906 for my $el (@{ $pod }) {
907 if (@l) {
908 return \@l if $stop_predicate->($el);
909 }
910 else {
911 next unless $start_predicate->($el);
912 }
913 push @l, $el;
914 }
915
916 return;
917 }
2a19fd14 918}
88e14305 919
2a19fd14 920run;