Commit | Line | Data |
---|---|---|
603f4ea0 DG |
1 | #!perl |
2 | use 5.010; | |
3 | use strict; | |
4 | use warnings; | |
5 | use lib 'Porting'; | |
6 | use Maintainers qw/%Modules/; | |
5e0e6da6 | 7 | use lib 'dist/Module-CoreList/lib'; |
603f4ea0 | 8 | use Module::CoreList; |
2a19fd14 | 9 | use Getopt::Long; |
e8097ff2 MM |
10 | |
11 | =head1 USAGE | |
12 | ||
13 | # generate the module changes for the Perl you are currently building | |
67e92e89 | 14 | ./perl -Ilib Porting/corelist-perldelta.pl |
5c51ce30 AV |
15 | |
16 | # update the module changes for the Perl you are currently building | |
59d2de25 | 17 | ./perl -Ilib Porting/corelist-perldelta.pl --mode=update pod/perldelta.pod |
5c51ce30 | 18 | |
e8097ff2 | 19 | # generate a diff between the corelist sections of two perldelta* files: |
80da4b39 | 20 | perl Porting/corelist-perldelta.pl --mode=check 5.017001 5.017002 <perl5172delta.pod |
e8097ff2 MM |
21 | |
22 | =head1 ABOUT | |
23 | ||
24 | corelist-perldelta.pl is a bit schizophrenic. The part to generate the | |
25 | new Perldelta text does not need Algorithm::Diff, but wants to be | |
26 | run with the freshly built Perl. | |
27 | ||
28 | The part to check the diff wants to be run with a Perl that has an up-to-date | |
29 | L<Module::CoreList>, but needs the outside L<Algorithm::Diff>. | |
30 | ||
31 | Ideally, the program will be split into two separate programs, one | |
b88937b3 | 32 | to generate the text and one to show the diff between the |
e8097ff2 MM |
33 | corelist sections of the last perldelta and the next perldelta. |
34 | ||
59d2de25 AV |
35 | Currently no information about Removed Modules is displayed in any of the |
36 | modes. | |
54e78c5e | 37 | |
e8097ff2 | 38 | =cut |
93b00636 FR |
39 | |
40 | my %sections = ( | |
8d132c8e MM |
41 | new => qr/New Modules and Pragma(ta)?/, |
42 | updated => qr/Updated Modules and Pragma(ta)?/, | |
43 | removed => qr/Removed Modules and Pragma(ta)?/, | |
44 | ); | |
45 | ||
46 | my %titles = ( | |
caaa1415 MM |
47 | new => 'New Modules and Pragmata', |
48 | updated => 'Updated Modules and Pragmata', | |
49 | removed => 'Removed Modules and Pragmata', | |
93b00636 | 50 | ); |
603f4ea0 | 51 | |
88e14305 DG |
52 | my $deprecated; |
53 | ||
2a19fd14 FR |
54 | sub run { |
55 | my %opt = (mode => 'generate'); | |
56 | ||
57 | GetOptions(\%opt, | |
5c51ce30 | 58 | 'mode|m:s', # 'generate', 'check', 'update' |
2a19fd14 FR |
59 | ); |
60 | ||
93b00636 | 61 | # by default, compare latest two version in CoreList; |
c27d84d9 | 62 | my ($old, $new) = latest_two_perl_versions(); |
5e1cf63b AV |
63 | |
64 | # use the provided versions if present | |
65 | # @ARGV >=2 means [old_version] [new_version] [path/to/file] | |
66 | if ( @ARGV >= 2) { | |
67 | ($old, $new) = (shift @ARGV, shift @ARGV); | |
68 | die "$old is an invalid version\n" if not exists | |
69 | $Module::CoreList::version{$old}; | |
29900347 | 70 | die "$new is an invalid version\n" if not exists |
5e1cf63b AV |
71 | $Module::CoreList::version{$new}; |
72 | } | |
2a19fd14 | 73 | |
93b00636 | 74 | if ( $opt{mode} eq 'generate' ) { |
2a19fd14 | 75 | do_generate($old => $new); |
88e14305 | 76 | } |
93b00636 FR |
77 | elsif ( $opt{mode} eq 'check' ) { |
78 | do_check(\*ARGV, $old => $new); | |
79 | } | |
5c51ce30 AV |
80 | elsif ( $opt{mode} eq 'update' ) { |
81 | do_update_existing(shift @ARGV, $old => $new); | |
82 | } | |
88e14305 | 83 | else { |
2a19fd14 FR |
84 | die "Unrecognized mode '$opt{mode}'\n"; |
85 | } | |
86 | ||
87 | exit 0; | |
88 | } | |
89 | ||
c27d84d9 AC |
90 | sub latest_two_perl_versions { |
91 | ||
92 | my @versions = sort keys %Module::CoreList::version; | |
93 | ||
94 | my $new = pop @versions; | |
95 | ||
96 | # If a fully-padded version number ends in a zero (as in "5.019010"), that | |
97 | # version shows up in %Module::CoreList::version both with and without its | |
98 | # trailing zeros. So skip all versions that are numerically equal to $new. | |
99 | pop @versions while @versions && $versions[-1] == $new; | |
100 | ||
101 | die "Too few distinct core versions in %Module::CoreList::version ?!\n" | |
102 | if !@versions; | |
103 | ||
104 | return $versions[-1], $new; | |
105 | } | |
106 | ||
f40724d5 AV |
107 | # Given two perl versions, it returns a list describing the core distributions that have changed. |
108 | # The first three elements are hashrefs corresponding to new, updated, and removed modules | |
109 | # and are of the form (mostly, see the special remarks about removed): | |
110 | # 'Distribution Name' => ['Distribution Name', previous version number, current version number] | |
3c7fa354 SH |
111 | # where the version number is undef if the distribution did not exist. |
112 | # The fourth element is an arrayref of core distribution names of those distribution for which it | |
113 | # is unknown whether they have changed and therefore need to be manually checked. | |
f40724d5 AV |
114 | # |
115 | # In most cases, the distribution name in %Modules corresponds to the module that is representative | |
116 | # of the distribution as listed in Module::CoreList. However, there are a few distribution names | |
3c7fa354 | 117 | # that do not correspond to a module. %distToModules has been created which maps the distribution |
f40724d5 AV |
118 | # name to a representative module. The representative module was chosen by either looking at the |
119 | # Makefile of the distribution or by seeing which module the distribution has been traditionally | |
3c7fa354 | 120 | # listed under in past perldeltas. |
f40724d5 AV |
121 | # |
122 | # There are a few distributions for which there is no single representative module (e.g. libnet). | |
123 | # These distributions are returned as the last element of the list. | |
124 | # | |
125 | # %Modules contains a final key, _PERLLIB, which contains a list of modules that are owned by p5p. | |
126 | # This list contains modules and pragmata that may also be present in Module::CoreList. | |
127 | # A list of modules are in the list @unclaimedModules, which were manually listed based on whether | |
3c7fa354 | 128 | # they were independent modules and whether they have been listed in past perldeltas. |
f40724d5 AV |
129 | # The pragmata were found by doing something like: |
130 | # say for sort grep { $_ eq lc $_ and !exists $Modules{$_}} | |
131 | # keys %{$Module::CoreList::version{'5.019003'}} | |
132 | # and manually filtering out pragamata that were already covered. | |
133 | # | |
134 | # It is currently not possible to differentiate between a removed module and a removed | |
135 | # distribution. Therefore, the removed hashref contains every module that has been removed, even if | |
136 | # the module's corresponding distribution has not been removed. | |
137 | ||
93b00636 | 138 | sub corelist_delta { |
2a19fd14 FR |
139 | my ($old, $new) = @_; |
140 | my $corelist = \%Module::CoreList::version; | |
f40724d5 | 141 | my %changes = Module::CoreList::changes_between( $old, $new ); |
2a19fd14 FR |
142 | $deprecated = $Module::CoreList::deprecated{$new}; |
143 | ||
f40724d5 AV |
144 | my $getModifyType = sub { |
145 | my $data = shift; | |
146 | if ( exists $data->{left} and exists $data->{right} ) { | |
147 | return 'updated'; | |
148 | } | |
149 | elsif ( !exists $data->{left} and exists $data->{right} ) { | |
150 | return 'new'; | |
151 | } | |
152 | elsif ( exists $data->{left} and !exists $data->{right} ) { | |
153 | return 'removed'; | |
154 | } | |
155 | return undef; | |
156 | }; | |
157 | ||
236bdd01 N |
158 | my @unclaimedModules = qw/AnyDBM_File B B::Concise B::Deparse Benchmark Class::Struct Config::Extensions DB |
159 | DBM_Filter Devel::Peek DirHandle DynaLoader English Errno ExtUtils::Embed ExtUtils::Miniperl | |
160 | ExtUtils::Typemaps ExtUtils::XSSymSet Fcntl File::Basename File::Compare File::Copy File::DosGlob | |
161 | File::Find File::Glob File::stat FileCache FileHandle FindBin GDBM_File Getopt::Std Hash::Util Hash::Util::FieldHash | |
162 | I18N::Langinfo IPC::Open3 NDBM_File ODBM_File Opcode PerlIO PerlIO::encoding PerlIO::mmap PerlIO::scalar PerlIO::via | |
163 | Pod::Functions Pod::Html POSIX SDBM_File SelectSaver Symbol Sys::Hostname Thread Tie::Array Tie::Handle Tie::Hash | |
164 | Tie::Hash::NamedCapture Tie::Memoize Tie::Scalar Tie::StdHandle Tie::SubstrHash Time::gmtime Time::localtime Time::tm | |
165 | Unicode::UCD UNIVERSAL User::grent User::pwent VMS::DCLsym VMS::Filespec VMS::Stdio XS::Typemap XS::APItest Win32CORE/; | |
92ebde38 | 166 | my @unclaimedPragmata = qw/arybase attributes blib bytes charnames deprecate diagnostics encoding feature fields filetest inc::latest integer less locale mro open ops overload overloading re sigtrap sort strict subs utf8 vars vmsish/; |
f40724d5 AV |
167 | my @unclaimed = (@unclaimedModules, @unclaimedPragmata); |
168 | ||
169 | my %distToModules = ( | |
170 | 'IO-Compress' => [ | |
171 | { | |
d8c6b69e | 172 | 'name' => 'IO-Compress', |
f40724d5 | 173 | 'modification' => $getModifyType->( $changes{'IO::Compress::Base'} ), |
d8c6b69e SH |
174 | 'data' => $changes{'IO::Compress::Base'} |
175 | } | |
176 | ], | |
177 | 'libnet' => [ | |
178 | { | |
179 | 'name' => 'libnet', | |
180 | 'modification' => $getModifyType->( $changes{'Net::Cmd'} ), | |
181 | 'data' => $changes{'Net::Cmd'} | |
f40724d5 AV |
182 | } |
183 | ], | |
f40724d5 AV |
184 | 'PathTools' => [ |
185 | { | |
186 | 'name' => 'File::Spec', | |
187 | 'modification' => $getModifyType->( $changes{'Cwd'} ), | |
188 | 'data' => $changes{'Cwd'} | |
189 | } | |
190 | ], | |
d8c6b69e SH |
191 | 'podlators' => [ |
192 | { | |
193 | 'name' => 'podlators', | |
194 | 'modification' => $getModifyType->( $changes{'Pod::Text'} ), | |
195 | 'data' => $changes{'Pod::Text'} | |
196 | } | |
197 | ], | |
f40724d5 AV |
198 | 'Scalar-List-Utils' => [ |
199 | { | |
200 | 'name' => 'List::Util', | |
201 | 'modification' => $getModifyType->( $changes{'List::Util'} ), | |
202 | 'data' => $changes{'List::Util'} | |
203 | }, | |
204 | { | |
205 | 'name' => 'Scalar::Util', | |
206 | 'modification' => $getModifyType->( $changes{'Scalar::Util'} ), | |
207 | 'data' => $changes{'Scalar::Util'} | |
d8c6b69e SH |
208 | }, |
209 | { | |
210 | 'name' => 'Sub::Util', | |
211 | 'modification' => $getModifyType->( $changes{'Sub::Util'} ), | |
212 | 'data' => $changes{'Sub::Util'} | |
f40724d5 AV |
213 | } |
214 | ], | |
215 | 'Text-Tabs+Wrap' => [ | |
216 | { | |
217 | 'name' => 'Text::Tabs', | |
218 | 'modification' => $getModifyType->( $changes{'Text::Tabs'} ), | |
219 | 'data' => $changes{'Text::Tabs'} | |
220 | }, | |
221 | { | |
222 | 'name' => 'Text::Wrap', | |
223 | 'modification' => $getModifyType->( $changes{'Text::Wrap'} ), | |
224 | 'data' => $changes{'Text::Wrap'} | |
225 | } | |
226 | ], | |
227 | ); | |
228 | ||
229 | # structure is (new|removed|updated) => [ [ModuleName, previousVersion, newVersion] ] | |
230 | my $deltaGrouping = {}; | |
231 | ||
232 | # list of distributions listed in %Modules that need to be manually checked because there is no module that represents it | |
233 | my @manuallyCheck; | |
2a19fd14 FR |
234 | |
235 | # %Modules defines what is currently in core | |
236 | for my $k ( keys %Modules ) { | |
f40724d5 AV |
237 | next if $k eq '_PERLLIB'; #these are taken care of by being listed in @unclaimed |
238 | next if Module::CoreList::is_core($k) and !exists $changes{$k}; #modules that have not changed | |
239 | ||
240 | my ( $distName, $modifyType, $data ); | |
241 | ||
242 | if ( exists $changes{$k} ) { | |
243 | $distName = $k; | |
244 | $modifyType = $getModifyType->( $changes{$k} ); | |
245 | $data = $changes{$k}; | |
88e14305 | 246 | } |
f40724d5 AV |
247 | elsif ( exists $distToModules{$k} ) { |
248 | # modification will be undef if the distribution has not changed | |
249 | my @modules = grep { $_->{modification} } @{ $distToModules{$k} }; | |
250 | for (@modules) { | |
251 | $deltaGrouping->{ $_->{modification} }->{ $_->{name} } = [ $_->{name}, $_->{data}->{left}, $_->{data}->{right} ]; | |
2a19fd14 | 252 | } |
f40724d5 AV |
253 | next; |
254 | } | |
255 | else { | |
256 | push @manuallyCheck, $k and next; | |
257 | } | |
258 | ||
259 | $deltaGrouping->{$modifyType}->{$distName} = [ $distName, $data->{left}, $data->{right} ]; | |
260 | } | |
261 | ||
262 | for my $k (@unclaimed) { | |
263 | if ( exists $changes{$k} ) { | |
264 | $deltaGrouping->{ $getModifyType->( $changes{$k} ) }->{$k} = | |
265 | [ $k, $changes{$k}->{left}, $changes{$k}->{right} ]; | |
88e14305 DG |
266 | } |
267 | } | |
603f4ea0 | 268 | |
2a19fd14 FR |
269 | # in old corelist, but not this one => removed |
270 | # N.B. This is exhaustive -- not just what's in %Modules, so modules removed from | |
271 | # distributions will show up here, too. Some person will have to review to see what's | |
272 | # important. That's the best we can do without a historical Maintainers.pl | |
273 | for my $k ( keys %{ $corelist->{$old} } ) { | |
274 | if ( ! exists $corelist->{$new}{$k} ) { | |
f40724d5 | 275 | $deltaGrouping->{'removed'}->{$k} = [ $k, $corelist->{$old}{$k}, undef ]; |
2a19fd14 | 276 | } |
88e14305 | 277 | } |
88e14305 | 278 | |
f40724d5 AV |
279 | return ( |
280 | \%{ $deltaGrouping->{'new'} }, | |
281 | \%{ $deltaGrouping->{'removed'} }, | |
282 | \%{ $deltaGrouping->{'updated'} }, | |
283 | \@manuallyCheck | |
284 | ); | |
93b00636 FR |
285 | } |
286 | ||
5c51ce30 AV |
287 | # currently does not update the Removed Module section |
288 | sub do_update_existing { | |
289 | my ( $existing, $old, $new ) = @_; | |
290 | ||
291 | my ( $added, $removed, $updated, $manuallyCheck ) = corelist_delta( $old => $new ); | |
c687a5cd | 292 | if (@{$manuallyCheck}) { |
59d2de25 AV |
293 | print "It cannot be determined whether the following distributions have changed.\n"; |
294 | print "Please check and list accordingly:\n"; | |
5c51ce30 | 295 | say "\t* $_" for sort @{$manuallyCheck}; |
59d2de25 | 296 | print "\n"; |
5c51ce30 AV |
297 | } |
298 | ||
299 | my $data = { | |
300 | new => $added, | |
301 | updated => $updated, | |
302 | #removed => $removed, ignore removed for now | |
303 | }; | |
304 | ||
305 | my $text = DeltaUpdater::transform_pod( $existing, $data ); | |
306 | open my $out, '>', $existing or die "can't open perldelta file $existing: $!"; | |
9c7cb461 | 307 | binmode($out); |
5c51ce30 AV |
308 | print $out $text; |
309 | close $out; | |
59d2de25 AV |
310 | say "The New and Updated Modules and Pragamata sections in $existing have been updated"; |
311 | say "Please ensure the Removed Modules and Pragmata section is up-to-date"; | |
5c51ce30 AV |
312 | } |
313 | ||
93b00636 FR |
314 | sub do_generate { |
315 | my ($old, $new) = @_; | |
f40724d5 AV |
316 | my ($added, $removed, $updated, $manuallyCheck) = corelist_delta($old => $new); |
317 | ||
318 | if ($manuallyCheck) { | |
59d2de25 AV |
319 | print "\nXXXIt cannot be determined whether the following distributions have changed.\n"; |
320 | print "Please check and list accordingly:\n"; | |
f40724d5 | 321 | say "\t$_" for @{$manuallyCheck}; |
59d2de25 | 322 | print "\n"; |
f40724d5 | 323 | } |
93b00636 | 324 | |
3bb80250 AV |
325 | my $data = { |
326 | new => $added, | |
327 | updated => $updated, | |
328 | #removed => $removed, ignore removed for now | |
329 | }; | |
330 | ||
331 | say DeltaUpdater::sections_to_pod($data) | |
93b00636 FR |
332 | } |
333 | ||
334 | sub do_check { | |
335 | my ($in, $old, $new) = @_; | |
336 | ||
337 | my $delta = DeltaParser->new($in); | |
f40724d5 | 338 | my ($added, $removed, $updated) = corelist_delta($old => $new); |
93b00636 | 339 | |
fc353283 AV |
340 | # because of the difficulty in identifying the distribution for removed modules |
341 | # don't bother checking them | |
f40724d5 | 342 | for my $ck ([ 'new', $delta->new_modules, $added ], |
fc353283 | 343 | #[ 'removed', $delta->removed_modules, $removed ], |
f40724d5 | 344 | [ 'updated', $delta->updated_modules, $updated ] ) { |
93b00636 | 345 | my @delta = @{ $ck->[1] }; |
f40724d5 | 346 | my @corelist = sort { lc $a->[0] cmp lc $b->[0] } values %{ $ck->[2] }; |
93b00636 FR |
347 | |
348 | printf $ck->[0] . ":\n"; | |
349 | ||
e8097ff2 | 350 | require Algorithm::Diff; |
93b00636 FR |
351 | my $diff = Algorithm::Diff->new(map { |
352 | [map { join q{ } => grep defined, @{ $_ } } @{ $_ }] | |
353 | } \@delta, \@corelist); | |
354 | ||
355 | while ($diff->Next) { | |
356 | next if $diff->Same; | |
357 | my $sep = ''; | |
358 | if (!$diff->Items(2)) { | |
359 | printf "%d,%dd%d\n", $diff->Get(qw( Min1 Max1 Max2 )); | |
360 | } elsif(!$diff->Items(1)) { | |
361 | printf "%da%d,%d\n", $diff->Get(qw( Max1 Min2 Max2 )); | |
362 | } else { | |
363 | $sep = "---\n"; | |
364 | printf "%d,%dc%d,%d\n", $diff->Get(qw( Min1 Max1 Min2 Max2 )); | |
365 | } | |
fc353283 | 366 | print "Delta< $_\n" for $diff->Items(1); |
93b00636 | 367 | print $sep; |
fc353283 | 368 | print "Corelist> $_\n" for $diff->Items(2); |
93b00636 FR |
369 | } |
370 | ||
371 | print "\n"; | |
372 | } | |
373 | } | |
374 | ||
375 | { | |
5c51ce30 AV |
376 | |
377 | package DeltaUpdater; | |
378 | use List::Util 'reduce'; | |
379 | ||
380 | sub get_section_name_from_heading { | |
381 | my $heading = shift; | |
382 | while (my ($key, $expression) = each %sections) { | |
383 | if ($heading =~ $expression) { | |
384 | return $titles{$key}; | |
385 | } | |
386 | } | |
387 | die "$heading did not match any section"; | |
388 | } | |
389 | ||
390 | sub is_desired_section_name { | |
391 | for (values %sections) { | |
392 | return 1 if $_[0] =~ $_; | |
393 | } | |
394 | return 0; | |
395 | } | |
396 | ||
397 | # verify the module and pragmata in the section, changing the stated version if necessary | |
398 | # this subroutine warns if the module name cannot be parsed or if it is not listed in | |
399 | # the results returned from corelist_delta() | |
400 | # | |
401 | # a side-effect of calling this function is that modules present in the section are | |
402 | # removed from $data, resulting in $data containing only those modules and pragmata | |
403 | # that were not listed in the perldelta file. This means we can then pass $data to | |
404 | # add_to_section() without worrying about filtering out duplicates | |
405 | sub update_section { | |
406 | my ( $section, $data, $title ) = @_; | |
407 | my @items = @{ $section->{items} }; | |
408 | ||
409 | for my $item (@items) { | |
410 | ||
411 | my $content = $item->{text}; | |
412 | my $module = $item->{name}; | |
413 | ||
318148ea AV |
414 | #skip dummy items |
415 | next if !$module and $content =~ /\s*xx*\s*/i; | |
416 | ||
5c51ce30 | 417 | say "Could not parse module name; line is:\n\t$content" and next unless $module; |
59d2de25 AV |
418 | |
419 | if ( !$data->{$title}{$module} ) { | |
420 | print "$module is not listed as being $title in Module::CoreList.\n"; | |
421 | print "Ensure Module::CoreList has been updated and\n"; | |
422 | print "check to see that the distribution is not listed under another name.\n\n"; | |
423 | next; | |
424 | } | |
5c51ce30 AV |
425 | |
426 | if ( $title eq 'new' ) { | |
427 | my ($new) = $content =~ /(\d[^\s]+)\s+has\s+been.*$/m; | |
428 | say "Could not parse new version for $module; line is:\n\t$content" and next unless $new; | |
429 | if ( $data->{$title}{$module}[2] ne $new ) { | |
430 | say "$module: new version differs; version in pod: $new; version in corelist: " . $data->{$title}{$module}[2]; | |
431 | } | |
432 | $content =~ s/\d[^\s]+(\s+has\s+been.*$)/$data->{$title}{$module}[2].$1/me; | |
433 | } | |
434 | ||
435 | elsif ( $title eq 'updated' ) { | |
436 | my ( $prev, $new ) = $content =~ /from\s+(?:version\s+)?(\d[^\s]+)\s+to\s+(?:version\s+)?(\d[^\s,]+?)(?=[\s,]|\.\s|\.$|$).*/s; | |
437 | say "Could not parse old and new version for $module; line is:\n\t$content" and next | |
438 | unless $prev and $new; | |
439 | if ( $data->{$title}{$module}[1] ne $prev ) { | |
440 | say "$module: previous version differs; version in pod: $prev; version in corelist: " . $data->{$title}{$module}[1]; | |
441 | } | |
442 | if ( $data->{$title}{$module}[2] ne $new ) { | |
443 | say "$module: new version differs; version in pod: $new; version in corelist: " . $data->{$title}{$module}[2]; | |
444 | } | |
445 | $content =~ | |
446 | s/(from\s+(?:version\s+)?)\d[^\s]+(\s+to\s+(?:version\s+)?)\d[^\s,]+?(?=[\s,]|\.\s|\.$|$)(.*)/$1.$data->{$title}{$module}[1].$2.$data->{$title}{$module}[2].$3/se; | |
447 | } | |
448 | ||
449 | elsif ( $title eq 'removed' ) { | |
450 | my ($prev) = $content =~ /^.*?was\s+(\d[^\s]+?)/m; | |
451 | say "Could not parse old version for $module; line is:\n\t$content" and next unless $prev; | |
452 | if ( $data->{$title}{$module}[1] ne $prev ) { | |
453 | say "$module: previous version differs; $prev " . $data->{$title}{$module}[1]; | |
454 | } | |
455 | $content =~ s/(^.*?was\s+)\d[^\s]+?/$1.$data->{$title}{$module}[1]/me; | |
456 | } | |
457 | ||
458 | delete $data->{$title}{$module}; | |
459 | $item->{text} = $content; | |
460 | } | |
461 | return $section; | |
462 | } | |
463 | ||
464 | # add modules and pragmata present in $data to the section | |
465 | sub add_to_section { | |
466 | my ( $section, $data, $title ) = @_; | |
467 | ||
8b591357 | 468 | #undef is a valid version name in Module::CoreList so suppress warnings about concatenating undef values |
5c51ce30 AV |
469 | no warnings 'uninitialized'; |
470 | for ( values %{ $data->{$title} } ) { | |
471 | my ( $mod, $old_v, $new_v ) = @{$_}; | |
472 | my ( $item, $text ); | |
473 | ||
474 | $item = { name => $mod, text => "=item *\n" }; | |
475 | if ( $title eq 'new' ) { | |
476 | $text = "L<$mod> $new_v has been added to the Perl core.\n"; | |
477 | } | |
478 | ||
479 | elsif ( $title eq 'updated' ) { | |
480 | $text = "L<$mod> has been upgraded from version $old_v to $new_v.\n"; | |
481 | if ( $deprecated->{$mod} ) { | |
482 | $text .= "NOTE: L<$mod> is deprecated and may be removed from a future version of Perl.\n"; | |
483 | } | |
484 | } | |
485 | ||
486 | elsif ( $title eq 'removed' ) { | |
487 | $text = "C<$mod> has been removed from the Perl core. Prior version was $old_v.\n"; | |
488 | } | |
489 | ||
490 | $item->{text} .= "\n$text\n"; | |
491 | push @{ $section->{items} }, $item; | |
492 | } | |
493 | return $section; | |
494 | } | |
495 | ||
496 | sub sort_items_in_section { | |
497 | my ($section) = @_; | |
498 | ||
499 | # if we could not parse the module name, it will be uninitalized | |
500 | # in sort. This is not a problem as it will just result in these | |
8b591357 | 501 | # sections being placed near the beginning of the section |
5c51ce30 AV |
502 | no warnings 'uninitialized'; |
503 | $section->{items} = | |
504 | [ sort { lc $a->{name} cmp lc $b->{name} } @{ $section->{items} } ]; | |
505 | return $section; | |
506 | } | |
507 | ||
508 | # given a hashref of the form returned by corelist_delta() | |
509 | # and a hash structured as documented in transform_pod(), it returns | |
510 | # a pod string representation of the sections, creating sections | |
511 | # if necessary | |
512 | sub sections_to_pod { | |
513 | my ( $data, %sections ) = @_; | |
514 | my $out = ''; | |
515 | ||
516 | for ( | |
517 | ( | |
518 | [ 'New Modules and Pragmata', 'new' ], | |
519 | [ 'Updated Modules and Pragmata', 'updated' ], | |
520 | [ 'Removed Modules and Pragmata', 'removed' ] | |
521 | ) | |
522 | ) | |
523 | { | |
524 | my ( $section_name, $title ) = @{$_}; | |
525 | ||
526 | my $section = $sections{$section_name} // { | |
3c7fa354 SH |
527 | name => $section_name, |
528 | preceding_text => "=head2 $_->[0]\n=over 4\n", | |
529 | following_text => "=back\n", | |
530 | items => [], | |
531 | manual => 1 | |
5c51ce30 AV |
532 | }; |
533 | ||
534 | $section = update_section( $section, $data, $title ); | |
535 | $section = add_to_section( $section, $data, $title ); | |
536 | $section = sort_items_in_section( $section ); | |
537 | ||
538 | next if $section->{manual} and scalar @{ $section->{items} } == 0; | |
539 | ||
540 | my $items = reduce { no warnings 'once'; $a . $b->{text} } | |
541 | ( '', @{ $section->{items} } ); | |
542 | $out .= | |
3c7fa354 | 543 | ( $section->{preceding_text} // '' ) |
5c51ce30 AV |
544 | . $items |
545 | . ( $section->{following_text} // '' ); | |
546 | } | |
547 | return $out; | |
548 | } | |
549 | ||
550 | # given a filename corresponding to an existing perldelta file | |
551 | # and a hashref of the form returned by corelist_delta(), it | |
552 | # returns a string of the resulting file after the module | |
553 | # information has been added. | |
554 | sub transform_pod { | |
555 | my ( $existing, $data ) = @_; | |
556 | ||
557 | # will contain hashrefs corresponding to new, updated and removed | |
558 | # modules and pragmata keyed by section name | |
559 | # each section is hashref of the structure | |
3c7fa354 SH |
560 | # preceding_text => Text occurring before and including the over |
561 | # region containing the list of modules, | |
562 | # items => [Arrayref of hashrefs corresponding to a module | |
563 | # entry], | |
5c51ce30 AV |
564 | # an entry has the form: |
565 | # name => Module name or undef if the name could not be determined | |
566 | # text => The text of the entry, including the item heading | |
567 | # | |
3c7fa354 SH |
568 | # following_text => Any text not corresponding to a module |
569 | # that occurs after the first module | |
5c51ce30 AV |
570 | # |
571 | # the sections are converted to a pod string by calling sections_to_pod() | |
572 | my %sections; | |
573 | ||
574 | # we are in the Modules_and_Pragmata's section | |
575 | my $in_Modules_and_Pragmata; | |
3c7fa354 | 576 | |
5c51ce30 AV |
577 | # we are the Modules_and_Pragmata's section but have not |
578 | # encountered any of the desired sections. We use this | |
579 | # flag to determine whether we should append the text to $out | |
580 | # or we need to delay appending until the module listings are | |
581 | # processed and instead append to $append_to_out | |
582 | my $in_Modules_and_Pragmata_preamble; | |
3c7fa354 | 583 | |
5c51ce30 AV |
584 | my $done_processing_Modules_and_Pragmata; |
585 | ||
586 | my $current_section; | |
3c7fa354 | 587 | |
5c51ce30 AV |
588 | # $nested_element_level == 0 : not in an over region, treat lines as text |
589 | # $nested_element_level == 1 : presumably in the top over region that | |
590 | # corresponds to the module listing. Treat | |
591 | # each item as a module | |
592 | # $nested_element_level > 1 : we only consider these values when we are in an item | |
593 | # We treat lines as the text of the current item. | |
594 | my $nested_element_level = 0; | |
3c7fa354 | 595 | |
5c51ce30 AV |
596 | my $current_item; |
597 | my $need_to_parse_module_name; | |
598 | ||
599 | my $out = ''; | |
600 | my $append_to_out = ''; | |
601 | ||
602 | open my $fh, '<', $existing or die "can't open perldelta file $existing: $!"; | |
9c7cb461 | 603 | binmode($fh); |
5c51ce30 AV |
604 | |
605 | while (<$fh>) { | |
606 | # treat the rest of the file as plain text | |
607 | if ($done_processing_Modules_and_Pragmata) { | |
608 | $out .= $_; | |
609 | next; | |
610 | } | |
611 | ||
612 | elsif ( !$in_Modules_and_Pragmata ) { | |
613 | # entering Modules and Pragmata | |
614 | if (/^=head1 Modules and Pragmata/) { | |
615 | $in_Modules_and_Pragmata = 1; | |
616 | $in_Modules_and_Pragmata_preamble = 1; | |
617 | } | |
618 | $out .= $_; | |
619 | next; | |
620 | } | |
621 | ||
622 | # leaving Modules and Pragmata | |
623 | elsif (/^=head1/) { | |
624 | if ($current_section) { | |
625 | push @{ $current_section->{items} }, $current_item | |
626 | if $current_item; | |
627 | $sections{ $current_section->{name} } = $current_section; | |
628 | } | |
629 | $done_processing_Modules_and_Pragmata = 1; | |
630 | $out .= | |
631 | sections_to_pod( $data, %sections ) . $append_to_out . $_; | |
632 | next; | |
633 | } | |
634 | ||
635 | # new section in Modules and Pragmata | |
636 | elsif (/^=head2 (.*?)$/) { | |
637 | my $name = $1; | |
638 | if ($current_section) { | |
639 | push @{ $current_section->{items} }, $current_item | |
640 | if $current_item; | |
641 | $sections{ $current_section->{name} } = $current_section; | |
642 | undef $current_section; | |
643 | } | |
644 | ||
645 | if ( is_desired_section_name($name) ) { | |
646 | undef $in_Modules_and_Pragmata_preamble; | |
647 | if ( $nested_element_level > 0 ) { | |
648 | die "Unexpected head2 at line no. $."; | |
649 | } | |
650 | my $title = get_section_name_from_heading($name); | |
651 | if ( exists $sections{$title} ) { | |
3c7fa354 | 652 | die "$name occurred twice at line no. $."; |
5c51ce30 | 653 | } |
3c7fa354 SH |
654 | $current_section = {}; |
655 | $current_section->{name} = $title; | |
656 | $current_section->{preceding_text} = $_; | |
657 | $current_section->{items} = []; | |
658 | $nested_element_level = 0; | |
5c51ce30 AV |
659 | next; |
660 | } | |
661 | ||
662 | # otherwise treat section as plain text | |
663 | else { | |
664 | if ($in_Modules_and_Pragmata_preamble) { | |
665 | $out .= $_; | |
666 | } | |
667 | else { | |
668 | $append_to_out .= $_; | |
669 | } | |
670 | next; | |
671 | } | |
672 | } | |
673 | ||
674 | elsif ($current_section) { | |
675 | ||
676 | # not in an over region | |
677 | if ( $nested_element_level == 0 ) { | |
678 | if (/^=over/) { | |
679 | $nested_element_level++; | |
680 | } | |
681 | if ( scalar @{ $current_section->{items} } > 0 ) { | |
682 | $current_section->{following_text} .= $_; | |
683 | } | |
684 | else { | |
3c7fa354 | 685 | $current_section->{preceding_text} .= $_; |
5c51ce30 AV |
686 | } |
687 | next; | |
688 | } | |
689 | ||
690 | if ($current_item) { | |
691 | if ($need_to_parse_module_name) { | |
692 | # the item may not have a parsable module name, which means that | |
693 | # $current_item->{name} will never be defined. | |
694 | if (/^(?:L|C)<(.+?)>/) { | |
695 | $current_item->{name} = $1; | |
696 | undef $need_to_parse_module_name; | |
697 | } | |
698 | # =item or =back signals the end of an item | |
699 | # block, which we handle below | |
700 | if ( !/^=(?:item|back)/ ) { | |
701 | $current_item->{text} .= $_; | |
702 | next; | |
703 | } | |
704 | } | |
705 | # currently in an over region | |
706 | # treat text inside region as plain text | |
707 | if ( $nested_element_level > 1 ) { | |
708 | if (/^=back/) { | |
709 | $nested_element_level--; | |
710 | } | |
711 | elsif (/^=over/) { | |
712 | $nested_element_level++; | |
713 | } | |
714 | $current_item->{text} .= $_; | |
715 | next; | |
716 | } | |
717 | # entering over region | |
718 | if (/^=over/) { | |
719 | $nested_element_level++; | |
720 | $current_item->{text} .= $_; | |
721 | next; | |
722 | } | |
723 | # =item or =back signals the end of an item | |
724 | # block, which we handle below | |
725 | if ( !/^=(?:item|back)/ ) { | |
726 | $current_item->{text} .= $_; | |
727 | next; | |
728 | } | |
729 | } | |
730 | ||
731 | if (/^=item \*/) { | |
732 | push @{ $current_section->{items} }, $current_item | |
733 | if $current_item; | |
734 | $current_item = { text => $_ }; | |
735 | $need_to_parse_module_name = 1; | |
736 | next; | |
737 | } | |
738 | ||
739 | if (/^=back/) { | |
740 | push @{ $current_section->{items} }, $current_item | |
741 | if $current_item; | |
742 | undef $current_item; | |
743 | $nested_element_level--; | |
744 | } | |
745 | ||
746 | if ( scalar @{ $current_section->{items} } == 0 ) { | |
3c7fa354 | 747 | $current_section->{preceding_text} .= $_; |
5c51ce30 AV |
748 | } |
749 | else { | |
750 | $current_section->{following_text} .= $_; | |
751 | } | |
752 | next; | |
753 | } | |
754 | ||
755 | # text in Modules and Pragmata not in a head2 region | |
756 | else { | |
757 | if ($in_Modules_and_Pragmata_preamble) { | |
758 | $out .= $_; | |
759 | } | |
760 | else { | |
761 | $append_to_out .= $_; | |
762 | } | |
763 | next; | |
764 | } | |
765 | } | |
766 | close $fh; | |
767 | die 'Never saw Modules and Pragmata section' unless $in_Modules_and_Pragmata; | |
768 | return $out; | |
769 | } | |
770 | ||
771 | } | |
772 | ||
773 | { | |
93b00636 FR |
774 | package DeltaParser; |
775 | use Pod::Simple::SimpleTree; | |
776 | ||
777 | sub new { | |
778 | my ($class, $input) = @_; | |
779 | ||
780 | my $self = bless {} => $class; | |
781 | ||
782 | my $parsed_pod = Pod::Simple::SimpleTree->new->parse_file($input)->root; | |
783 | splice @{ $parsed_pod }, 0, 2; # we don't care about the document structure, | |
8d132c8e | 784 | # just the nodes within it |
93b00636 FR |
785 | |
786 | $self->_parse_delta($parsed_pod); | |
787 | ||
788 | return $self; | |
789 | } | |
790 | ||
c014dfad AV |
791 | # creates the accessor methods: |
792 | # new_modules | |
793 | # updated_modules | |
794 | # removed_modules | |
93b00636 FR |
795 | for my $k (keys %sections) { |
796 | no strict 'refs'; | |
797 | my $m = "${k}_modules"; | |
798 | *$m = sub { $_[0]->{$m} }; | |
799 | } | |
800 | ||
801 | sub _parse_delta { | |
802 | my ($self, $pod) = @_; | |
803 | ||
c014dfad AV |
804 | my $new_section = $self->_look_for_section( $pod, $sections{new} ); |
805 | my $updated_section = $self->_look_for_section( $pod, $sections{updated} ); | |
806 | my $removed_section = $self->_look_for_section( $pod, $sections{removed} ); | |
807 | ||
808 | $self->_parse_new_section($new_section); | |
809 | $self->_parse_updated_section($updated_section); | |
810 | $self->_parse_removed_section($removed_section); | |
811 | ||
812 | for (qw/new_modules updated_modules removed_modules/) { | |
813 | $self->{$_} = | |
814 | [ sort { lc $a->[0] cmp lc $b->[0] } @{ $self->{$_} } ]; | |
93b00636 FR |
815 | } |
816 | ||
817 | return; | |
818 | } | |
819 | ||
820 | sub _parse_new_section { | |
821 | my ($self, $section) = @_; | |
822 | ||
c014dfad AV |
823 | $self->{new_modules} = []; |
824 | return unless $section; | |
93b00636 FR |
825 | $self->{new_modules} = $self->_parse_section($section => sub { |
826 | my ($el) = @_; | |
827 | ||
828 | my ($first, $second) = @{ $el }[2, 3]; | |
829 | my ($ver) = $second =~ /(\d[^\s]+)\s+has\s+been/; | |
830 | ||
831 | return [ $first->[2], undef, $ver ]; | |
832 | }); | |
833 | ||
834 | return; | |
835 | } | |
836 | ||
837 | sub _parse_updated_section { | |
838 | my ($self, $section) = @_; | |
839 | ||
3668a9e5 AV |
840 | $self->{updated_modules} = []; |
841 | return unless $section; | |
93b00636 FR |
842 | $self->{updated_modules} = $self->_parse_section($section => sub { |
843 | my ($el) = @_; | |
844 | ||
845 | my ($first, $second) = @{ $el }[2, 3]; | |
846 | my $module = $first->[2]; | |
3668a9e5 AV |
847 | |
848 | # the regular expression matches the following: | |
849 | # from VERSION_NUMBER to VERSION_NUMBER | |
850 | # from VERSION_NUMBER to VERSION_NUMBER. | |
851 | # from version VERSION_NUMBER to version VERSION_NUMBER. | |
852 | # from VERSION_NUMBER to VERSION_NUMBER and MODULE from VERSION_NUMBER to VERSION_NUMBER | |
853 | # from VERSION_NUMBER to VERSION_NUMBER, and MODULE from VERSION_NUMBER to VERSION_NUMBER | |
854 | # | |
3c7fa354 | 855 | # some perldeltas contain more than one module listed in an entry, this only attempts to match the |
3668a9e5 | 856 | # first module |
93b00636 | 857 | my ($old, $new) = $second =~ |
3668a9e5 | 858 | /from\s+(?:version\s+)?(\d[^\s]+)\s+to\s+(?:version\s+)?(\d[^\s,]+?)(?=[\s,]|\.\s|\.$|$).*/s; |
93b00636 FR |
859 | |
860 | warn "Unable to extract old or new version of $module from perldelta" | |
861 | if !defined $old || !defined $new; | |
862 | ||
863 | return [ $module, $old, $new ]; | |
864 | }); | |
865 | ||
866 | return; | |
867 | } | |
868 | ||
869 | sub _parse_removed_section { | |
870 | my ($self, $section) = @_; | |
c014dfad AV |
871 | |
872 | $self->{removed_modules} = []; | |
873 | return unless $section; | |
93b00636 FR |
874 | $self->{removed_modules} = $self->_parse_section($section => sub { |
875 | my ($el) = @_; | |
876 | ||
877 | my ($first, $second) = @{ $el }[2, 3]; | |
878 | my ($old) = $second =~ /was\s+(\d[^\s]+?)\.?$/; | |
879 | ||
880 | return [ $first->[2], $old, undef ]; | |
881 | }); | |
882 | ||
883 | return; | |
884 | } | |
885 | ||
886 | sub _parse_section { | |
887 | my ($self, $section, $parser) = @_; | |
888 | ||
889 | my $items = $self->_look_down($section => sub { | |
890 | my ($el) = @_; | |
891 | return unless ref $el && $el->[0] =~ /^item-/ | |
892 | && @{ $el } > 2 && ref $el->[2]; | |
5c404e2c | 893 | return unless $el->[2]->[0] =~ /C|L/; |
93b00636 FR |
894 | |
895 | return 1; | |
896 | }); | |
897 | ||
898 | return [map { $parser->($_) } @{ $items }]; | |
899 | } | |
900 | ||
901 | sub _look_down { | |
902 | my ($self, $pod, $predicate) = @_; | |
903 | my @pod = @{ $pod }; | |
904 | ||
905 | my @l; | |
906 | while (my $el = shift @pod) { | |
907 | push @l, $el if $predicate->($el); | |
908 | if (ref $el) { | |
909 | my @el = @{ $el }; | |
910 | splice @el, 0, 2; | |
911 | unshift @pod, @el if @el; | |
912 | } | |
913 | } | |
914 | ||
915 | return @l ? \@l : undef; | |
916 | } | |
917 | ||
918 | sub _look_for_section { | |
919 | my ($self, $pod, $section) = @_; | |
920 | ||
921 | my $level; | |
922 | $self->_look_for_range($pod, | |
923 | sub { | |
924 | my ($el) = @_; | |
86c08a2c | 925 | my ($heading) = $el->[0] =~ /^head(\d)$/; |
b88937b3 | 926 | my $f = $heading && $el->[2] =~ /^$section/; |
86c08a2c | 927 | $level = $heading if $f && !$level; |
93b00636 FR |
928 | return $f; |
929 | }, | |
930 | sub { | |
931 | my ($el) = @_; | |
932 | $el->[0] =~ /^head(\d)$/ && $1 <= $level; | |
933 | }, | |
934 | ); | |
935 | } | |
936 | ||
937 | sub _look_for_range { | |
938 | my ($self, $pod, $start_predicate, $stop_predicate) = @_; | |
939 | ||
940 | my @l; | |
941 | for my $el (@{ $pod }) { | |
942 | if (@l) { | |
943 | return \@l if $stop_predicate->($el); | |
944 | } | |
945 | else { | |
946 | next unless $start_predicate->($el); | |
947 | } | |
948 | push @l, $el; | |
949 | } | |
950 | ||
951 | return; | |
952 | } | |
2a19fd14 | 953 | } |
88e14305 | 954 | |
2a19fd14 | 955 | run; |