Commit | Line | Data |
---|---|---|
603f4ea0 DG |
1 | #!perl |
2 | use 5.010; | |
3 | use strict; | |
4 | use warnings; | |
5 | use lib 'Porting'; | |
6 | use Maintainers qw/%Modules/; | |
5e0e6da6 | 7 | use lib 'dist/Module-CoreList/lib'; |
603f4ea0 | 8 | use Module::CoreList; |
2a19fd14 | 9 | use Getopt::Long; |
e8097ff2 MM |
10 | |
11 | =head1 USAGE | |
12 | ||
13 | # generate the module changes for the Perl you are currently building | |
67e92e89 | 14 | ./perl -Ilib Porting/corelist-perldelta.pl |
5c51ce30 AV |
15 | |
16 | # update the module changes for the Perl you are currently building | |
59d2de25 | 17 | ./perl -Ilib Porting/corelist-perldelta.pl --mode=update pod/perldelta.pod |
5c51ce30 | 18 | |
e8097ff2 | 19 | # generate a diff between the corelist sections of two perldelta* files: |
80da4b39 | 20 | perl Porting/corelist-perldelta.pl --mode=check 5.017001 5.017002 <perl5172delta.pod |
e8097ff2 MM |
21 | |
22 | =head1 ABOUT | |
23 | ||
24 | corelist-perldelta.pl is a bit schizophrenic. The part to generate the | |
25 | new Perldelta text does not need Algorithm::Diff, but wants to be | |
26 | run with the freshly built Perl. | |
27 | ||
28 | The part to check the diff wants to be run with a Perl that has an up-to-date | |
29 | L<Module::CoreList>, but needs the outside L<Algorithm::Diff>. | |
30 | ||
31 | Ideally, the program will be split into two separate programs, one | |
b88937b3 | 32 | to generate the text and one to show the diff between the |
e8097ff2 MM |
33 | corelist sections of the last perldelta and the next perldelta. |
34 | ||
59d2de25 AV |
35 | Currently no information about Removed Modules is displayed in any of the |
36 | modes. | |
54e78c5e | 37 | |
e8097ff2 | 38 | =cut |
93b00636 FR |
39 | |
40 | my %sections = ( | |
8d132c8e MM |
41 | new => qr/New Modules and Pragma(ta)?/, |
42 | updated => qr/Updated Modules and Pragma(ta)?/, | |
43 | removed => qr/Removed Modules and Pragma(ta)?/, | |
44 | ); | |
45 | ||
46 | my %titles = ( | |
caaa1415 MM |
47 | new => 'New Modules and Pragmata', |
48 | updated => 'Updated Modules and Pragmata', | |
49 | removed => 'Removed Modules and Pragmata', | |
93b00636 | 50 | ); |
603f4ea0 | 51 | |
88e14305 DG |
52 | my $deprecated; |
53 | ||
2a19fd14 FR |
54 | sub run { |
55 | my %opt = (mode => 'generate'); | |
56 | ||
57 | GetOptions(\%opt, | |
5c51ce30 | 58 | 'mode|m:s', # 'generate', 'check', 'update' |
2a19fd14 FR |
59 | ); |
60 | ||
93b00636 | 61 | # by default, compare latest two version in CoreList; |
c27d84d9 | 62 | my ($old, $new) = latest_two_perl_versions(); |
5e1cf63b AV |
63 | |
64 | # use the provided versions if present | |
65 | # @ARGV >=2 means [old_version] [new_version] [path/to/file] | |
66 | if ( @ARGV >= 2) { | |
67 | ($old, $new) = (shift @ARGV, shift @ARGV); | |
68 | die "$old is an invalid version\n" if not exists | |
69 | $Module::CoreList::version{$old}; | |
29900347 | 70 | die "$new is an invalid version\n" if not exists |
5e1cf63b AV |
71 | $Module::CoreList::version{$new}; |
72 | } | |
2a19fd14 | 73 | |
93b00636 | 74 | if ( $opt{mode} eq 'generate' ) { |
2a19fd14 | 75 | do_generate($old => $new); |
88e14305 | 76 | } |
93b00636 FR |
77 | elsif ( $opt{mode} eq 'check' ) { |
78 | do_check(\*ARGV, $old => $new); | |
79 | } | |
5c51ce30 AV |
80 | elsif ( $opt{mode} eq 'update' ) { |
81 | do_update_existing(shift @ARGV, $old => $new); | |
82 | } | |
88e14305 | 83 | else { |
2a19fd14 FR |
84 | die "Unrecognized mode '$opt{mode}'\n"; |
85 | } | |
86 | ||
87 | exit 0; | |
88 | } | |
89 | ||
c27d84d9 AC |
90 | sub latest_two_perl_versions { |
91 | ||
92 | my @versions = sort keys %Module::CoreList::version; | |
93 | ||
94 | my $new = pop @versions; | |
95 | ||
96 | # If a fully-padded version number ends in a zero (as in "5.019010"), that | |
97 | # version shows up in %Module::CoreList::version both with and without its | |
98 | # trailing zeros. So skip all versions that are numerically equal to $new. | |
99 | pop @versions while @versions && $versions[-1] == $new; | |
100 | ||
101 | die "Too few distinct core versions in %Module::CoreList::version ?!\n" | |
102 | if !@versions; | |
103 | ||
104 | return $versions[-1], $new; | |
105 | } | |
106 | ||
f40724d5 AV |
107 | # Given two perl versions, it returns a list describing the core distributions that have changed. |
108 | # The first three elements are hashrefs corresponding to new, updated, and removed modules | |
109 | # and are of the form (mostly, see the special remarks about removed): | |
110 | # 'Distribution Name' => ['Distribution Name', previous version number, current version number] | |
3c7fa354 SH |
111 | # where the version number is undef if the distribution did not exist. |
112 | # The fourth element is an arrayref of core distribution names of those distribution for which it | |
113 | # is unknown whether they have changed and therefore need to be manually checked. | |
f40724d5 AV |
114 | # |
115 | # In most cases, the distribution name in %Modules corresponds to the module that is representative | |
116 | # of the distribution as listed in Module::CoreList. However, there are a few distribution names | |
3c7fa354 | 117 | # that do not correspond to a module. %distToModules has been created which maps the distribution |
f40724d5 AV |
118 | # name to a representative module. The representative module was chosen by either looking at the |
119 | # Makefile of the distribution or by seeing which module the distribution has been traditionally | |
3c7fa354 | 120 | # listed under in past perldeltas. |
f40724d5 AV |
121 | # |
122 | # There are a few distributions for which there is no single representative module (e.g. libnet). | |
123 | # These distributions are returned as the last element of the list. | |
124 | # | |
125 | # %Modules contains a final key, _PERLLIB, which contains a list of modules that are owned by p5p. | |
126 | # This list contains modules and pragmata that may also be present in Module::CoreList. | |
127 | # A list of modules are in the list @unclaimedModules, which were manually listed based on whether | |
3c7fa354 | 128 | # they were independent modules and whether they have been listed in past perldeltas. |
f40724d5 AV |
129 | # The pragmata were found by doing something like: |
130 | # say for sort grep { $_ eq lc $_ and !exists $Modules{$_}} | |
131 | # keys %{$Module::CoreList::version{'5.019003'}} | |
132 | # and manually filtering out pragamata that were already covered. | |
133 | # | |
134 | # It is currently not possible to differentiate between a removed module and a removed | |
135 | # distribution. Therefore, the removed hashref contains every module that has been removed, even if | |
136 | # the module's corresponding distribution has not been removed. | |
137 | ||
93b00636 | 138 | sub corelist_delta { |
2a19fd14 FR |
139 | my ($old, $new) = @_; |
140 | my $corelist = \%Module::CoreList::version; | |
f40724d5 | 141 | my %changes = Module::CoreList::changes_between( $old, $new ); |
2a19fd14 FR |
142 | $deprecated = $Module::CoreList::deprecated{$new}; |
143 | ||
f40724d5 AV |
144 | my $getModifyType = sub { |
145 | my $data = shift; | |
146 | if ( exists $data->{left} and exists $data->{right} ) { | |
147 | return 'updated'; | |
148 | } | |
149 | elsif ( !exists $data->{left} and exists $data->{right} ) { | |
150 | return 'new'; | |
151 | } | |
152 | elsif ( exists $data->{left} and !exists $data->{right} ) { | |
153 | return 'removed'; | |
154 | } | |
155 | return undef; | |
156 | }; | |
157 | ||
158 | my @unclaimedModules = qw/AnyDBM_File B B::Concise B::Deparse Benchmark Class::Struct Config::Extensions DB DBM_Filter Devel::Peek DirHandle DynaLoader English Errno ExtUtils::Embed ExtUtils::Miniperl ExtUtils::Typemaps ExtUtils::XSSymSet Fcntl File::Basename File::Compare File::Copy File::DosGlob File::Find File::Glob File::stat FileCache FileHandle FindBin GDBM_File Getopt::Std Hash::Util Hash::Util::FieldHash I18N::Langinfo IPC::Open3 NDBM_File ODBM_File Opcode PerlIO PerlIO::encoding PerlIO::mmap PerlIO::scalar PerlIO::via Pod::Functions Pod::Html POSIX SDBM_File SelectSaver Symbol Sys::Hostname Thread Tie::Array Tie::Handle Tie::Hash Tie::Hash::NamedCapture Tie::Memoize Tie::Scalar Tie::StdHandle Tie::SubstrHash Time::gmtime Time::localtime Time::tm Unicode::UCD UNIVERSAL User::grent User::pwent VMS::DCLsym VMS::Filespec VMS::Stdio XS::Typemap Win32CORE/; | |
92ebde38 | 159 | my @unclaimedPragmata = qw/arybase attributes blib bytes charnames deprecate diagnostics encoding feature fields filetest inc::latest integer less locale mro open ops overload overloading re sigtrap sort strict subs utf8 vars vmsish/; |
f40724d5 AV |
160 | my @unclaimed = (@unclaimedModules, @unclaimedPragmata); |
161 | ||
162 | my %distToModules = ( | |
163 | 'IO-Compress' => [ | |
164 | { | |
165 | 'name' => 'IO-Compress', | |
166 | 'modification' => $getModifyType->( $changes{'IO::Compress::Base'} ), | |
167 | 'data' => $changes{'IO::Compress::Base'} | |
168 | } | |
169 | ], | |
170 | 'Locale-Codes' => [ | |
171 | { | |
172 | 'name' => 'Locale::Codes', | |
173 | 'modification' => $getModifyType->( $changes{'Locale::Codes'} ), | |
174 | 'data' => $changes{'Locale::Codes'} | |
175 | } | |
176 | ], | |
177 | 'PathTools' => [ | |
178 | { | |
179 | 'name' => 'File::Spec', | |
180 | 'modification' => $getModifyType->( $changes{'Cwd'} ), | |
181 | 'data' => $changes{'Cwd'} | |
182 | } | |
183 | ], | |
184 | 'Scalar-List-Utils' => [ | |
185 | { | |
186 | 'name' => 'List::Util', | |
187 | 'modification' => $getModifyType->( $changes{'List::Util'} ), | |
188 | 'data' => $changes{'List::Util'} | |
189 | }, | |
190 | { | |
191 | 'name' => 'Scalar::Util', | |
192 | 'modification' => $getModifyType->( $changes{'Scalar::Util'} ), | |
193 | 'data' => $changes{'Scalar::Util'} | |
194 | } | |
195 | ], | |
196 | 'Text-Tabs+Wrap' => [ | |
197 | { | |
198 | 'name' => 'Text::Tabs', | |
199 | 'modification' => $getModifyType->( $changes{'Text::Tabs'} ), | |
200 | 'data' => $changes{'Text::Tabs'} | |
201 | }, | |
202 | { | |
203 | 'name' => 'Text::Wrap', | |
204 | 'modification' => $getModifyType->( $changes{'Text::Wrap'} ), | |
205 | 'data' => $changes{'Text::Wrap'} | |
206 | } | |
207 | ], | |
208 | ); | |
209 | ||
210 | # structure is (new|removed|updated) => [ [ModuleName, previousVersion, newVersion] ] | |
211 | my $deltaGrouping = {}; | |
212 | ||
213 | # list of distributions listed in %Modules that need to be manually checked because there is no module that represents it | |
214 | my @manuallyCheck; | |
2a19fd14 FR |
215 | |
216 | # %Modules defines what is currently in core | |
217 | for my $k ( keys %Modules ) { | |
f40724d5 AV |
218 | next if $k eq '_PERLLIB'; #these are taken care of by being listed in @unclaimed |
219 | next if Module::CoreList::is_core($k) and !exists $changes{$k}; #modules that have not changed | |
220 | ||
221 | my ( $distName, $modifyType, $data ); | |
222 | ||
223 | if ( exists $changes{$k} ) { | |
224 | $distName = $k; | |
225 | $modifyType = $getModifyType->( $changes{$k} ); | |
226 | $data = $changes{$k}; | |
88e14305 | 227 | } |
f40724d5 AV |
228 | elsif ( exists $distToModules{$k} ) { |
229 | # modification will be undef if the distribution has not changed | |
230 | my @modules = grep { $_->{modification} } @{ $distToModules{$k} }; | |
231 | for (@modules) { | |
232 | $deltaGrouping->{ $_->{modification} }->{ $_->{name} } = [ $_->{name}, $_->{data}->{left}, $_->{data}->{right} ]; | |
2a19fd14 | 233 | } |
f40724d5 AV |
234 | next; |
235 | } | |
236 | else { | |
237 | push @manuallyCheck, $k and next; | |
238 | } | |
239 | ||
240 | $deltaGrouping->{$modifyType}->{$distName} = [ $distName, $data->{left}, $data->{right} ]; | |
241 | } | |
242 | ||
243 | for my $k (@unclaimed) { | |
244 | if ( exists $changes{$k} ) { | |
245 | $deltaGrouping->{ $getModifyType->( $changes{$k} ) }->{$k} = | |
246 | [ $k, $changes{$k}->{left}, $changes{$k}->{right} ]; | |
88e14305 DG |
247 | } |
248 | } | |
603f4ea0 | 249 | |
2a19fd14 FR |
250 | # in old corelist, but not this one => removed |
251 | # N.B. This is exhaustive -- not just what's in %Modules, so modules removed from | |
252 | # distributions will show up here, too. Some person will have to review to see what's | |
253 | # important. That's the best we can do without a historical Maintainers.pl | |
254 | for my $k ( keys %{ $corelist->{$old} } ) { | |
255 | if ( ! exists $corelist->{$new}{$k} ) { | |
f40724d5 | 256 | $deltaGrouping->{'removed'}->{$k} = [ $k, $corelist->{$old}{$k}, undef ]; |
2a19fd14 | 257 | } |
88e14305 | 258 | } |
88e14305 | 259 | |
f40724d5 AV |
260 | return ( |
261 | \%{ $deltaGrouping->{'new'} }, | |
262 | \%{ $deltaGrouping->{'removed'} }, | |
263 | \%{ $deltaGrouping->{'updated'} }, | |
264 | \@manuallyCheck | |
265 | ); | |
93b00636 FR |
266 | } |
267 | ||
5c51ce30 AV |
268 | # currently does not update the Removed Module section |
269 | sub do_update_existing { | |
270 | my ( $existing, $old, $new ) = @_; | |
271 | ||
272 | my ( $added, $removed, $updated, $manuallyCheck ) = corelist_delta( $old => $new ); | |
273 | if ($manuallyCheck) { | |
59d2de25 AV |
274 | print "It cannot be determined whether the following distributions have changed.\n"; |
275 | print "Please check and list accordingly:\n"; | |
5c51ce30 | 276 | say "\t* $_" for sort @{$manuallyCheck}; |
59d2de25 | 277 | print "\n"; |
5c51ce30 AV |
278 | } |
279 | ||
280 | my $data = { | |
281 | new => $added, | |
282 | updated => $updated, | |
283 | #removed => $removed, ignore removed for now | |
284 | }; | |
285 | ||
286 | my $text = DeltaUpdater::transform_pod( $existing, $data ); | |
287 | open my $out, '>', $existing or die "can't open perldelta file $existing: $!"; | |
9c7cb461 | 288 | binmode($out); |
5c51ce30 AV |
289 | print $out $text; |
290 | close $out; | |
59d2de25 AV |
291 | say "The New and Updated Modules and Pragamata sections in $existing have been updated"; |
292 | say "Please ensure the Removed Modules and Pragmata section is up-to-date"; | |
5c51ce30 AV |
293 | } |
294 | ||
93b00636 FR |
295 | sub do_generate { |
296 | my ($old, $new) = @_; | |
f40724d5 AV |
297 | my ($added, $removed, $updated, $manuallyCheck) = corelist_delta($old => $new); |
298 | ||
299 | if ($manuallyCheck) { | |
59d2de25 AV |
300 | print "\nXXXIt cannot be determined whether the following distributions have changed.\n"; |
301 | print "Please check and list accordingly:\n"; | |
f40724d5 | 302 | say "\t$_" for @{$manuallyCheck}; |
59d2de25 | 303 | print "\n"; |
f40724d5 | 304 | } |
93b00636 | 305 | |
3bb80250 AV |
306 | my $data = { |
307 | new => $added, | |
308 | updated => $updated, | |
309 | #removed => $removed, ignore removed for now | |
310 | }; | |
311 | ||
312 | say DeltaUpdater::sections_to_pod($data) | |
93b00636 FR |
313 | } |
314 | ||
315 | sub do_check { | |
316 | my ($in, $old, $new) = @_; | |
317 | ||
318 | my $delta = DeltaParser->new($in); | |
f40724d5 | 319 | my ($added, $removed, $updated) = corelist_delta($old => $new); |
93b00636 | 320 | |
fc353283 AV |
321 | # because of the difficulty in identifying the distribution for removed modules |
322 | # don't bother checking them | |
f40724d5 | 323 | for my $ck ([ 'new', $delta->new_modules, $added ], |
fc353283 | 324 | #[ 'removed', $delta->removed_modules, $removed ], |
f40724d5 | 325 | [ 'updated', $delta->updated_modules, $updated ] ) { |
93b00636 | 326 | my @delta = @{ $ck->[1] }; |
f40724d5 | 327 | my @corelist = sort { lc $a->[0] cmp lc $b->[0] } values %{ $ck->[2] }; |
93b00636 FR |
328 | |
329 | printf $ck->[0] . ":\n"; | |
330 | ||
e8097ff2 | 331 | require Algorithm::Diff; |
93b00636 FR |
332 | my $diff = Algorithm::Diff->new(map { |
333 | [map { join q{ } => grep defined, @{ $_ } } @{ $_ }] | |
334 | } \@delta, \@corelist); | |
335 | ||
336 | while ($diff->Next) { | |
337 | next if $diff->Same; | |
338 | my $sep = ''; | |
339 | if (!$diff->Items(2)) { | |
340 | printf "%d,%dd%d\n", $diff->Get(qw( Min1 Max1 Max2 )); | |
341 | } elsif(!$diff->Items(1)) { | |
342 | printf "%da%d,%d\n", $diff->Get(qw( Max1 Min2 Max2 )); | |
343 | } else { | |
344 | $sep = "---\n"; | |
345 | printf "%d,%dc%d,%d\n", $diff->Get(qw( Min1 Max1 Min2 Max2 )); | |
346 | } | |
fc353283 | 347 | print "Delta< $_\n" for $diff->Items(1); |
93b00636 | 348 | print $sep; |
fc353283 | 349 | print "Corelist> $_\n" for $diff->Items(2); |
93b00636 FR |
350 | } |
351 | ||
352 | print "\n"; | |
353 | } | |
354 | } | |
355 | ||
356 | { | |
5c51ce30 AV |
357 | |
358 | package DeltaUpdater; | |
359 | use List::Util 'reduce'; | |
360 | ||
361 | sub get_section_name_from_heading { | |
362 | my $heading = shift; | |
363 | while (my ($key, $expression) = each %sections) { | |
364 | if ($heading =~ $expression) { | |
365 | return $titles{$key}; | |
366 | } | |
367 | } | |
368 | die "$heading did not match any section"; | |
369 | } | |
370 | ||
371 | sub is_desired_section_name { | |
372 | for (values %sections) { | |
373 | return 1 if $_[0] =~ $_; | |
374 | } | |
375 | return 0; | |
376 | } | |
377 | ||
378 | # verify the module and pragmata in the section, changing the stated version if necessary | |
379 | # this subroutine warns if the module name cannot be parsed or if it is not listed in | |
380 | # the results returned from corelist_delta() | |
381 | # | |
382 | # a side-effect of calling this function is that modules present in the section are | |
383 | # removed from $data, resulting in $data containing only those modules and pragmata | |
384 | # that were not listed in the perldelta file. This means we can then pass $data to | |
385 | # add_to_section() without worrying about filtering out duplicates | |
386 | sub update_section { | |
387 | my ( $section, $data, $title ) = @_; | |
388 | my @items = @{ $section->{items} }; | |
389 | ||
390 | for my $item (@items) { | |
391 | ||
392 | my $content = $item->{text}; | |
393 | my $module = $item->{name}; | |
394 | ||
318148ea AV |
395 | #skip dummy items |
396 | next if !$module and $content =~ /\s*xx*\s*/i; | |
397 | ||
5c51ce30 | 398 | say "Could not parse module name; line is:\n\t$content" and next unless $module; |
59d2de25 AV |
399 | |
400 | if ( !$data->{$title}{$module} ) { | |
401 | print "$module is not listed as being $title in Module::CoreList.\n"; | |
402 | print "Ensure Module::CoreList has been updated and\n"; | |
403 | print "check to see that the distribution is not listed under another name.\n\n"; | |
404 | next; | |
405 | } | |
5c51ce30 AV |
406 | |
407 | if ( $title eq 'new' ) { | |
408 | my ($new) = $content =~ /(\d[^\s]+)\s+has\s+been.*$/m; | |
409 | say "Could not parse new version for $module; line is:\n\t$content" and next unless $new; | |
410 | if ( $data->{$title}{$module}[2] ne $new ) { | |
411 | say "$module: new version differs; version in pod: $new; version in corelist: " . $data->{$title}{$module}[2]; | |
412 | } | |
413 | $content =~ s/\d[^\s]+(\s+has\s+been.*$)/$data->{$title}{$module}[2].$1/me; | |
414 | } | |
415 | ||
416 | elsif ( $title eq 'updated' ) { | |
417 | my ( $prev, $new ) = $content =~ /from\s+(?:version\s+)?(\d[^\s]+)\s+to\s+(?:version\s+)?(\d[^\s,]+?)(?=[\s,]|\.\s|\.$|$).*/s; | |
418 | say "Could not parse old and new version for $module; line is:\n\t$content" and next | |
419 | unless $prev and $new; | |
420 | if ( $data->{$title}{$module}[1] ne $prev ) { | |
421 | say "$module: previous version differs; version in pod: $prev; version in corelist: " . $data->{$title}{$module}[1]; | |
422 | } | |
423 | if ( $data->{$title}{$module}[2] ne $new ) { | |
424 | say "$module: new version differs; version in pod: $new; version in corelist: " . $data->{$title}{$module}[2]; | |
425 | } | |
426 | $content =~ | |
427 | s/(from\s+(?:version\s+)?)\d[^\s]+(\s+to\s+(?:version\s+)?)\d[^\s,]+?(?=[\s,]|\.\s|\.$|$)(.*)/$1.$data->{$title}{$module}[1].$2.$data->{$title}{$module}[2].$3/se; | |
428 | } | |
429 | ||
430 | elsif ( $title eq 'removed' ) { | |
431 | my ($prev) = $content =~ /^.*?was\s+(\d[^\s]+?)/m; | |
432 | say "Could not parse old version for $module; line is:\n\t$content" and next unless $prev; | |
433 | if ( $data->{$title}{$module}[1] ne $prev ) { | |
434 | say "$module: previous version differs; $prev " . $data->{$title}{$module}[1]; | |
435 | } | |
436 | $content =~ s/(^.*?was\s+)\d[^\s]+?/$1.$data->{$title}{$module}[1]/me; | |
437 | } | |
438 | ||
439 | delete $data->{$title}{$module}; | |
440 | $item->{text} = $content; | |
441 | } | |
442 | return $section; | |
443 | } | |
444 | ||
445 | # add modules and pragmata present in $data to the section | |
446 | sub add_to_section { | |
447 | my ( $section, $data, $title ) = @_; | |
448 | ||
8b591357 | 449 | #undef is a valid version name in Module::CoreList so suppress warnings about concatenating undef values |
5c51ce30 AV |
450 | no warnings 'uninitialized'; |
451 | for ( values %{ $data->{$title} } ) { | |
452 | my ( $mod, $old_v, $new_v ) = @{$_}; | |
453 | my ( $item, $text ); | |
454 | ||
455 | $item = { name => $mod, text => "=item *\n" }; | |
456 | if ( $title eq 'new' ) { | |
457 | $text = "L<$mod> $new_v has been added to the Perl core.\n"; | |
458 | } | |
459 | ||
460 | elsif ( $title eq 'updated' ) { | |
461 | $text = "L<$mod> has been upgraded from version $old_v to $new_v.\n"; | |
462 | if ( $deprecated->{$mod} ) { | |
463 | $text .= "NOTE: L<$mod> is deprecated and may be removed from a future version of Perl.\n"; | |
464 | } | |
465 | } | |
466 | ||
467 | elsif ( $title eq 'removed' ) { | |
468 | $text = "C<$mod> has been removed from the Perl core. Prior version was $old_v.\n"; | |
469 | } | |
470 | ||
471 | $item->{text} .= "\n$text\n"; | |
472 | push @{ $section->{items} }, $item; | |
473 | } | |
474 | return $section; | |
475 | } | |
476 | ||
477 | sub sort_items_in_section { | |
478 | my ($section) = @_; | |
479 | ||
480 | # if we could not parse the module name, it will be uninitalized | |
481 | # in sort. This is not a problem as it will just result in these | |
8b591357 | 482 | # sections being placed near the beginning of the section |
5c51ce30 AV |
483 | no warnings 'uninitialized'; |
484 | $section->{items} = | |
485 | [ sort { lc $a->{name} cmp lc $b->{name} } @{ $section->{items} } ]; | |
486 | return $section; | |
487 | } | |
488 | ||
489 | # given a hashref of the form returned by corelist_delta() | |
490 | # and a hash structured as documented in transform_pod(), it returns | |
491 | # a pod string representation of the sections, creating sections | |
492 | # if necessary | |
493 | sub sections_to_pod { | |
494 | my ( $data, %sections ) = @_; | |
495 | my $out = ''; | |
496 | ||
497 | for ( | |
498 | ( | |
499 | [ 'New Modules and Pragmata', 'new' ], | |
500 | [ 'Updated Modules and Pragmata', 'updated' ], | |
501 | [ 'Removed Modules and Pragmata', 'removed' ] | |
502 | ) | |
503 | ) | |
504 | { | |
505 | my ( $section_name, $title ) = @{$_}; | |
506 | ||
507 | my $section = $sections{$section_name} // { | |
3c7fa354 SH |
508 | name => $section_name, |
509 | preceding_text => "=head2 $_->[0]\n=over 4\n", | |
510 | following_text => "=back\n", | |
511 | items => [], | |
512 | manual => 1 | |
5c51ce30 AV |
513 | }; |
514 | ||
515 | $section = update_section( $section, $data, $title ); | |
516 | $section = add_to_section( $section, $data, $title ); | |
517 | $section = sort_items_in_section( $section ); | |
518 | ||
519 | next if $section->{manual} and scalar @{ $section->{items} } == 0; | |
520 | ||
521 | my $items = reduce { no warnings 'once'; $a . $b->{text} } | |
522 | ( '', @{ $section->{items} } ); | |
523 | $out .= | |
3c7fa354 | 524 | ( $section->{preceding_text} // '' ) |
5c51ce30 AV |
525 | . $items |
526 | . ( $section->{following_text} // '' ); | |
527 | } | |
528 | return $out; | |
529 | } | |
530 | ||
531 | # given a filename corresponding to an existing perldelta file | |
532 | # and a hashref of the form returned by corelist_delta(), it | |
533 | # returns a string of the resulting file after the module | |
534 | # information has been added. | |
535 | sub transform_pod { | |
536 | my ( $existing, $data ) = @_; | |
537 | ||
538 | # will contain hashrefs corresponding to new, updated and removed | |
539 | # modules and pragmata keyed by section name | |
540 | # each section is hashref of the structure | |
3c7fa354 SH |
541 | # preceding_text => Text occurring before and including the over |
542 | # region containing the list of modules, | |
543 | # items => [Arrayref of hashrefs corresponding to a module | |
544 | # entry], | |
5c51ce30 AV |
545 | # an entry has the form: |
546 | # name => Module name or undef if the name could not be determined | |
547 | # text => The text of the entry, including the item heading | |
548 | # | |
3c7fa354 SH |
549 | # following_text => Any text not corresponding to a module |
550 | # that occurs after the first module | |
5c51ce30 AV |
551 | # |
552 | # the sections are converted to a pod string by calling sections_to_pod() | |
553 | my %sections; | |
554 | ||
555 | # we are in the Modules_and_Pragmata's section | |
556 | my $in_Modules_and_Pragmata; | |
3c7fa354 | 557 | |
5c51ce30 AV |
558 | # we are the Modules_and_Pragmata's section but have not |
559 | # encountered any of the desired sections. We use this | |
560 | # flag to determine whether we should append the text to $out | |
561 | # or we need to delay appending until the module listings are | |
562 | # processed and instead append to $append_to_out | |
563 | my $in_Modules_and_Pragmata_preamble; | |
3c7fa354 | 564 | |
5c51ce30 AV |
565 | my $done_processing_Modules_and_Pragmata; |
566 | ||
567 | my $current_section; | |
3c7fa354 | 568 | |
5c51ce30 AV |
569 | # $nested_element_level == 0 : not in an over region, treat lines as text |
570 | # $nested_element_level == 1 : presumably in the top over region that | |
571 | # corresponds to the module listing. Treat | |
572 | # each item as a module | |
573 | # $nested_element_level > 1 : we only consider these values when we are in an item | |
574 | # We treat lines as the text of the current item. | |
575 | my $nested_element_level = 0; | |
3c7fa354 | 576 | |
5c51ce30 AV |
577 | my $current_item; |
578 | my $need_to_parse_module_name; | |
579 | ||
580 | my $out = ''; | |
581 | my $append_to_out = ''; | |
582 | ||
583 | open my $fh, '<', $existing or die "can't open perldelta file $existing: $!"; | |
9c7cb461 | 584 | binmode($fh); |
5c51ce30 AV |
585 | |
586 | while (<$fh>) { | |
587 | # treat the rest of the file as plain text | |
588 | if ($done_processing_Modules_and_Pragmata) { | |
589 | $out .= $_; | |
590 | next; | |
591 | } | |
592 | ||
593 | elsif ( !$in_Modules_and_Pragmata ) { | |
594 | # entering Modules and Pragmata | |
595 | if (/^=head1 Modules and Pragmata/) { | |
596 | $in_Modules_and_Pragmata = 1; | |
597 | $in_Modules_and_Pragmata_preamble = 1; | |
598 | } | |
599 | $out .= $_; | |
600 | next; | |
601 | } | |
602 | ||
603 | # leaving Modules and Pragmata | |
604 | elsif (/^=head1/) { | |
605 | if ($current_section) { | |
606 | push @{ $current_section->{items} }, $current_item | |
607 | if $current_item; | |
608 | $sections{ $current_section->{name} } = $current_section; | |
609 | } | |
610 | $done_processing_Modules_and_Pragmata = 1; | |
611 | $out .= | |
612 | sections_to_pod( $data, %sections ) . $append_to_out . $_; | |
613 | next; | |
614 | } | |
615 | ||
616 | # new section in Modules and Pragmata | |
617 | elsif (/^=head2 (.*?)$/) { | |
618 | my $name = $1; | |
619 | if ($current_section) { | |
620 | push @{ $current_section->{items} }, $current_item | |
621 | if $current_item; | |
622 | $sections{ $current_section->{name} } = $current_section; | |
623 | undef $current_section; | |
624 | } | |
625 | ||
626 | if ( is_desired_section_name($name) ) { | |
627 | undef $in_Modules_and_Pragmata_preamble; | |
628 | if ( $nested_element_level > 0 ) { | |
629 | die "Unexpected head2 at line no. $."; | |
630 | } | |
631 | my $title = get_section_name_from_heading($name); | |
632 | if ( exists $sections{$title} ) { | |
3c7fa354 | 633 | die "$name occurred twice at line no. $."; |
5c51ce30 | 634 | } |
3c7fa354 SH |
635 | $current_section = {}; |
636 | $current_section->{name} = $title; | |
637 | $current_section->{preceding_text} = $_; | |
638 | $current_section->{items} = []; | |
639 | $nested_element_level = 0; | |
5c51ce30 AV |
640 | next; |
641 | } | |
642 | ||
643 | # otherwise treat section as plain text | |
644 | else { | |
645 | if ($in_Modules_and_Pragmata_preamble) { | |
646 | $out .= $_; | |
647 | } | |
648 | else { | |
649 | $append_to_out .= $_; | |
650 | } | |
651 | next; | |
652 | } | |
653 | } | |
654 | ||
655 | elsif ($current_section) { | |
656 | ||
657 | # not in an over region | |
658 | if ( $nested_element_level == 0 ) { | |
659 | if (/^=over/) { | |
660 | $nested_element_level++; | |
661 | } | |
662 | if ( scalar @{ $current_section->{items} } > 0 ) { | |
663 | $current_section->{following_text} .= $_; | |
664 | } | |
665 | else { | |
3c7fa354 | 666 | $current_section->{preceding_text} .= $_; |
5c51ce30 AV |
667 | } |
668 | next; | |
669 | } | |
670 | ||
671 | if ($current_item) { | |
672 | if ($need_to_parse_module_name) { | |
673 | # the item may not have a parsable module name, which means that | |
674 | # $current_item->{name} will never be defined. | |
675 | if (/^(?:L|C)<(.+?)>/) { | |
676 | $current_item->{name} = $1; | |
677 | undef $need_to_parse_module_name; | |
678 | } | |
679 | # =item or =back signals the end of an item | |
680 | # block, which we handle below | |
681 | if ( !/^=(?:item|back)/ ) { | |
682 | $current_item->{text} .= $_; | |
683 | next; | |
684 | } | |
685 | } | |
686 | # currently in an over region | |
687 | # treat text inside region as plain text | |
688 | if ( $nested_element_level > 1 ) { | |
689 | if (/^=back/) { | |
690 | $nested_element_level--; | |
691 | } | |
692 | elsif (/^=over/) { | |
693 | $nested_element_level++; | |
694 | } | |
695 | $current_item->{text} .= $_; | |
696 | next; | |
697 | } | |
698 | # entering over region | |
699 | if (/^=over/) { | |
700 | $nested_element_level++; | |
701 | $current_item->{text} .= $_; | |
702 | next; | |
703 | } | |
704 | # =item or =back signals the end of an item | |
705 | # block, which we handle below | |
706 | if ( !/^=(?:item|back)/ ) { | |
707 | $current_item->{text} .= $_; | |
708 | next; | |
709 | } | |
710 | } | |
711 | ||
712 | if (/^=item \*/) { | |
713 | push @{ $current_section->{items} }, $current_item | |
714 | if $current_item; | |
715 | $current_item = { text => $_ }; | |
716 | $need_to_parse_module_name = 1; | |
717 | next; | |
718 | } | |
719 | ||
720 | if (/^=back/) { | |
721 | push @{ $current_section->{items} }, $current_item | |
722 | if $current_item; | |
723 | undef $current_item; | |
724 | $nested_element_level--; | |
725 | } | |
726 | ||
727 | if ( scalar @{ $current_section->{items} } == 0 ) { | |
3c7fa354 | 728 | $current_section->{preceding_text} .= $_; |
5c51ce30 AV |
729 | } |
730 | else { | |
731 | $current_section->{following_text} .= $_; | |
732 | } | |
733 | next; | |
734 | } | |
735 | ||
736 | # text in Modules and Pragmata not in a head2 region | |
737 | else { | |
738 | if ($in_Modules_and_Pragmata_preamble) { | |
739 | $out .= $_; | |
740 | } | |
741 | else { | |
742 | $append_to_out .= $_; | |
743 | } | |
744 | next; | |
745 | } | |
746 | } | |
747 | close $fh; | |
748 | die 'Never saw Modules and Pragmata section' unless $in_Modules_and_Pragmata; | |
749 | return $out; | |
750 | } | |
751 | ||
752 | } | |
753 | ||
754 | { | |
93b00636 FR |
755 | package DeltaParser; |
756 | use Pod::Simple::SimpleTree; | |
757 | ||
758 | sub new { | |
759 | my ($class, $input) = @_; | |
760 | ||
761 | my $self = bless {} => $class; | |
762 | ||
763 | my $parsed_pod = Pod::Simple::SimpleTree->new->parse_file($input)->root; | |
764 | splice @{ $parsed_pod }, 0, 2; # we don't care about the document structure, | |
8d132c8e | 765 | # just the nodes within it |
93b00636 FR |
766 | |
767 | $self->_parse_delta($parsed_pod); | |
768 | ||
769 | return $self; | |
770 | } | |
771 | ||
c014dfad AV |
772 | # creates the accessor methods: |
773 | # new_modules | |
774 | # updated_modules | |
775 | # removed_modules | |
93b00636 FR |
776 | for my $k (keys %sections) { |
777 | no strict 'refs'; | |
778 | my $m = "${k}_modules"; | |
779 | *$m = sub { $_[0]->{$m} }; | |
780 | } | |
781 | ||
782 | sub _parse_delta { | |
783 | my ($self, $pod) = @_; | |
784 | ||
c014dfad AV |
785 | my $new_section = $self->_look_for_section( $pod, $sections{new} ); |
786 | my $updated_section = $self->_look_for_section( $pod, $sections{updated} ); | |
787 | my $removed_section = $self->_look_for_section( $pod, $sections{removed} ); | |
788 | ||
789 | $self->_parse_new_section($new_section); | |
790 | $self->_parse_updated_section($updated_section); | |
791 | $self->_parse_removed_section($removed_section); | |
792 | ||
793 | for (qw/new_modules updated_modules removed_modules/) { | |
794 | $self->{$_} = | |
795 | [ sort { lc $a->[0] cmp lc $b->[0] } @{ $self->{$_} } ]; | |
93b00636 FR |
796 | } |
797 | ||
798 | return; | |
799 | } | |
800 | ||
801 | sub _parse_new_section { | |
802 | my ($self, $section) = @_; | |
803 | ||
c014dfad AV |
804 | $self->{new_modules} = []; |
805 | return unless $section; | |
93b00636 FR |
806 | $self->{new_modules} = $self->_parse_section($section => sub { |
807 | my ($el) = @_; | |
808 | ||
809 | my ($first, $second) = @{ $el }[2, 3]; | |
810 | my ($ver) = $second =~ /(\d[^\s]+)\s+has\s+been/; | |
811 | ||
812 | return [ $first->[2], undef, $ver ]; | |
813 | }); | |
814 | ||
815 | return; | |
816 | } | |
817 | ||
818 | sub _parse_updated_section { | |
819 | my ($self, $section) = @_; | |
820 | ||
3668a9e5 AV |
821 | $self->{updated_modules} = []; |
822 | return unless $section; | |
93b00636 FR |
823 | $self->{updated_modules} = $self->_parse_section($section => sub { |
824 | my ($el) = @_; | |
825 | ||
826 | my ($first, $second) = @{ $el }[2, 3]; | |
827 | my $module = $first->[2]; | |
3668a9e5 AV |
828 | |
829 | # the regular expression matches the following: | |
830 | # from VERSION_NUMBER to VERSION_NUMBER | |
831 | # from VERSION_NUMBER to VERSION_NUMBER. | |
832 | # from version VERSION_NUMBER to version VERSION_NUMBER. | |
833 | # from VERSION_NUMBER to VERSION_NUMBER and MODULE from VERSION_NUMBER to VERSION_NUMBER | |
834 | # from VERSION_NUMBER to VERSION_NUMBER, and MODULE from VERSION_NUMBER to VERSION_NUMBER | |
835 | # | |
3c7fa354 | 836 | # some perldeltas contain more than one module listed in an entry, this only attempts to match the |
3668a9e5 | 837 | # first module |
93b00636 | 838 | my ($old, $new) = $second =~ |
3668a9e5 | 839 | /from\s+(?:version\s+)?(\d[^\s]+)\s+to\s+(?:version\s+)?(\d[^\s,]+?)(?=[\s,]|\.\s|\.$|$).*/s; |
93b00636 FR |
840 | |
841 | warn "Unable to extract old or new version of $module from perldelta" | |
842 | if !defined $old || !defined $new; | |
843 | ||
844 | return [ $module, $old, $new ]; | |
845 | }); | |
846 | ||
847 | return; | |
848 | } | |
849 | ||
850 | sub _parse_removed_section { | |
851 | my ($self, $section) = @_; | |
c014dfad AV |
852 | |
853 | $self->{removed_modules} = []; | |
854 | return unless $section; | |
93b00636 FR |
855 | $self->{removed_modules} = $self->_parse_section($section => sub { |
856 | my ($el) = @_; | |
857 | ||
858 | my ($first, $second) = @{ $el }[2, 3]; | |
859 | my ($old) = $second =~ /was\s+(\d[^\s]+?)\.?$/; | |
860 | ||
861 | return [ $first->[2], $old, undef ]; | |
862 | }); | |
863 | ||
864 | return; | |
865 | } | |
866 | ||
867 | sub _parse_section { | |
868 | my ($self, $section, $parser) = @_; | |
869 | ||
870 | my $items = $self->_look_down($section => sub { | |
871 | my ($el) = @_; | |
872 | return unless ref $el && $el->[0] =~ /^item-/ | |
873 | && @{ $el } > 2 && ref $el->[2]; | |
5c404e2c | 874 | return unless $el->[2]->[0] =~ /C|L/; |
93b00636 FR |
875 | |
876 | return 1; | |
877 | }); | |
878 | ||
879 | return [map { $parser->($_) } @{ $items }]; | |
880 | } | |
881 | ||
882 | sub _look_down { | |
883 | my ($self, $pod, $predicate) = @_; | |
884 | my @pod = @{ $pod }; | |
885 | ||
886 | my @l; | |
887 | while (my $el = shift @pod) { | |
888 | push @l, $el if $predicate->($el); | |
889 | if (ref $el) { | |
890 | my @el = @{ $el }; | |
891 | splice @el, 0, 2; | |
892 | unshift @pod, @el if @el; | |
893 | } | |
894 | } | |
895 | ||
896 | return @l ? \@l : undef; | |
897 | } | |
898 | ||
899 | sub _look_for_section { | |
900 | my ($self, $pod, $section) = @_; | |
901 | ||
902 | my $level; | |
903 | $self->_look_for_range($pod, | |
904 | sub { | |
905 | my ($el) = @_; | |
86c08a2c | 906 | my ($heading) = $el->[0] =~ /^head(\d)$/; |
b88937b3 | 907 | my $f = $heading && $el->[2] =~ /^$section/; |
86c08a2c | 908 | $level = $heading if $f && !$level; |
93b00636 FR |
909 | return $f; |
910 | }, | |
911 | sub { | |
912 | my ($el) = @_; | |
913 | $el->[0] =~ /^head(\d)$/ && $1 <= $level; | |
914 | }, | |
915 | ); | |
916 | } | |
917 | ||
918 | sub _look_for_range { | |
919 | my ($self, $pod, $start_predicate, $stop_predicate) = @_; | |
920 | ||
921 | my @l; | |
922 | for my $el (@{ $pod }) { | |
923 | if (@l) { | |
924 | return \@l if $stop_predicate->($el); | |
925 | } | |
926 | else { | |
927 | next unless $start_predicate->($el); | |
928 | } | |
929 | push @l, $el; | |
930 | } | |
931 | ||
932 | return; | |
933 | } | |
2a19fd14 | 934 | } |
88e14305 | 935 | |
2a19fd14 | 936 | run; |