Commit | Line | Data |
---|---|---|
603f4ea0 DG |
1 | #!perl |
2 | use 5.010; | |
3 | use strict; | |
4 | use warnings; | |
5 | use lib 'Porting'; | |
6 | use Maintainers qw/%Modules/; | |
5e0e6da6 | 7 | use lib 'dist/Module-CoreList/lib'; |
603f4ea0 | 8 | use Module::CoreList; |
2a19fd14 | 9 | use Getopt::Long; |
e8097ff2 MM |
10 | |
11 | =head1 USAGE | |
12 | ||
13 | # generate the module changes for the Perl you are currently building | |
67e92e89 | 14 | ./perl -Ilib Porting/corelist-perldelta.pl |
5c51ce30 AV |
15 | |
16 | # update the module changes for the Perl you are currently building | |
59d2de25 | 17 | ./perl -Ilib Porting/corelist-perldelta.pl --mode=update pod/perldelta.pod |
5c51ce30 | 18 | |
e8097ff2 | 19 | # generate a diff between the corelist sections of two perldelta* files: |
80da4b39 | 20 | perl Porting/corelist-perldelta.pl --mode=check 5.017001 5.017002 <perl5172delta.pod |
e8097ff2 MM |
21 | |
22 | =head1 ABOUT | |
23 | ||
24 | corelist-perldelta.pl is a bit schizophrenic. The part to generate the | |
25 | new Perldelta text does not need Algorithm::Diff, but wants to be | |
26 | run with the freshly built Perl. | |
27 | ||
28 | The part to check the diff wants to be run with a Perl that has an up-to-date | |
29 | L<Module::CoreList>, but needs the outside L<Algorithm::Diff>. | |
30 | ||
31 | Ideally, the program will be split into two separate programs, one | |
b88937b3 | 32 | to generate the text and one to show the diff between the |
e8097ff2 MM |
33 | corelist sections of the last perldelta and the next perldelta. |
34 | ||
59d2de25 AV |
35 | Currently no information about Removed Modules is displayed in any of the |
36 | modes. | |
e8097ff2 | 37 | =cut |
93b00636 FR |
38 | |
39 | my %sections = ( | |
8d132c8e MM |
40 | new => qr/New Modules and Pragma(ta)?/, |
41 | updated => qr/Updated Modules and Pragma(ta)?/, | |
42 | removed => qr/Removed Modules and Pragma(ta)?/, | |
43 | ); | |
44 | ||
45 | my %titles = ( | |
caaa1415 MM |
46 | new => 'New Modules and Pragmata', |
47 | updated => 'Updated Modules and Pragmata', | |
48 | removed => 'Removed Modules and Pragmata', | |
93b00636 | 49 | ); |
603f4ea0 | 50 | |
88e14305 DG |
51 | my $deprecated; |
52 | ||
2a19fd14 FR |
53 | sub run { |
54 | my %opt = (mode => 'generate'); | |
55 | ||
56 | GetOptions(\%opt, | |
5c51ce30 | 57 | 'mode|m:s', # 'generate', 'check', 'update' |
2a19fd14 FR |
58 | ); |
59 | ||
93b00636 FR |
60 | # by default, compare latest two version in CoreList; |
61 | my @versions = sort keys %Module::CoreList::version; | |
5e1cf63b AV |
62 | my $old = $versions[-2]; |
63 | my $new = $versions[-1]; | |
64 | ||
65 | # use the provided versions if present | |
66 | # @ARGV >=2 means [old_version] [new_version] [path/to/file] | |
67 | if ( @ARGV >= 2) { | |
68 | ($old, $new) = (shift @ARGV, shift @ARGV); | |
69 | die "$old is an invalid version\n" if not exists | |
70 | $Module::CoreList::version{$old}; | |
71 | die "$new is an invalid verison\n" if not exists | |
72 | $Module::CoreList::version{$new}; | |
73 | } | |
2a19fd14 | 74 | |
93b00636 | 75 | if ( $opt{mode} eq 'generate' ) { |
2a19fd14 | 76 | do_generate($old => $new); |
88e14305 | 77 | } |
93b00636 FR |
78 | elsif ( $opt{mode} eq 'check' ) { |
79 | do_check(\*ARGV, $old => $new); | |
80 | } | |
5c51ce30 AV |
81 | elsif ( $opt{mode} eq 'update' ) { |
82 | do_update_existing(shift @ARGV, $old => $new); | |
83 | } | |
88e14305 | 84 | else { |
2a19fd14 FR |
85 | die "Unrecognized mode '$opt{mode}'\n"; |
86 | } | |
87 | ||
88 | exit 0; | |
89 | } | |
90 | ||
f40724d5 AV |
91 | # Given two perl versions, it returns a list describing the core distributions that have changed. |
92 | # The first three elements are hashrefs corresponding to new, updated, and removed modules | |
93 | # and are of the form (mostly, see the special remarks about removed): | |
94 | # 'Distribution Name' => ['Distribution Name', previous version number, current version number] | |
3c7fa354 SH |
95 | # where the version number is undef if the distribution did not exist. |
96 | # The fourth element is an arrayref of core distribution names of those distribution for which it | |
97 | # is unknown whether they have changed and therefore need to be manually checked. | |
f40724d5 AV |
98 | # |
99 | # In most cases, the distribution name in %Modules corresponds to the module that is representative | |
100 | # of the distribution as listed in Module::CoreList. However, there are a few distribution names | |
3c7fa354 | 101 | # that do not correspond to a module. %distToModules has been created which maps the distribution |
f40724d5 AV |
102 | # name to a representative module. The representative module was chosen by either looking at the |
103 | # Makefile of the distribution or by seeing which module the distribution has been traditionally | |
3c7fa354 | 104 | # listed under in past perldeltas. |
f40724d5 AV |
105 | # |
106 | # There are a few distributions for which there is no single representative module (e.g. libnet). | |
107 | # These distributions are returned as the last element of the list. | |
108 | # | |
109 | # %Modules contains a final key, _PERLLIB, which contains a list of modules that are owned by p5p. | |
110 | # This list contains modules and pragmata that may also be present in Module::CoreList. | |
111 | # A list of modules are in the list @unclaimedModules, which were manually listed based on whether | |
3c7fa354 | 112 | # they were independent modules and whether they have been listed in past perldeltas. |
f40724d5 AV |
113 | # The pragmata were found by doing something like: |
114 | # say for sort grep { $_ eq lc $_ and !exists $Modules{$_}} | |
115 | # keys %{$Module::CoreList::version{'5.019003'}} | |
116 | # and manually filtering out pragamata that were already covered. | |
117 | # | |
118 | # It is currently not possible to differentiate between a removed module and a removed | |
119 | # distribution. Therefore, the removed hashref contains every module that has been removed, even if | |
120 | # the module's corresponding distribution has not been removed. | |
121 | ||
93b00636 | 122 | sub corelist_delta { |
2a19fd14 FR |
123 | my ($old, $new) = @_; |
124 | my $corelist = \%Module::CoreList::version; | |
f40724d5 | 125 | my %changes = Module::CoreList::changes_between( $old, $new ); |
2a19fd14 FR |
126 | $deprecated = $Module::CoreList::deprecated{$new}; |
127 | ||
f40724d5 AV |
128 | my $getModifyType = sub { |
129 | my $data = shift; | |
130 | if ( exists $data->{left} and exists $data->{right} ) { | |
131 | return 'updated'; | |
132 | } | |
133 | elsif ( !exists $data->{left} and exists $data->{right} ) { | |
134 | return 'new'; | |
135 | } | |
136 | elsif ( exists $data->{left} and !exists $data->{right} ) { | |
137 | return 'removed'; | |
138 | } | |
139 | return undef; | |
140 | }; | |
141 | ||
142 | my @unclaimedModules = qw/AnyDBM_File B B::Concise B::Deparse Benchmark Class::Struct Config::Extensions DB DBM_Filter Devel::Peek DirHandle DynaLoader English Errno ExtUtils::Embed ExtUtils::Miniperl ExtUtils::Typemaps ExtUtils::XSSymSet Fcntl File::Basename File::Compare File::Copy File::DosGlob File::Find File::Glob File::stat FileCache FileHandle FindBin GDBM_File Getopt::Std Hash::Util Hash::Util::FieldHash I18N::Langinfo IPC::Open3 NDBM_File ODBM_File Opcode PerlIO PerlIO::encoding PerlIO::mmap PerlIO::scalar PerlIO::via Pod::Functions Pod::Html POSIX SDBM_File SelectSaver Symbol Sys::Hostname Thread Tie::Array Tie::Handle Tie::Hash Tie::Hash::NamedCapture Tie::Memoize Tie::Scalar Tie::StdHandle Tie::SubstrHash Time::gmtime Time::localtime Time::tm Unicode::UCD UNIVERSAL User::grent User::pwent VMS::DCLsym VMS::Filespec VMS::Stdio XS::Typemap Win32CORE/; | |
143 | my @unclaimedPragmata = qw/_charnames arybase attributes blib bytes charnames deprecate diagnostics encoding feature fields filetest inc::latest integer less locale mro open ops overload overloading re sigtrap sort strict subs utf8 vars vmsish/; | |
144 | my @unclaimed = (@unclaimedModules, @unclaimedPragmata); | |
145 | ||
146 | my %distToModules = ( | |
147 | 'IO-Compress' => [ | |
148 | { | |
149 | 'name' => 'IO-Compress', | |
150 | 'modification' => $getModifyType->( $changes{'IO::Compress::Base'} ), | |
151 | 'data' => $changes{'IO::Compress::Base'} | |
152 | } | |
153 | ], | |
154 | 'Locale-Codes' => [ | |
155 | { | |
156 | 'name' => 'Locale::Codes', | |
157 | 'modification' => $getModifyType->( $changes{'Locale::Codes'} ), | |
158 | 'data' => $changes{'Locale::Codes'} | |
159 | } | |
160 | ], | |
161 | 'PathTools' => [ | |
162 | { | |
163 | 'name' => 'File::Spec', | |
164 | 'modification' => $getModifyType->( $changes{'Cwd'} ), | |
165 | 'data' => $changes{'Cwd'} | |
166 | } | |
167 | ], | |
168 | 'Scalar-List-Utils' => [ | |
169 | { | |
170 | 'name' => 'List::Util', | |
171 | 'modification' => $getModifyType->( $changes{'List::Util'} ), | |
172 | 'data' => $changes{'List::Util'} | |
173 | }, | |
174 | { | |
175 | 'name' => 'Scalar::Util', | |
176 | 'modification' => $getModifyType->( $changes{'Scalar::Util'} ), | |
177 | 'data' => $changes{'Scalar::Util'} | |
178 | } | |
179 | ], | |
180 | 'Text-Tabs+Wrap' => [ | |
181 | { | |
182 | 'name' => 'Text::Tabs', | |
183 | 'modification' => $getModifyType->( $changes{'Text::Tabs'} ), | |
184 | 'data' => $changes{'Text::Tabs'} | |
185 | }, | |
186 | { | |
187 | 'name' => 'Text::Wrap', | |
188 | 'modification' => $getModifyType->( $changes{'Text::Wrap'} ), | |
189 | 'data' => $changes{'Text::Wrap'} | |
190 | } | |
191 | ], | |
192 | ); | |
193 | ||
194 | # structure is (new|removed|updated) => [ [ModuleName, previousVersion, newVersion] ] | |
195 | my $deltaGrouping = {}; | |
196 | ||
197 | # list of distributions listed in %Modules that need to be manually checked because there is no module that represents it | |
198 | my @manuallyCheck; | |
2a19fd14 FR |
199 | |
200 | # %Modules defines what is currently in core | |
201 | for my $k ( keys %Modules ) { | |
f40724d5 AV |
202 | next if $k eq '_PERLLIB'; #these are taken care of by being listed in @unclaimed |
203 | next if Module::CoreList::is_core($k) and !exists $changes{$k}; #modules that have not changed | |
204 | ||
205 | my ( $distName, $modifyType, $data ); | |
206 | ||
207 | if ( exists $changes{$k} ) { | |
208 | $distName = $k; | |
209 | $modifyType = $getModifyType->( $changes{$k} ); | |
210 | $data = $changes{$k}; | |
88e14305 | 211 | } |
f40724d5 AV |
212 | elsif ( exists $distToModules{$k} ) { |
213 | # modification will be undef if the distribution has not changed | |
214 | my @modules = grep { $_->{modification} } @{ $distToModules{$k} }; | |
215 | for (@modules) { | |
216 | $deltaGrouping->{ $_->{modification} }->{ $_->{name} } = [ $_->{name}, $_->{data}->{left}, $_->{data}->{right} ]; | |
2a19fd14 | 217 | } |
f40724d5 AV |
218 | next; |
219 | } | |
220 | else { | |
221 | push @manuallyCheck, $k and next; | |
222 | } | |
223 | ||
224 | $deltaGrouping->{$modifyType}->{$distName} = [ $distName, $data->{left}, $data->{right} ]; | |
225 | } | |
226 | ||
227 | for my $k (@unclaimed) { | |
228 | if ( exists $changes{$k} ) { | |
229 | $deltaGrouping->{ $getModifyType->( $changes{$k} ) }->{$k} = | |
230 | [ $k, $changes{$k}->{left}, $changes{$k}->{right} ]; | |
88e14305 DG |
231 | } |
232 | } | |
603f4ea0 | 233 | |
2a19fd14 FR |
234 | # in old corelist, but not this one => removed |
235 | # N.B. This is exhaustive -- not just what's in %Modules, so modules removed from | |
236 | # distributions will show up here, too. Some person will have to review to see what's | |
237 | # important. That's the best we can do without a historical Maintainers.pl | |
238 | for my $k ( keys %{ $corelist->{$old} } ) { | |
239 | if ( ! exists $corelist->{$new}{$k} ) { | |
f40724d5 | 240 | $deltaGrouping->{'removed'}->{$k} = [ $k, $corelist->{$old}{$k}, undef ]; |
2a19fd14 | 241 | } |
88e14305 | 242 | } |
88e14305 | 243 | |
f40724d5 AV |
244 | return ( |
245 | \%{ $deltaGrouping->{'new'} }, | |
246 | \%{ $deltaGrouping->{'removed'} }, | |
247 | \%{ $deltaGrouping->{'updated'} }, | |
248 | \@manuallyCheck | |
249 | ); | |
93b00636 FR |
250 | } |
251 | ||
5c51ce30 AV |
252 | # currently does not update the Removed Module section |
253 | sub do_update_existing { | |
254 | my ( $existing, $old, $new ) = @_; | |
255 | ||
256 | my ( $added, $removed, $updated, $manuallyCheck ) = corelist_delta( $old => $new ); | |
257 | if ($manuallyCheck) { | |
59d2de25 AV |
258 | print "It cannot be determined whether the following distributions have changed.\n"; |
259 | print "Please check and list accordingly:\n"; | |
5c51ce30 | 260 | say "\t* $_" for sort @{$manuallyCheck}; |
59d2de25 | 261 | print "\n"; |
5c51ce30 AV |
262 | } |
263 | ||
264 | my $data = { | |
265 | new => $added, | |
266 | updated => $updated, | |
267 | #removed => $removed, ignore removed for now | |
268 | }; | |
269 | ||
270 | my $text = DeltaUpdater::transform_pod( $existing, $data ); | |
271 | open my $out, '>', $existing or die "can't open perldelta file $existing: $!"; | |
9c7cb461 | 272 | binmode($out); |
5c51ce30 AV |
273 | print $out $text; |
274 | close $out; | |
59d2de25 AV |
275 | say "The New and Updated Modules and Pragamata sections in $existing have been updated"; |
276 | say "Please ensure the Removed Modules and Pragmata section is up-to-date"; | |
5c51ce30 AV |
277 | } |
278 | ||
93b00636 FR |
279 | sub do_generate { |
280 | my ($old, $new) = @_; | |
f40724d5 AV |
281 | my ($added, $removed, $updated, $manuallyCheck) = corelist_delta($old => $new); |
282 | ||
283 | if ($manuallyCheck) { | |
59d2de25 AV |
284 | print "\nXXXIt cannot be determined whether the following distributions have changed.\n"; |
285 | print "Please check and list accordingly:\n"; | |
f40724d5 | 286 | say "\t$_" for @{$manuallyCheck}; |
59d2de25 | 287 | print "\n"; |
f40724d5 | 288 | } |
93b00636 | 289 | |
3bb80250 AV |
290 | my $data = { |
291 | new => $added, | |
292 | updated => $updated, | |
293 | #removed => $removed, ignore removed for now | |
294 | }; | |
295 | ||
296 | say DeltaUpdater::sections_to_pod($data) | |
93b00636 FR |
297 | } |
298 | ||
299 | sub do_check { | |
300 | my ($in, $old, $new) = @_; | |
301 | ||
302 | my $delta = DeltaParser->new($in); | |
f40724d5 | 303 | my ($added, $removed, $updated) = corelist_delta($old => $new); |
93b00636 | 304 | |
fc353283 AV |
305 | # because of the difficulty in identifying the distribution for removed modules |
306 | # don't bother checking them | |
f40724d5 | 307 | for my $ck ([ 'new', $delta->new_modules, $added ], |
fc353283 | 308 | #[ 'removed', $delta->removed_modules, $removed ], |
f40724d5 | 309 | [ 'updated', $delta->updated_modules, $updated ] ) { |
93b00636 | 310 | my @delta = @{ $ck->[1] }; |
f40724d5 | 311 | my @corelist = sort { lc $a->[0] cmp lc $b->[0] } values %{ $ck->[2] }; |
93b00636 FR |
312 | |
313 | printf $ck->[0] . ":\n"; | |
314 | ||
e8097ff2 | 315 | require Algorithm::Diff; |
93b00636 FR |
316 | my $diff = Algorithm::Diff->new(map { |
317 | [map { join q{ } => grep defined, @{ $_ } } @{ $_ }] | |
318 | } \@delta, \@corelist); | |
319 | ||
320 | while ($diff->Next) { | |
321 | next if $diff->Same; | |
322 | my $sep = ''; | |
323 | if (!$diff->Items(2)) { | |
324 | printf "%d,%dd%d\n", $diff->Get(qw( Min1 Max1 Max2 )); | |
325 | } elsif(!$diff->Items(1)) { | |
326 | printf "%da%d,%d\n", $diff->Get(qw( Max1 Min2 Max2 )); | |
327 | } else { | |
328 | $sep = "---\n"; | |
329 | printf "%d,%dc%d,%d\n", $diff->Get(qw( Min1 Max1 Min2 Max2 )); | |
330 | } | |
fc353283 | 331 | print "Delta< $_\n" for $diff->Items(1); |
93b00636 | 332 | print $sep; |
fc353283 | 333 | print "Corelist> $_\n" for $diff->Items(2); |
93b00636 FR |
334 | } |
335 | ||
336 | print "\n"; | |
337 | } | |
338 | } | |
339 | ||
340 | { | |
5c51ce30 AV |
341 | |
342 | package DeltaUpdater; | |
343 | use List::Util 'reduce'; | |
344 | ||
345 | sub get_section_name_from_heading { | |
346 | my $heading = shift; | |
347 | while (my ($key, $expression) = each %sections) { | |
348 | if ($heading =~ $expression) { | |
349 | return $titles{$key}; | |
350 | } | |
351 | } | |
352 | die "$heading did not match any section"; | |
353 | } | |
354 | ||
355 | sub is_desired_section_name { | |
356 | for (values %sections) { | |
357 | return 1 if $_[0] =~ $_; | |
358 | } | |
359 | return 0; | |
360 | } | |
361 | ||
362 | # verify the module and pragmata in the section, changing the stated version if necessary | |
363 | # this subroutine warns if the module name cannot be parsed or if it is not listed in | |
364 | # the results returned from corelist_delta() | |
365 | # | |
366 | # a side-effect of calling this function is that modules present in the section are | |
367 | # removed from $data, resulting in $data containing only those modules and pragmata | |
368 | # that were not listed in the perldelta file. This means we can then pass $data to | |
369 | # add_to_section() without worrying about filtering out duplicates | |
370 | sub update_section { | |
371 | my ( $section, $data, $title ) = @_; | |
372 | my @items = @{ $section->{items} }; | |
373 | ||
374 | for my $item (@items) { | |
375 | ||
376 | my $content = $item->{text}; | |
377 | my $module = $item->{name}; | |
378 | ||
318148ea AV |
379 | #skip dummy items |
380 | next if !$module and $content =~ /\s*xx*\s*/i; | |
381 | ||
5c51ce30 | 382 | say "Could not parse module name; line is:\n\t$content" and next unless $module; |
59d2de25 AV |
383 | |
384 | if ( !$data->{$title}{$module} ) { | |
385 | print "$module is not listed as being $title in Module::CoreList.\n"; | |
386 | print "Ensure Module::CoreList has been updated and\n"; | |
387 | print "check to see that the distribution is not listed under another name.\n\n"; | |
388 | next; | |
389 | } | |
5c51ce30 AV |
390 | |
391 | if ( $title eq 'new' ) { | |
392 | my ($new) = $content =~ /(\d[^\s]+)\s+has\s+been.*$/m; | |
393 | say "Could not parse new version for $module; line is:\n\t$content" and next unless $new; | |
394 | if ( $data->{$title}{$module}[2] ne $new ) { | |
395 | say "$module: new version differs; version in pod: $new; version in corelist: " . $data->{$title}{$module}[2]; | |
396 | } | |
397 | $content =~ s/\d[^\s]+(\s+has\s+been.*$)/$data->{$title}{$module}[2].$1/me; | |
398 | } | |
399 | ||
400 | elsif ( $title eq 'updated' ) { | |
401 | my ( $prev, $new ) = $content =~ /from\s+(?:version\s+)?(\d[^\s]+)\s+to\s+(?:version\s+)?(\d[^\s,]+?)(?=[\s,]|\.\s|\.$|$).*/s; | |
402 | say "Could not parse old and new version for $module; line is:\n\t$content" and next | |
403 | unless $prev and $new; | |
404 | if ( $data->{$title}{$module}[1] ne $prev ) { | |
405 | say "$module: previous version differs; version in pod: $prev; version in corelist: " . $data->{$title}{$module}[1]; | |
406 | } | |
407 | if ( $data->{$title}{$module}[2] ne $new ) { | |
408 | say "$module: new version differs; version in pod: $new; version in corelist: " . $data->{$title}{$module}[2]; | |
409 | } | |
410 | $content =~ | |
411 | s/(from\s+(?:version\s+)?)\d[^\s]+(\s+to\s+(?:version\s+)?)\d[^\s,]+?(?=[\s,]|\.\s|\.$|$)(.*)/$1.$data->{$title}{$module}[1].$2.$data->{$title}{$module}[2].$3/se; | |
412 | } | |
413 | ||
414 | elsif ( $title eq 'removed' ) { | |
415 | my ($prev) = $content =~ /^.*?was\s+(\d[^\s]+?)/m; | |
416 | say "Could not parse old version for $module; line is:\n\t$content" and next unless $prev; | |
417 | if ( $data->{$title}{$module}[1] ne $prev ) { | |
418 | say "$module: previous version differs; $prev " . $data->{$title}{$module}[1]; | |
419 | } | |
420 | $content =~ s/(^.*?was\s+)\d[^\s]+?/$1.$data->{$title}{$module}[1]/me; | |
421 | } | |
422 | ||
423 | delete $data->{$title}{$module}; | |
424 | $item->{text} = $content; | |
425 | } | |
426 | return $section; | |
427 | } | |
428 | ||
429 | # add modules and pragmata present in $data to the section | |
430 | sub add_to_section { | |
431 | my ( $section, $data, $title ) = @_; | |
432 | ||
433 | #undef is a valid version name in Module::CoreList so supress warnings about concatenating undef values | |
434 | no warnings 'uninitialized'; | |
435 | for ( values %{ $data->{$title} } ) { | |
436 | my ( $mod, $old_v, $new_v ) = @{$_}; | |
437 | my ( $item, $text ); | |
438 | ||
439 | $item = { name => $mod, text => "=item *\n" }; | |
440 | if ( $title eq 'new' ) { | |
441 | $text = "L<$mod> $new_v has been added to the Perl core.\n"; | |
442 | } | |
443 | ||
444 | elsif ( $title eq 'updated' ) { | |
445 | $text = "L<$mod> has been upgraded from version $old_v to $new_v.\n"; | |
446 | if ( $deprecated->{$mod} ) { | |
447 | $text .= "NOTE: L<$mod> is deprecated and may be removed from a future version of Perl.\n"; | |
448 | } | |
449 | } | |
450 | ||
451 | elsif ( $title eq 'removed' ) { | |
452 | $text = "C<$mod> has been removed from the Perl core. Prior version was $old_v.\n"; | |
453 | } | |
454 | ||
455 | $item->{text} .= "\n$text\n"; | |
456 | push @{ $section->{items} }, $item; | |
457 | } | |
458 | return $section; | |
459 | } | |
460 | ||
461 | sub sort_items_in_section { | |
462 | my ($section) = @_; | |
463 | ||
464 | # if we could not parse the module name, it will be uninitalized | |
465 | # in sort. This is not a problem as it will just result in these | |
466 | # sections being placed near the begining of the section | |
467 | no warnings 'uninitialized'; | |
468 | $section->{items} = | |
469 | [ sort { lc $a->{name} cmp lc $b->{name} } @{ $section->{items} } ]; | |
470 | return $section; | |
471 | } | |
472 | ||
473 | # given a hashref of the form returned by corelist_delta() | |
474 | # and a hash structured as documented in transform_pod(), it returns | |
475 | # a pod string representation of the sections, creating sections | |
476 | # if necessary | |
477 | sub sections_to_pod { | |
478 | my ( $data, %sections ) = @_; | |
479 | my $out = ''; | |
480 | ||
481 | for ( | |
482 | ( | |
483 | [ 'New Modules and Pragmata', 'new' ], | |
484 | [ 'Updated Modules and Pragmata', 'updated' ], | |
485 | [ 'Removed Modules and Pragmata', 'removed' ] | |
486 | ) | |
487 | ) | |
488 | { | |
489 | my ( $section_name, $title ) = @{$_}; | |
490 | ||
491 | my $section = $sections{$section_name} // { | |
3c7fa354 SH |
492 | name => $section_name, |
493 | preceding_text => "=head2 $_->[0]\n=over 4\n", | |
494 | following_text => "=back\n", | |
495 | items => [], | |
496 | manual => 1 | |
5c51ce30 AV |
497 | }; |
498 | ||
499 | $section = update_section( $section, $data, $title ); | |
500 | $section = add_to_section( $section, $data, $title ); | |
501 | $section = sort_items_in_section( $section ); | |
502 | ||
503 | next if $section->{manual} and scalar @{ $section->{items} } == 0; | |
504 | ||
505 | my $items = reduce { no warnings 'once'; $a . $b->{text} } | |
506 | ( '', @{ $section->{items} } ); | |
507 | $out .= | |
3c7fa354 | 508 | ( $section->{preceding_text} // '' ) |
5c51ce30 AV |
509 | . $items |
510 | . ( $section->{following_text} // '' ); | |
511 | } | |
512 | return $out; | |
513 | } | |
514 | ||
515 | # given a filename corresponding to an existing perldelta file | |
516 | # and a hashref of the form returned by corelist_delta(), it | |
517 | # returns a string of the resulting file after the module | |
518 | # information has been added. | |
519 | sub transform_pod { | |
520 | my ( $existing, $data ) = @_; | |
521 | ||
522 | # will contain hashrefs corresponding to new, updated and removed | |
523 | # modules and pragmata keyed by section name | |
524 | # each section is hashref of the structure | |
3c7fa354 SH |
525 | # preceding_text => Text occurring before and including the over |
526 | # region containing the list of modules, | |
527 | # items => [Arrayref of hashrefs corresponding to a module | |
528 | # entry], | |
5c51ce30 AV |
529 | # an entry has the form: |
530 | # name => Module name or undef if the name could not be determined | |
531 | # text => The text of the entry, including the item heading | |
532 | # | |
3c7fa354 SH |
533 | # following_text => Any text not corresponding to a module |
534 | # that occurs after the first module | |
5c51ce30 AV |
535 | # |
536 | # the sections are converted to a pod string by calling sections_to_pod() | |
537 | my %sections; | |
538 | ||
539 | # we are in the Modules_and_Pragmata's section | |
540 | my $in_Modules_and_Pragmata; | |
3c7fa354 | 541 | |
5c51ce30 AV |
542 | # we are the Modules_and_Pragmata's section but have not |
543 | # encountered any of the desired sections. We use this | |
544 | # flag to determine whether we should append the text to $out | |
545 | # or we need to delay appending until the module listings are | |
546 | # processed and instead append to $append_to_out | |
547 | my $in_Modules_and_Pragmata_preamble; | |
3c7fa354 | 548 | |
5c51ce30 AV |
549 | my $done_processing_Modules_and_Pragmata; |
550 | ||
551 | my $current_section; | |
3c7fa354 | 552 | |
5c51ce30 AV |
553 | # $nested_element_level == 0 : not in an over region, treat lines as text |
554 | # $nested_element_level == 1 : presumably in the top over region that | |
555 | # corresponds to the module listing. Treat | |
556 | # each item as a module | |
557 | # $nested_element_level > 1 : we only consider these values when we are in an item | |
558 | # We treat lines as the text of the current item. | |
559 | my $nested_element_level = 0; | |
3c7fa354 | 560 | |
5c51ce30 AV |
561 | my $current_item; |
562 | my $need_to_parse_module_name; | |
563 | ||
564 | my $out = ''; | |
565 | my $append_to_out = ''; | |
566 | ||
567 | open my $fh, '<', $existing or die "can't open perldelta file $existing: $!"; | |
9c7cb461 | 568 | binmode($fh); |
5c51ce30 AV |
569 | |
570 | while (<$fh>) { | |
571 | # treat the rest of the file as plain text | |
572 | if ($done_processing_Modules_and_Pragmata) { | |
573 | $out .= $_; | |
574 | next; | |
575 | } | |
576 | ||
577 | elsif ( !$in_Modules_and_Pragmata ) { | |
578 | # entering Modules and Pragmata | |
579 | if (/^=head1 Modules and Pragmata/) { | |
580 | $in_Modules_and_Pragmata = 1; | |
581 | $in_Modules_and_Pragmata_preamble = 1; | |
582 | } | |
583 | $out .= $_; | |
584 | next; | |
585 | } | |
586 | ||
587 | # leaving Modules and Pragmata | |
588 | elsif (/^=head1/) { | |
589 | if ($current_section) { | |
590 | push @{ $current_section->{items} }, $current_item | |
591 | if $current_item; | |
592 | $sections{ $current_section->{name} } = $current_section; | |
593 | } | |
594 | $done_processing_Modules_and_Pragmata = 1; | |
595 | $out .= | |
596 | sections_to_pod( $data, %sections ) . $append_to_out . $_; | |
597 | next; | |
598 | } | |
599 | ||
600 | # new section in Modules and Pragmata | |
601 | elsif (/^=head2 (.*?)$/) { | |
602 | my $name = $1; | |
603 | if ($current_section) { | |
604 | push @{ $current_section->{items} }, $current_item | |
605 | if $current_item; | |
606 | $sections{ $current_section->{name} } = $current_section; | |
607 | undef $current_section; | |
608 | } | |
609 | ||
610 | if ( is_desired_section_name($name) ) { | |
611 | undef $in_Modules_and_Pragmata_preamble; | |
612 | if ( $nested_element_level > 0 ) { | |
613 | die "Unexpected head2 at line no. $."; | |
614 | } | |
615 | my $title = get_section_name_from_heading($name); | |
616 | if ( exists $sections{$title} ) { | |
3c7fa354 | 617 | die "$name occurred twice at line no. $."; |
5c51ce30 | 618 | } |
3c7fa354 SH |
619 | $current_section = {}; |
620 | $current_section->{name} = $title; | |
621 | $current_section->{preceding_text} = $_; | |
622 | $current_section->{items} = []; | |
623 | $nested_element_level = 0; | |
5c51ce30 AV |
624 | next; |
625 | } | |
626 | ||
627 | # otherwise treat section as plain text | |
628 | else { | |
629 | if ($in_Modules_and_Pragmata_preamble) { | |
630 | $out .= $_; | |
631 | } | |
632 | else { | |
633 | $append_to_out .= $_; | |
634 | } | |
635 | next; | |
636 | } | |
637 | } | |
638 | ||
639 | elsif ($current_section) { | |
640 | ||
641 | # not in an over region | |
642 | if ( $nested_element_level == 0 ) { | |
643 | if (/^=over/) { | |
644 | $nested_element_level++; | |
645 | } | |
646 | if ( scalar @{ $current_section->{items} } > 0 ) { | |
647 | $current_section->{following_text} .= $_; | |
648 | } | |
649 | else { | |
3c7fa354 | 650 | $current_section->{preceding_text} .= $_; |
5c51ce30 AV |
651 | } |
652 | next; | |
653 | } | |
654 | ||
655 | if ($current_item) { | |
656 | if ($need_to_parse_module_name) { | |
657 | # the item may not have a parsable module name, which means that | |
658 | # $current_item->{name} will never be defined. | |
659 | if (/^(?:L|C)<(.+?)>/) { | |
660 | $current_item->{name} = $1; | |
661 | undef $need_to_parse_module_name; | |
662 | } | |
663 | # =item or =back signals the end of an item | |
664 | # block, which we handle below | |
665 | if ( !/^=(?:item|back)/ ) { | |
666 | $current_item->{text} .= $_; | |
667 | next; | |
668 | } | |
669 | } | |
670 | # currently in an over region | |
671 | # treat text inside region as plain text | |
672 | if ( $nested_element_level > 1 ) { | |
673 | if (/^=back/) { | |
674 | $nested_element_level--; | |
675 | } | |
676 | elsif (/^=over/) { | |
677 | $nested_element_level++; | |
678 | } | |
679 | $current_item->{text} .= $_; | |
680 | next; | |
681 | } | |
682 | # entering over region | |
683 | if (/^=over/) { | |
684 | $nested_element_level++; | |
685 | $current_item->{text} .= $_; | |
686 | next; | |
687 | } | |
688 | # =item or =back signals the end of an item | |
689 | # block, which we handle below | |
690 | if ( !/^=(?:item|back)/ ) { | |
691 | $current_item->{text} .= $_; | |
692 | next; | |
693 | } | |
694 | } | |
695 | ||
696 | if (/^=item \*/) { | |
697 | push @{ $current_section->{items} }, $current_item | |
698 | if $current_item; | |
699 | $current_item = { text => $_ }; | |
700 | $need_to_parse_module_name = 1; | |
701 | next; | |
702 | } | |
703 | ||
704 | if (/^=back/) { | |
705 | push @{ $current_section->{items} }, $current_item | |
706 | if $current_item; | |
707 | undef $current_item; | |
708 | $nested_element_level--; | |
709 | } | |
710 | ||
711 | if ( scalar @{ $current_section->{items} } == 0 ) { | |
3c7fa354 | 712 | $current_section->{preceding_text} .= $_; |
5c51ce30 AV |
713 | } |
714 | else { | |
715 | $current_section->{following_text} .= $_; | |
716 | } | |
717 | next; | |
718 | } | |
719 | ||
720 | # text in Modules and Pragmata not in a head2 region | |
721 | else { | |
722 | if ($in_Modules_and_Pragmata_preamble) { | |
723 | $out .= $_; | |
724 | } | |
725 | else { | |
726 | $append_to_out .= $_; | |
727 | } | |
728 | next; | |
729 | } | |
730 | } | |
731 | close $fh; | |
732 | die 'Never saw Modules and Pragmata section' unless $in_Modules_and_Pragmata; | |
733 | return $out; | |
734 | } | |
735 | ||
736 | } | |
737 | ||
738 | { | |
93b00636 FR |
739 | package DeltaParser; |
740 | use Pod::Simple::SimpleTree; | |
741 | ||
742 | sub new { | |
743 | my ($class, $input) = @_; | |
744 | ||
745 | my $self = bless {} => $class; | |
746 | ||
747 | my $parsed_pod = Pod::Simple::SimpleTree->new->parse_file($input)->root; | |
748 | splice @{ $parsed_pod }, 0, 2; # we don't care about the document structure, | |
8d132c8e | 749 | # just the nodes within it |
93b00636 FR |
750 | |
751 | $self->_parse_delta($parsed_pod); | |
752 | ||
753 | return $self; | |
754 | } | |
755 | ||
c014dfad AV |
756 | # creates the accessor methods: |
757 | # new_modules | |
758 | # updated_modules | |
759 | # removed_modules | |
93b00636 FR |
760 | for my $k (keys %sections) { |
761 | no strict 'refs'; | |
762 | my $m = "${k}_modules"; | |
763 | *$m = sub { $_[0]->{$m} }; | |
764 | } | |
765 | ||
766 | sub _parse_delta { | |
767 | my ($self, $pod) = @_; | |
768 | ||
c014dfad AV |
769 | my $new_section = $self->_look_for_section( $pod, $sections{new} ); |
770 | my $updated_section = $self->_look_for_section( $pod, $sections{updated} ); | |
771 | my $removed_section = $self->_look_for_section( $pod, $sections{removed} ); | |
772 | ||
773 | $self->_parse_new_section($new_section); | |
774 | $self->_parse_updated_section($updated_section); | |
775 | $self->_parse_removed_section($removed_section); | |
776 | ||
777 | for (qw/new_modules updated_modules removed_modules/) { | |
778 | $self->{$_} = | |
779 | [ sort { lc $a->[0] cmp lc $b->[0] } @{ $self->{$_} } ]; | |
93b00636 FR |
780 | } |
781 | ||
782 | return; | |
783 | } | |
784 | ||
785 | sub _parse_new_section { | |
786 | my ($self, $section) = @_; | |
787 | ||
c014dfad AV |
788 | $self->{new_modules} = []; |
789 | return unless $section; | |
93b00636 FR |
790 | $self->{new_modules} = $self->_parse_section($section => sub { |
791 | my ($el) = @_; | |
792 | ||
793 | my ($first, $second) = @{ $el }[2, 3]; | |
794 | my ($ver) = $second =~ /(\d[^\s]+)\s+has\s+been/; | |
795 | ||
796 | return [ $first->[2], undef, $ver ]; | |
797 | }); | |
798 | ||
799 | return; | |
800 | } | |
801 | ||
802 | sub _parse_updated_section { | |
803 | my ($self, $section) = @_; | |
804 | ||
3668a9e5 AV |
805 | $self->{updated_modules} = []; |
806 | return unless $section; | |
93b00636 FR |
807 | $self->{updated_modules} = $self->_parse_section($section => sub { |
808 | my ($el) = @_; | |
809 | ||
810 | my ($first, $second) = @{ $el }[2, 3]; | |
811 | my $module = $first->[2]; | |
3668a9e5 AV |
812 | |
813 | # the regular expression matches the following: | |
814 | # from VERSION_NUMBER to VERSION_NUMBER | |
815 | # from VERSION_NUMBER to VERSION_NUMBER. | |
816 | # from version VERSION_NUMBER to version VERSION_NUMBER. | |
817 | # from VERSION_NUMBER to VERSION_NUMBER and MODULE from VERSION_NUMBER to VERSION_NUMBER | |
818 | # from VERSION_NUMBER to VERSION_NUMBER, and MODULE from VERSION_NUMBER to VERSION_NUMBER | |
819 | # | |
3c7fa354 | 820 | # some perldeltas contain more than one module listed in an entry, this only attempts to match the |
3668a9e5 | 821 | # first module |
93b00636 | 822 | my ($old, $new) = $second =~ |
3668a9e5 | 823 | /from\s+(?:version\s+)?(\d[^\s]+)\s+to\s+(?:version\s+)?(\d[^\s,]+?)(?=[\s,]|\.\s|\.$|$).*/s; |
93b00636 FR |
824 | |
825 | warn "Unable to extract old or new version of $module from perldelta" | |
826 | if !defined $old || !defined $new; | |
827 | ||
828 | return [ $module, $old, $new ]; | |
829 | }); | |
830 | ||
831 | return; | |
832 | } | |
833 | ||
834 | sub _parse_removed_section { | |
835 | my ($self, $section) = @_; | |
c014dfad AV |
836 | |
837 | $self->{removed_modules} = []; | |
838 | return unless $section; | |
93b00636 FR |
839 | $self->{removed_modules} = $self->_parse_section($section => sub { |
840 | my ($el) = @_; | |
841 | ||
842 | my ($first, $second) = @{ $el }[2, 3]; | |
843 | my ($old) = $second =~ /was\s+(\d[^\s]+?)\.?$/; | |
844 | ||
845 | return [ $first->[2], $old, undef ]; | |
846 | }); | |
847 | ||
848 | return; | |
849 | } | |
850 | ||
851 | sub _parse_section { | |
852 | my ($self, $section, $parser) = @_; | |
853 | ||
854 | my $items = $self->_look_down($section => sub { | |
855 | my ($el) = @_; | |
856 | return unless ref $el && $el->[0] =~ /^item-/ | |
857 | && @{ $el } > 2 && ref $el->[2]; | |
5c404e2c | 858 | return unless $el->[2]->[0] =~ /C|L/; |
93b00636 FR |
859 | |
860 | return 1; | |
861 | }); | |
862 | ||
863 | return [map { $parser->($_) } @{ $items }]; | |
864 | } | |
865 | ||
866 | sub _look_down { | |
867 | my ($self, $pod, $predicate) = @_; | |
868 | my @pod = @{ $pod }; | |
869 | ||
870 | my @l; | |
871 | while (my $el = shift @pod) { | |
872 | push @l, $el if $predicate->($el); | |
873 | if (ref $el) { | |
874 | my @el = @{ $el }; | |
875 | splice @el, 0, 2; | |
876 | unshift @pod, @el if @el; | |
877 | } | |
878 | } | |
879 | ||
880 | return @l ? \@l : undef; | |
881 | } | |
882 | ||
883 | sub _look_for_section { | |
884 | my ($self, $pod, $section) = @_; | |
885 | ||
886 | my $level; | |
887 | $self->_look_for_range($pod, | |
888 | sub { | |
889 | my ($el) = @_; | |
86c08a2c | 890 | my ($heading) = $el->[0] =~ /^head(\d)$/; |
b88937b3 | 891 | my $f = $heading && $el->[2] =~ /^$section/; |
86c08a2c | 892 | $level = $heading if $f && !$level; |
93b00636 FR |
893 | return $f; |
894 | }, | |
895 | sub { | |
896 | my ($el) = @_; | |
897 | $el->[0] =~ /^head(\d)$/ && $1 <= $level; | |
898 | }, | |
899 | ); | |
900 | } | |
901 | ||
902 | sub _look_for_range { | |
903 | my ($self, $pod, $start_predicate, $stop_predicate) = @_; | |
904 | ||
905 | my @l; | |
906 | for my $el (@{ $pod }) { | |
907 | if (@l) { | |
908 | return \@l if $stop_predicate->($el); | |
909 | } | |
910 | else { | |
911 | next unless $start_predicate->($el); | |
912 | } | |
913 | push @l, $el; | |
914 | } | |
915 | ||
916 | return; | |
917 | } | |
2a19fd14 | 918 | } |
88e14305 | 919 | |
2a19fd14 | 920 | run; |