This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Porting/corelist-perldelta.pl - Cleanup DeltaParser
[perl5.git] / Porting / corelist-perldelta.pl
CommitLineData
603f4ea0
DG
1#!perl
2use 5.010;
3use strict;
4use warnings;
5use lib 'Porting';
6use Maintainers qw/%Modules/;
5e0e6da6 7use lib 'dist/Module-CoreList/lib';
603f4ea0 8use Module::CoreList;
2a19fd14 9use Getopt::Long;
e8097ff2
MM
10
11=head1 USAGE
12
13 # generate the module changes for the Perl you are currently building
67e92e89 14 ./perl -Ilib Porting/corelist-perldelta.pl
e8097ff2
MM
15
16 # generate a diff between the corelist sections of two perldelta* files:
80da4b39 17 perl Porting/corelist-perldelta.pl --mode=check 5.017001 5.017002 <perl5172delta.pod
e8097ff2
MM
18
19=head1 ABOUT
20
21corelist-perldelta.pl is a bit schizophrenic. The part to generate the
22new Perldelta text does not need Algorithm::Diff, but wants to be
23run with the freshly built Perl.
24
25The part to check the diff wants to be run with a Perl that has an up-to-date
26L<Module::CoreList>, but needs the outside L<Algorithm::Diff>.
27
28Ideally, the program will be split into two separate programs, one
b88937b3 29to generate the text and one to show the diff between the
e8097ff2
MM
30corelist sections of the last perldelta and the next perldelta.
31
32=cut
93b00636
FR
33
34my %sections = (
8d132c8e
MM
35 new => qr/New Modules and Pragma(ta)?/,
36 updated => qr/Updated Modules and Pragma(ta)?/,
37 removed => qr/Removed Modules and Pragma(ta)?/,
38);
39
40my %titles = (
caaa1415
MM
41 new => 'New Modules and Pragmata',
42 updated => 'Updated Modules and Pragmata',
43 removed => 'Removed Modules and Pragmata',
93b00636 44);
603f4ea0 45
88e14305
DG
46my $deprecated;
47
48#--------------------------------------------------------------------------#
49
50sub added {
51 my ($mod, $old_v, $new_v) = @_;
0f200d9f 52 say "=item *\n";
ceadd55d 53 say "L<$mod> $new_v has been added to the Perl core.\n";
88e14305
DG
54}
55
56sub updated {
57 my ($mod, $old_v, $new_v) = @_;
0f200d9f 58 say "=item *\n";
ceadd55d 59 say "L<$mod> has been upgraded from version $old_v to $new_v.\n";
88e14305 60 if ( $deprecated->{$mod} ) {
ceadd55d 61 say "NOTE: L<$mod> is deprecated and may be removed from a future version of Perl.\n";
88e14305
DG
62 }
63}
64
65sub removed {
66 my ($mod, $old_v, $new_v) = @_;
0f200d9f
FR
67 say "=item *\n";
68 say "C<$mod> has been removed from the Perl core. Prior version was $old_v.\n";
88e14305
DG
69}
70
71sub generate_section {
72 my ($title, $item_sub, @mods ) = @_;
73 return unless @mods;
74
75 say "=head2 $title\n";
76 say "=over 4\n";
77
78 for my $tuple ( sort { lc($a->[0]) cmp lc($b->[0]) } @mods ) {
79 my ($mod,$old_v,$new_v) = @$tuple;
80 $old_v //= q('undef');
81 $new_v //= q('undef');
82 $item_sub->($mod, $old_v, $new_v);
83 }
84
85 say "=back\n";
86}
87
88#--------------------------------------------------------------------------#
89
2a19fd14
FR
90sub run {
91 my %opt = (mode => 'generate');
92
93 GetOptions(\%opt,
93b00636 94 'mode|m:s', # 'generate', 'check'
2a19fd14
FR
95 );
96
93b00636
FR
97 # by default, compare latest two version in CoreList;
98 my @versions = sort keys %Module::CoreList::version;
99 my ($old, $new) = (shift @ARGV, shift @ARGV);
100 $old ||= $versions[-2];
101 $new ||= $versions[-1];
2a19fd14 102
93b00636 103 if ( $opt{mode} eq 'generate' ) {
2a19fd14 104 do_generate($old => $new);
88e14305 105 }
93b00636
FR
106 elsif ( $opt{mode} eq 'check' ) {
107 do_check(\*ARGV, $old => $new);
108 }
88e14305 109 else {
2a19fd14
FR
110 die "Unrecognized mode '$opt{mode}'\n";
111 }
112
113 exit 0;
114}
115
93b00636 116sub corelist_delta {
2a19fd14
FR
117 my ($old, $new) = @_;
118 my $corelist = \%Module::CoreList::version;
119
120 $deprecated = $Module::CoreList::deprecated{$new};
121
122 my (@new,@deprecated,@removed,@pragmas,@modules);
123
124 # %Modules defines what is currently in core
125 for my $k ( keys %Modules ) {
126 next unless exists $corelist->{$new}{$k};
88e14305
DG
127 my $old_ver = $corelist->{$old}{$k};
128 my $new_ver = $corelist->{$new}{$k};
2a19fd14
FR
129 # in core but not in last corelist
130 if ( ! exists $corelist->{$old}{$k} ) {
131 push @new, [$k, undef, $new_ver];
88e14305 132 }
2a19fd14 133 # otherwise just pragmas or modules
88e14305 134 else {
2a19fd14
FR
135 my $old_ver = $corelist->{$old}{$k};
136 my $new_ver = $corelist->{$new}{$k};
137 next unless defined $old_ver && defined $new_ver && $old_ver ne $new_ver;
138 my $tuple = [ $k, $old_ver, $new_ver ];
139 if ( $k eq lc $k ) {
140 push @pragmas, $tuple;
141 }
142 else {
143 push @modules, $tuple;
144 }
88e14305
DG
145 }
146 }
603f4ea0 147
2a19fd14
FR
148 # in old corelist, but not this one => removed
149 # N.B. This is exhaustive -- not just what's in %Modules, so modules removed from
150 # distributions will show up here, too. Some person will have to review to see what's
151 # important. That's the best we can do without a historical Maintainers.pl
152 for my $k ( keys %{ $corelist->{$old} } ) {
153 if ( ! exists $corelist->{$new}{$k} ) {
154 push @removed, [$k, $corelist->{$old}{$k}, undef];
155 }
88e14305 156 }
88e14305 157
93b00636
FR
158 return (\@new, \@removed, \@pragmas, \@modules);
159}
160
161sub do_generate {
162 my ($old, $new) = @_;
163 my ($added, $removed, $pragmas, $modules) = corelist_delta($old => $new);
164
8d132c8e
MM
165 generate_section($titles{new}, \&added, @{ $added });
166 generate_section($titles{updated}, \&updated, @{ $pragmas }, @{ $modules });
167 generate_section($titles{removed}, \&removed, @{ $removed });
93b00636
FR
168}
169
170sub do_check {
171 my ($in, $old, $new) = @_;
172
173 my $delta = DeltaParser->new($in);
174 my ($added, $removed, $pragmas, $modules) = corelist_delta($old => $new);
175
176 for my $ck (['new', $delta->new_modules, $added],
177 ['removed', $delta->removed_modules, $removed],
178 ['updated', $delta->updated_modules, [@{ $modules }, @{ $pragmas }]]) {
179 my @delta = @{ $ck->[1] };
180 my @corelist = sort { lc $a->[0] cmp lc $b->[0] } @{ $ck->[2] };
181
182 printf $ck->[0] . ":\n";
183
e8097ff2 184 require Algorithm::Diff;
93b00636
FR
185 my $diff = Algorithm::Diff->new(map {
186 [map { join q{ } => grep defined, @{ $_ } } @{ $_ }]
187 } \@delta, \@corelist);
188
189 while ($diff->Next) {
190 next if $diff->Same;
191 my $sep = '';
192 if (!$diff->Items(2)) {
193 printf "%d,%dd%d\n", $diff->Get(qw( Min1 Max1 Max2 ));
194 } elsif(!$diff->Items(1)) {
195 printf "%da%d,%d\n", $diff->Get(qw( Max1 Min2 Max2 ));
196 } else {
197 $sep = "---\n";
198 printf "%d,%dc%d,%d\n", $diff->Get(qw( Min1 Max1 Min2 Max2 ));
199 }
200 print "< $_\n" for $diff->Items(1);
201 print $sep;
202 print "> $_\n" for $diff->Items(2);
203 }
204
205 print "\n";
206 }
207}
208
209{
210 package DeltaParser;
211 use Pod::Simple::SimpleTree;
212
213 sub new {
214 my ($class, $input) = @_;
215
216 my $self = bless {} => $class;
217
218 my $parsed_pod = Pod::Simple::SimpleTree->new->parse_file($input)->root;
219 splice @{ $parsed_pod }, 0, 2; # we don't care about the document structure,
8d132c8e 220 # just the nodes within it
93b00636
FR
221
222 $self->_parse_delta($parsed_pod);
223
224 return $self;
225 }
226
c014dfad
AV
227 # creates the accessor methods:
228 # new_modules
229 # updated_modules
230 # removed_modules
93b00636
FR
231 for my $k (keys %sections) {
232 no strict 'refs';
233 my $m = "${k}_modules";
234 *$m = sub { $_[0]->{$m} };
235 }
236
237 sub _parse_delta {
238 my ($self, $pod) = @_;
239
c014dfad
AV
240 my $new_section = $self->_look_for_section( $pod, $sections{new} );
241 my $updated_section = $self->_look_for_section( $pod, $sections{updated} );
242 my $removed_section = $self->_look_for_section( $pod, $sections{removed} );
243
244 $self->_parse_new_section($new_section);
245 $self->_parse_updated_section($updated_section);
246 $self->_parse_removed_section($removed_section);
247
248 for (qw/new_modules updated_modules removed_modules/) {
249 $self->{$_} =
250 [ sort { lc $a->[0] cmp lc $b->[0] } @{ $self->{$_} } ];
93b00636
FR
251 }
252
253 return;
254 }
255
256 sub _parse_new_section {
257 my ($self, $section) = @_;
258
c014dfad
AV
259 $self->{new_modules} = [];
260 return unless $section;
93b00636
FR
261 $self->{new_modules} = $self->_parse_section($section => sub {
262 my ($el) = @_;
263
264 my ($first, $second) = @{ $el }[2, 3];
265 my ($ver) = $second =~ /(\d[^\s]+)\s+has\s+been/;
266
267 return [ $first->[2], undef, $ver ];
268 });
269
270 return;
271 }
272
273 sub _parse_updated_section {
274 my ($self, $section) = @_;
275
3668a9e5
AV
276 $self->{updated_modules} = [];
277 return unless $section;
93b00636
FR
278 $self->{updated_modules} = $self->_parse_section($section => sub {
279 my ($el) = @_;
280
281 my ($first, $second) = @{ $el }[2, 3];
282 my $module = $first->[2];
3668a9e5
AV
283
284 # the regular expression matches the following:
285 # from VERSION_NUMBER to VERSION_NUMBER
286 # from VERSION_NUMBER to VERSION_NUMBER.
287 # from version VERSION_NUMBER to version VERSION_NUMBER.
288 # from VERSION_NUMBER to VERSION_NUMBER and MODULE from VERSION_NUMBER to VERSION_NUMBER
289 # from VERSION_NUMBER to VERSION_NUMBER, and MODULE from VERSION_NUMBER to VERSION_NUMBER
290 #
291 # some perldelta contain more than one module listed in an entry, this only attempts to match the
292 # first module
93b00636 293 my ($old, $new) = $second =~
3668a9e5 294 /from\s+(?:version\s+)?(\d[^\s]+)\s+to\s+(?:version\s+)?(\d[^\s,]+?)(?=[\s,]|\.\s|\.$|$).*/s;
93b00636
FR
295
296 warn "Unable to extract old or new version of $module from perldelta"
297 if !defined $old || !defined $new;
298
299 return [ $module, $old, $new ];
300 });
301
302 return;
303 }
304
305 sub _parse_removed_section {
306 my ($self, $section) = @_;
c014dfad
AV
307
308 $self->{removed_modules} = [];
309 return unless $section;
93b00636
FR
310 $self->{removed_modules} = $self->_parse_section($section => sub {
311 my ($el) = @_;
312
313 my ($first, $second) = @{ $el }[2, 3];
314 my ($old) = $second =~ /was\s+(\d[^\s]+?)\.?$/;
315
316 return [ $first->[2], $old, undef ];
317 });
318
319 return;
320 }
321
322 sub _parse_section {
323 my ($self, $section, $parser) = @_;
324
325 my $items = $self->_look_down($section => sub {
326 my ($el) = @_;
327 return unless ref $el && $el->[0] =~ /^item-/
328 && @{ $el } > 2 && ref $el->[2];
5c404e2c 329 return unless $el->[2]->[0] =~ /C|L/;
93b00636
FR
330
331 return 1;
332 });
333
334 return [map { $parser->($_) } @{ $items }];
335 }
336
337 sub _look_down {
338 my ($self, $pod, $predicate) = @_;
339 my @pod = @{ $pod };
340
341 my @l;
342 while (my $el = shift @pod) {
343 push @l, $el if $predicate->($el);
344 if (ref $el) {
345 my @el = @{ $el };
346 splice @el, 0, 2;
347 unshift @pod, @el if @el;
348 }
349 }
350
351 return @l ? \@l : undef;
352 }
353
354 sub _look_for_section {
355 my ($self, $pod, $section) = @_;
356
357 my $level;
358 $self->_look_for_range($pod,
359 sub {
360 my ($el) = @_;
86c08a2c 361 my ($heading) = $el->[0] =~ /^head(\d)$/;
b88937b3 362 my $f = $heading && $el->[2] =~ /^$section/;
86c08a2c 363 $level = $heading if $f && !$level;
93b00636
FR
364 return $f;
365 },
366 sub {
367 my ($el) = @_;
368 $el->[0] =~ /^head(\d)$/ && $1 <= $level;
369 },
370 );
371 }
372
373 sub _look_for_range {
374 my ($self, $pod, $start_predicate, $stop_predicate) = @_;
375
376 my @l;
377 for my $el (@{ $pod }) {
378 if (@l) {
379 return \@l if $stop_predicate->($el);
380 }
381 else {
382 next unless $start_predicate->($el);
383 }
384 push @l, $el;
385 }
386
387 return;
388 }
2a19fd14 389}
88e14305 390
2a19fd14 391run;