perl5.git.perl.org Git - perl5.git/blame_incremental

... / ...

Commit	Line	Data
	1	#!perl
	2	use 5.010;
	3	use strict;
	4	use warnings;
	5	use lib 'Porting';
	6	use Maintainers qw/%Modules/;
	7	use lib 'dist/Module-CoreList/lib';
	8	use Module::CoreList;
	9	use Getopt::Long;
	10
	11	=head1 USAGE
	12
	13	# generate the module changes for the Perl you are currently building
	14	./perl -Ilib Porting/corelist-perldelta.pl
	15
	16	# update the module changes for the Perl you are currently building
	17	./perl -Ilib Porting/corelist-perldelta.pl --mode=update pod/perldelta.pod
	18
	19	# generate a diff between the corelist sections of two perldelta* files:
	20	perl Porting/corelist-perldelta.pl --mode=check 5.017001 5.017002 <perl5172delta.pod
	21
	22	=head1 ABOUT
	23
	24	corelist-perldelta.pl is a bit schizophrenic. The part to generate the
	25	new Perldelta text does not need Algorithm::Diff, but wants to be
	26	run with the freshly built Perl.
	27
	28	The part to check the diff wants to be run with a Perl that has an up-to-date
	29	L<Module::CoreList>, but needs the outside L<Algorithm::Diff>.
	30
	31	Ideally, the program will be split into two separate programs, one
	32	to generate the text and one to show the diff between the
	33	corelist sections of the last perldelta and the next perldelta.
	34
	35	Currently no information about Removed Modules is displayed in any of the
	36	modes.
	37
	38	=cut
	39
	40	my %sections = (
	41	new => qr/New Modules and Pragma(ta)?/,
	42	updated => qr/Updated Modules and Pragma(ta)?/,
	43	removed => qr/Removed Modules and Pragma(ta)?/,
	44	);
	45
	46	my %titles = (
	47	new => 'New Modules and Pragmata',
	48	updated => 'Updated Modules and Pragmata',
	49	removed => 'Removed Modules and Pragmata',
	50	);
	51
	52	my $deprecated;
	53
	54	sub run {
	55	my %opt = (mode => 'generate');
	56
	57	GetOptions(\%opt,
	58	'mode\|m:s', # 'generate', 'check', 'update'
	59	);
	60
	61	# by default, compare latest two version in CoreList;
	62	my ($old, $new) = latest_two_perl_versions();
	63
	64	# use the provided versions if present
	65	# @ARGV >=2 means [old_version] [new_version] [path/to/file]
	66	if ( @ARGV >= 2) {
	67	($old, $new) = (shift @ARGV, shift @ARGV);
	68	die "$old is an invalid version\n" if not exists
	69	$Module::CoreList::version{$old};
	70	die "$new is an invalid version\n" if not exists
	71	$Module::CoreList::version{$new};
	72	}
	73
	74	if ( $opt{mode} eq 'generate' ) {
	75	do_generate($old => $new);
	76	}
	77	elsif ( $opt{mode} eq 'check' ) {
	78	do_check(\*ARGV, $old => $new);
	79	}
	80	elsif ( $opt{mode} eq 'update' ) {
	81	do_update_existing(shift @ARGV, $old => $new);
	82	}
	83	else {
	84	die "Unrecognized mode '$opt{mode}'\n";
	85	}
	86
	87	exit 0;
	88	}
	89
	90	sub latest_two_perl_versions {
	91
	92	my @versions = sort keys %Module::CoreList::version;
	93
	94	my $new = pop @versions;
	95
	96	# If a fully-padded version number ends in a zero (as in "5.019010"), that
	97	# version shows up in %Module::CoreList::version both with and without its
	98	# trailing zeros. So skip all versions that are numerically equal to $new.
	99	pop @versions while @versions && $versions[-1] == $new;
	100
	101	die "Too few distinct core versions in %Module::CoreList::version ?!\n"
	102	if !@versions;
	103
	104	return $versions[-1], $new;
	105	}
	106
	107	# Given two perl versions, it returns a list describing the core distributions that have changed.
	108	# The first three elements are hashrefs corresponding to new, updated, and removed modules
	109	# and are of the form (mostly, see the special remarks about removed):
	110	# 'Distribution Name' => ['Distribution Name', previous version number, current version number]
	111	# where the version number is undef if the distribution did not exist.
	112	# The fourth element is an arrayref of core distribution names of those distribution for which it
	113	# is unknown whether they have changed and therefore need to be manually checked.
	114	#
	115	# In most cases, the distribution name in %Modules corresponds to the module that is representative
	116	# of the distribution as listed in Module::CoreList. However, there are a few distribution names
	117	# that do not correspond to a module. %distToModules has been created which maps the distribution
	118	# name to a representative module. The representative module was chosen by either looking at the
	119	# Makefile of the distribution or by seeing which module the distribution has been traditionally
	120	# listed under in past perldeltas.
	121	#
	122	# There are a few distributions for which there is no single representative module (e.g. libnet).
	123	# These distributions are returned as the last element of the list.
	124	#
	125	# %Modules contains a final key, _PERLLIB, which contains a list of modules that are owned by p5p.
	126	# This list contains modules and pragmata that may also be present in Module::CoreList.
	127	# A list of modules are in the list @unclaimedModules, which were manually listed based on whether
	128	# they were independent modules and whether they have been listed in past perldeltas.
	129	# The pragmata were found by doing something like:
	130	# say for sort grep { $_ eq lc $_ and !exists $Modules{$_}}
	131	# keys %{$Module::CoreList::version{'5.019003'}}
	132	# and manually filtering out pragamata that were already covered.
	133	#
	134	# It is currently not possible to differentiate between a removed module and a removed
	135	# distribution. Therefore, the removed hashref contains every module that has been removed, even if
	136	# the module's corresponding distribution has not been removed.
	137
	138	sub corelist_delta {
	139	my ($old, $new) = @_;
	140	my $corelist = \%Module::CoreList::version;
	141	my %changes = Module::CoreList::changes_between( $old, $new );
	142	$deprecated = $Module::CoreList::deprecated{$new};
	143
	144	my $getModifyType = sub {
	145	my $data = shift;
	146	if ( exists $data->{left} and exists $data->{right} ) {
	147	return 'updated';
	148	}
	149	elsif ( !exists $data->{left} and exists $data->{right} ) {
	150	return 'new';
	151	}
	152	elsif ( exists $data->{left} and !exists $data->{right} ) {
	153	return 'removed';
	154	}
	155	return undef;
	156	};
	157
	158	my @unclaimedModules = qw/AnyDBM_File B B::Concise B::Deparse Benchmark Class::Struct Config::Extensions DB DBM_Filter Devel::Peek DirHandle DynaLoader English Errno ExtUtils::Embed ExtUtils::Miniperl ExtUtils::Typemaps ExtUtils::XSSymSet Fcntl File::Basename File::Compare File::Copy File::DosGlob File::Find File::Glob File::stat FileCache FileHandle FindBin GDBM_File Getopt::Std Hash::Util Hash::Util::FieldHash I18N::Langinfo IPC::Open3 NDBM_File ODBM_File Opcode PerlIO PerlIO::encoding PerlIO::mmap PerlIO::scalar PerlIO::via Pod::Functions Pod::Html POSIX SDBM_File SelectSaver Symbol Sys::Hostname Thread Tie::Array Tie::Handle Tie::Hash Tie::Hash::NamedCapture Tie::Memoize Tie::Scalar Tie::StdHandle Tie::SubstrHash Time::gmtime Time::localtime Time::tm Unicode::UCD UNIVERSAL User::grent User::pwent VMS::DCLsym VMS::Filespec VMS::Stdio XS::Typemap Win32CORE/;
	159	my @unclaimedPragmata = qw/arybase attributes blib bytes charnames deprecate diagnostics encoding feature fields filetest inc::latest integer less locale mro open ops overload overloading re sigtrap sort strict subs utf8 vars vmsish/;
	160	my @unclaimed = (@unclaimedModules, @unclaimedPragmata);
	161
	162	my %distToModules = (
	163	'IO-Compress' => [
	164	{
	165	'name' => 'IO-Compress',
	166	'modification' => $getModifyType->( $changes{'IO::Compress::Base'} ),
	167	'data' => $changes{'IO::Compress::Base'}
	168	}
	169	],
	170	'Locale-Codes' => [
	171	{
	172	'name' => 'Locale::Codes',
	173	'modification' => $getModifyType->( $changes{'Locale::Codes'} ),
	174	'data' => $changes{'Locale::Codes'}
	175	}
	176	],
	177	'PathTools' => [
	178	{
	179	'name' => 'File::Spec',
	180	'modification' => $getModifyType->( $changes{'Cwd'} ),
	181	'data' => $changes{'Cwd'}
	182	}
	183	],
	184	'Scalar-List-Utils' => [
	185	{
	186	'name' => 'List::Util',
	187	'modification' => $getModifyType->( $changes{'List::Util'} ),
	188	'data' => $changes{'List::Util'}
	189	},
	190	{
	191	'name' => 'Scalar::Util',
	192	'modification' => $getModifyType->( $changes{'Scalar::Util'} ),
	193	'data' => $changes{'Scalar::Util'}
	194	}
	195	],
	196	'Text-Tabs+Wrap' => [
	197	{
	198	'name' => 'Text::Tabs',
	199	'modification' => $getModifyType->( $changes{'Text::Tabs'} ),
	200	'data' => $changes{'Text::Tabs'}
	201	},
	202	{
	203	'name' => 'Text::Wrap',
	204	'modification' => $getModifyType->( $changes{'Text::Wrap'} ),
	205	'data' => $changes{'Text::Wrap'}
	206	}
	207	],
	208	);
	209
	210	# structure is (new\|removed\|updated) => [ [ModuleName, previousVersion, newVersion] ]
	211	my $deltaGrouping = {};
	212
	213	# list of distributions listed in %Modules that need to be manually checked because there is no module that represents it
	214	my @manuallyCheck;
	215
	216	# %Modules defines what is currently in core
	217	for my $k ( keys %Modules ) {
	218	next if $k eq '_PERLLIB'; #these are taken care of by being listed in @unclaimed
	219	next if Module::CoreList::is_core($k) and !exists $changes{$k}; #modules that have not changed
	220
	221	my ( $distName, $modifyType, $data );
	222
	223	if ( exists $changes{$k} ) {
	224	$distName = $k;
	225	$modifyType = $getModifyType->( $changes{$k} );
	226	$data = $changes{$k};
	227	}
	228	elsif ( exists $distToModules{$k} ) {
	229	# modification will be undef if the distribution has not changed
	230	my @modules = grep { $_->{modification} } @{ $distToModules{$k} };
	231	for (@modules) {
	232	$deltaGrouping->{ $_->{modification} }->{ $_->{name} } = [ $_->{name}, $_->{data}->{left}, $_->{data}->{right} ];
	233	}
	234	next;
	235	}
	236	else {
	237	push @manuallyCheck, $k and next;
	238	}
	239
	240	$deltaGrouping->{$modifyType}->{$distName} = [ $distName, $data->{left}, $data->{right} ];
	241	}
	242
	243	for my $k (@unclaimed) {
	244	if ( exists $changes{$k} ) {
	245	$deltaGrouping->{ $getModifyType->( $changes{$k} ) }->{$k} =
	246	[ $k, $changes{$k}->{left}, $changes{$k}->{right} ];
	247	}
	248	}
	249
	250	# in old corelist, but not this one => removed
	251	# N.B. This is exhaustive -- not just what's in %Modules, so modules removed from
	252	# distributions will show up here, too. Some person will have to review to see what's
	253	# important. That's the best we can do without a historical Maintainers.pl
	254	for my $k ( keys %{ $corelist->{$old} } ) {
	255	if ( ! exists $corelist->{$new}{$k} ) {
	256	$deltaGrouping->{'removed'}->{$k} = [ $k, $corelist->{$old}{$k}, undef ];
	257	}
	258	}
	259
	260	return (
	261	\%{ $deltaGrouping->{'new'} },
	262	\%{ $deltaGrouping->{'removed'} },
	263	\%{ $deltaGrouping->{'updated'} },
	264	\@manuallyCheck
	265	);
	266	}
	267
	268	# currently does not update the Removed Module section
	269	sub do_update_existing {
	270	my ( $existing, $old, $new ) = @_;
	271
	272	my ( $added, $removed, $updated, $manuallyCheck ) = corelist_delta( $old => $new );
	273	if ($manuallyCheck) {
	274	print "It cannot be determined whether the following distributions have changed.\n";
	275	print "Please check and list accordingly:\n";
	276	say "\t* $_" for sort @{$manuallyCheck};
	277	print "\n";
	278	}
	279
	280	my $data = {
	281	new => $added,
	282	updated => $updated,
	283	#removed => $removed, ignore removed for now
	284	};
	285
	286	my $text = DeltaUpdater::transform_pod( $existing, $data );
	287	open my $out, '>', $existing or die "can't open perldelta file $existing: $!";
	288	binmode($out);
	289	print $out $text;
	290	close $out;
	291	say "The New and Updated Modules and Pragamata sections in $existing have been updated";
	292	say "Please ensure the Removed Modules and Pragmata section is up-to-date";
	293	}
	294
	295	sub do_generate {
	296	my ($old, $new) = @_;
	297	my ($added, $removed, $updated, $manuallyCheck) = corelist_delta($old => $new);
	298
	299	if ($manuallyCheck) {
	300	print "\nXXXIt cannot be determined whether the following distributions have changed.\n";
	301	print "Please check and list accordingly:\n";
	302	say "\t$_" for @{$manuallyCheck};
	303	print "\n";
	304	}
	305
	306	my $data = {
	307	new => $added,
	308	updated => $updated,
	309	#removed => $removed, ignore removed for now
	310	};
	311
	312	say DeltaUpdater::sections_to_pod($data)
	313	}
	314
	315	sub do_check {
	316	my ($in, $old, $new) = @_;
	317
	318	my $delta = DeltaParser->new($in);
	319	my ($added, $removed, $updated) = corelist_delta($old => $new);
	320
	321	# because of the difficulty in identifying the distribution for removed modules
	322	# don't bother checking them
	323	for my $ck ([ 'new', $delta->new_modules, $added ],
	324	#[ 'removed', $delta->removed_modules, $removed ],
	325	[ 'updated', $delta->updated_modules, $updated ] ) {
	326	my @delta = @{ $ck->[1] };
	327	my @corelist = sort { lc $a->[0] cmp lc $b->[0] } values %{ $ck->[2] };
	328
	329	printf $ck->[0] . ":\n";
	330
	331	require Algorithm::Diff;
	332	my $diff = Algorithm::Diff->new(map {
	333	[map { join q{ } => grep defined, @{ $_ } } @{ $_ }]
	334	} \@delta, \@corelist);
	335
	336	while ($diff->Next) {
	337	next if $diff->Same;
	338	my $sep = '';
	339	if (!$diff->Items(2)) {
	340	printf "%d,%dd%d\n", $diff->Get(qw( Min1 Max1 Max2 ));
	341	} elsif(!$diff->Items(1)) {
	342	printf "%da%d,%d\n", $diff->Get(qw( Max1 Min2 Max2 ));
	343	} else {
	344	$sep = "---\n";
	345	printf "%d,%dc%d,%d\n", $diff->Get(qw( Min1 Max1 Min2 Max2 ));
	346	}
	347	print "Delta< $_\n" for $diff->Items(1);
	348	print $sep;
	349	print "Corelist> $_\n" for $diff->Items(2);
	350	}
	351
	352	print "\n";
	353	}
	354	}
	355
	356	{
	357
	358	package DeltaUpdater;
	359	use List::Util 'reduce';
	360
	361	sub get_section_name_from_heading {
	362	my $heading = shift;
	363	while (my ($key, $expression) = each %sections) {
	364	if ($heading =~ $expression) {
	365	return $titles{$key};
	366	}
	367	}
	368	die "$heading did not match any section";
	369	}
	370
	371	sub is_desired_section_name {
	372	for (values %sections) {
	373	return 1 if $_[0] =~ $_;
	374	}
	375	return 0;
	376	}
	377
	378	# verify the module and pragmata in the section, changing the stated version if necessary
	379	# this subroutine warns if the module name cannot be parsed or if it is not listed in
	380	# the results returned from corelist_delta()
	381	#
	382	# a side-effect of calling this function is that modules present in the section are
	383	# removed from $data, resulting in $data containing only those modules and pragmata
	384	# that were not listed in the perldelta file. This means we can then pass $data to
	385	# add_to_section() without worrying about filtering out duplicates
	386	sub update_section {
	387	my ( $section, $data, $title ) = @_;
	388	my @items = @{ $section->{items} };
	389
	390	for my $item (@items) {
	391
	392	my $content = $item->{text};
	393	my $module = $item->{name};
	394
	395	#skip dummy items
	396	next if !$module and $content =~ /\sxx\s*/i;
	397
	398	say "Could not parse module name; line is:\n\t$content" and next unless $module;
	399
	400	if ( !$data->{$title}{$module} ) {
	401	print "$module is not listed as being $title in Module::CoreList.\n";
	402	print "Ensure Module::CoreList has been updated and\n";
	403	print "check to see that the distribution is not listed under another name.\n\n";
	404	next;
	405	}
	406
	407	if ( $title eq 'new' ) {
	408	my ($new) = $content =~ /(\d[^\s]+)\s+has\s+been.*$/m;
	409	say "Could not parse new version for $module; line is:\n\t$content" and next unless $new;
	410	if ( $data->{$title}{$module}[2] ne $new ) {
	411	say "$module: new version differs; version in pod: $new; version in corelist: " . $data->{$title}{$module}[2];
	412	}
	413	$content =~ s/\d[^\s]+(\s+has\s+been.*$)/$data->{$title}{$module}[2].$1/me;
	414	}
	415
	416	elsif ( $title eq 'updated' ) {
	417	my ( $prev, $new ) = $content =~ /from\s+(?:version\s+)?(\d[^\s]+)\s+to\s+(?:version\s+)?(\d[^\s,]+?)(?=[\s,]\|\.\s\|\.$\|$).*/s;
	418	say "Could not parse old and new version for $module; line is:\n\t$content" and next
	419	unless $prev and $new;
	420	if ( $data->{$title}{$module}[1] ne $prev ) {
	421	say "$module: previous version differs; version in pod: $prev; version in corelist: " . $data->{$title}{$module}[1];
	422	}
	423	if ( $data->{$title}{$module}[2] ne $new ) {
	424	say "$module: new version differs; version in pod: $new; version in corelist: " . $data->{$title}{$module}[2];
	425	}
	426	$content =~
	427	s/(from\s+(?:version\s+)?)\d[^\s]+(\s+to\s+(?:version\s+)?)\d[^\s,]+?(?=[\s,]\|\.\s\|\.$\|$)(.*)/$1.$data->{$title}{$module}[1].$2.$data->{$title}{$module}[2].$3/se;
	428	}
	429
	430	elsif ( $title eq 'removed' ) {
	431	my ($prev) = $content =~ /^.*?was\s+(\d[^\s]+?)/m;
	432	say "Could not parse old version for $module; line is:\n\t$content" and next unless $prev;
	433	if ( $data->{$title}{$module}[1] ne $prev ) {
	434	say "$module: previous version differs; $prev " . $data->{$title}{$module}[1];
	435	}
	436	$content =~ s/(^.*?was\s+)\d[^\s]+?/$1.$data->{$title}{$module}[1]/me;
	437	}
	438
	439	delete $data->{$title}{$module};
	440	$item->{text} = $content;
	441	}
	442	return $section;
	443	}
	444
	445	# add modules and pragmata present in $data to the section
	446	sub add_to_section {
	447	my ( $section, $data, $title ) = @_;
	448
	449	#undef is a valid version name in Module::CoreList so suppress warnings about concatenating undef values
	450	no warnings 'uninitialized';
	451	for ( values %{ $data->{$title} } ) {
	452	my ( $mod, $old_v, $new_v ) = @{$_};
	453	my ( $item, $text );
	454
	455	$item = { name => $mod, text => "=item *\n" };
	456	if ( $title eq 'new' ) {
	457	$text = "L<$mod> $new_v has been added to the Perl core.\n";
	458	}
	459
	460	elsif ( $title eq 'updated' ) {
	461	$text = "L<$mod> has been upgraded from version $old_v to $new_v.\n";
	462	if ( $deprecated->{$mod} ) {
	463	$text .= "NOTE: L<$mod> is deprecated and may be removed from a future version of Perl.\n";
	464	}
	465	}
	466
	467	elsif ( $title eq 'removed' ) {
	468	$text = "C<$mod> has been removed from the Perl core. Prior version was $old_v.\n";
	469	}
	470
	471	$item->{text} .= "\n$text\n";
	472	push @{ $section->{items} }, $item;
	473	}
	474	return $section;
	475	}
	476
	477	sub sort_items_in_section {
	478	my ($section) = @_;
	479
	480	# if we could not parse the module name, it will be uninitalized
	481	# in sort. This is not a problem as it will just result in these
	482	# sections being placed near the beginning of the section
	483	no warnings 'uninitialized';
	484	$section->{items} =
	485	[ sort { lc $a->{name} cmp lc $b->{name} } @{ $section->{items} } ];
	486	return $section;
	487	}
	488
	489	# given a hashref of the form returned by corelist_delta()
	490	# and a hash structured as documented in transform_pod(), it returns
	491	# a pod string representation of the sections, creating sections
	492	# if necessary
	493	sub sections_to_pod {
	494	my ( $data, %sections ) = @_;
	495	my $out = '';
	496
	497	for (
	498	(
	499	[ 'New Modules and Pragmata', 'new' ],
	500	[ 'Updated Modules and Pragmata', 'updated' ],
	501	[ 'Removed Modules and Pragmata', 'removed' ]
	502	)
	503	)
	504	{
	505	my ( $section_name, $title ) = @{$_};
	506
	507	my $section = $sections{$section_name} // {
	508	name => $section_name,
	509	preceding_text => "=head2 $_->[0]\n=over 4\n",
	510	following_text => "=back\n",
	511	items => [],
	512	manual => 1
	513	};
	514
	515	$section = update_section( $section, $data, $title );
	516	$section = add_to_section( $section, $data, $title );
	517	$section = sort_items_in_section( $section );
	518
	519	next if $section->{manual} and scalar @{ $section->{items} } == 0;
	520
	521	my $items = reduce { no warnings 'once'; $a . $b->{text} }
	522	( '', @{ $section->{items} } );
	523	$out .=
	524	( $section->{preceding_text} // '' )
	525	. $items
	526	. ( $section->{following_text} // '' );
	527	}
	528	return $out;
	529	}
	530
	531	# given a filename corresponding to an existing perldelta file
	532	# and a hashref of the form returned by corelist_delta(), it
	533	# returns a string of the resulting file after the module
	534	# information has been added.
	535	sub transform_pod {
	536	my ( $existing, $data ) = @_;
	537
	538	# will contain hashrefs corresponding to new, updated and removed
	539	# modules and pragmata keyed by section name
	540	# each section is hashref of the structure
	541	# preceding_text => Text occurring before and including the over
	542	# region containing the list of modules,
	543	# items => [Arrayref of hashrefs corresponding to a module
	544	# entry],
	545	# an entry has the form:
	546	# name => Module name or undef if the name could not be determined
	547	# text => The text of the entry, including the item heading
	548	#
	549	# following_text => Any text not corresponding to a module
	550	# that occurs after the first module
	551	#
	552	# the sections are converted to a pod string by calling sections_to_pod()
	553	my %sections;
	554
	555	# we are in the Modules_and_Pragmata's section
	556	my $in_Modules_and_Pragmata;
	557
	558	# we are the Modules_and_Pragmata's section but have not
	559	# encountered any of the desired sections. We use this
	560	# flag to determine whether we should append the text to $out
	561	# or we need to delay appending until the module listings are
	562	# processed and instead append to $append_to_out
	563	my $in_Modules_and_Pragmata_preamble;
	564
	565	my $done_processing_Modules_and_Pragmata;
	566
	567	my $current_section;
	568
	569	# $nested_element_level == 0 : not in an over region, treat lines as text
	570	# $nested_element_level == 1 : presumably in the top over region that
	571	# corresponds to the module listing. Treat
	572	# each item as a module
	573	# $nested_element_level > 1 : we only consider these values when we are in an item
	574	# We treat lines as the text of the current item.
	575	my $nested_element_level = 0;
	576
	577	my $current_item;
	578	my $need_to_parse_module_name;
	579
	580	my $out = '';
	581	my $append_to_out = '';
	582
	583	open my $fh, '<', $existing or die "can't open perldelta file $existing: $!";
	584	binmode($fh);
	585
	586	while (<$fh>) {
	587	# treat the rest of the file as plain text
	588	if ($done_processing_Modules_and_Pragmata) {
	589	$out .= $_;
	590	next;
	591	}
	592
	593	elsif ( !$in_Modules_and_Pragmata ) {
	594	# entering Modules and Pragmata
	595	if (/^=head1 Modules and Pragmata/) {
	596	$in_Modules_and_Pragmata = 1;
	597	$in_Modules_and_Pragmata_preamble = 1;
	598	}
	599	$out .= $_;
	600	next;
	601	}
	602
	603	# leaving Modules and Pragmata
	604	elsif (/^=head1/) {
	605	if ($current_section) {
	606	push @{ $current_section->{items} }, $current_item
	607	if $current_item;
	608	$sections{ $current_section->{name} } = $current_section;
	609	}
	610	$done_processing_Modules_and_Pragmata = 1;
	611	$out .=
	612	sections_to_pod( $data, %sections ) . $append_to_out . $_;
	613	next;
	614	}
	615
	616	# new section in Modules and Pragmata
	617	elsif (/^=head2 (.*?)$/) {
	618	my $name = $1;
	619	if ($current_section) {
	620	push @{ $current_section->{items} }, $current_item
	621	if $current_item;
	622	$sections{ $current_section->{name} } = $current_section;
	623	undef $current_section;
	624	}
	625
	626	if ( is_desired_section_name($name) ) {
	627	undef $in_Modules_and_Pragmata_preamble;
	628	if ( $nested_element_level > 0 ) {
	629	die "Unexpected head2 at line no. $.";
	630	}
	631	my $title = get_section_name_from_heading($name);
	632	if ( exists $sections{$title} ) {
	633	die "$name occurred twice at line no. $.";
	634	}
	635	$current_section = {};
	636	$current_section->{name} = $title;
	637	$current_section->{preceding_text} = $_;
	638	$current_section->{items} = [];
	639	$nested_element_level = 0;
	640	next;
	641	}
	642
	643	# otherwise treat section as plain text
	644	else {
	645	if ($in_Modules_and_Pragmata_preamble) {
	646	$out .= $_;
	647	}
	648	else {
	649	$append_to_out .= $_;
	650	}
	651	next;
	652	}
	653	}
	654
	655	elsif ($current_section) {
	656
	657	# not in an over region
	658	if ( $nested_element_level == 0 ) {
	659	if (/^=over/) {
	660	$nested_element_level++;
	661	}
	662	if ( scalar @{ $current_section->{items} } > 0 ) {
	663	$current_section->{following_text} .= $_;
	664	}
	665	else {
	666	$current_section->{preceding_text} .= $_;
	667	}
	668	next;
	669	}
	670
	671	if ($current_item) {
	672	if ($need_to_parse_module_name) {
	673	# the item may not have a parsable module name, which means that
	674	# $current_item->{name} will never be defined.
	675	if (/^(?:L\|C)<(.+?)>/) {
	676	$current_item->{name} = $1;
	677	undef $need_to_parse_module_name;
	678	}
	679	# =item or =back signals the end of an item
	680	# block, which we handle below
	681	if ( !/^=(?:item\|back)/ ) {
	682	$current_item->{text} .= $_;
	683	next;
	684	}
	685	}
	686	# currently in an over region
	687	# treat text inside region as plain text
	688	if ( $nested_element_level > 1 ) {
	689	if (/^=back/) {
	690	$nested_element_level--;
	691	}
	692	elsif (/^=over/) {
	693	$nested_element_level++;
	694	}
	695	$current_item->{text} .= $_;
	696	next;
	697	}
	698	# entering over region
	699	if (/^=over/) {
	700	$nested_element_level++;
	701	$current_item->{text} .= $_;
	702	next;
	703	}
	704	# =item or =back signals the end of an item
	705	# block, which we handle below
	706	if ( !/^=(?:item\|back)/ ) {
	707	$current_item->{text} .= $_;
	708	next;
	709	}
	710	}
	711
	712	if (/^=item \*/) {
	713	push @{ $current_section->{items} }, $current_item
	714	if $current_item;
	715	$current_item = { text => $_ };
	716	$need_to_parse_module_name = 1;
	717	next;
	718	}
	719
	720	if (/^=back/) {
	721	push @{ $current_section->{items} }, $current_item
	722	if $current_item;
	723	undef $current_item;
	724	$nested_element_level--;
	725	}
	726
	727	if ( scalar @{ $current_section->{items} } == 0 ) {
	728	$current_section->{preceding_text} .= $_;
	729	}
	730	else {
	731	$current_section->{following_text} .= $_;
	732	}
	733	next;
	734	}
	735
	736	# text in Modules and Pragmata not in a head2 region
	737	else {
	738	if ($in_Modules_and_Pragmata_preamble) {
	739	$out .= $_;
	740	}
	741	else {
	742	$append_to_out .= $_;
	743	}
	744	next;
	745	}
	746	}
	747	close $fh;
	748	die 'Never saw Modules and Pragmata section' unless $in_Modules_and_Pragmata;
	749	return $out;
	750	}
	751
	752	}
	753
	754	{
	755	package DeltaParser;
	756	use Pod::Simple::SimpleTree;
	757
	758	sub new {
	759	my ($class, $input) = @_;
	760
	761	my $self = bless {} => $class;
	762
	763	my $parsed_pod = Pod::Simple::SimpleTree->new->parse_file($input)->root;
	764	splice @{ $parsed_pod }, 0, 2; # we don't care about the document structure,
	765	# just the nodes within it
	766
	767	$self->_parse_delta($parsed_pod);
	768
	769	return $self;
	770	}
	771
	772	# creates the accessor methods:
	773	# new_modules
	774	# updated_modules
	775	# removed_modules
	776	for my $k (keys %sections) {
	777	no strict 'refs';
	778	my $m = "${k}_modules";
	779	*$m = sub { $_[0]->{$m} };
	780	}
	781
	782	sub _parse_delta {
	783	my ($self, $pod) = @_;
	784
	785	my $new_section = $self->_look_for_section( $pod, $sections{new} );
	786	my $updated_section = $self->_look_for_section( $pod, $sections{updated} );
	787	my $removed_section = $self->_look_for_section( $pod, $sections{removed} );
	788
	789	$self->_parse_new_section($new_section);
	790	$self->_parse_updated_section($updated_section);
	791	$self->_parse_removed_section($removed_section);
	792
	793	for (qw/new_modules updated_modules removed_modules/) {
	794	$self->{$_} =
	795	[ sort { lc $a->[0] cmp lc $b->[0] } @{ $self->{$_} } ];
	796	}
	797
	798	return;
	799	}
	800
	801	sub _parse_new_section {
	802	my ($self, $section) = @_;
	803
	804	$self->{new_modules} = [];
	805	return unless $section;
	806	$self->{new_modules} = $self->_parse_section($section => sub {
	807	my ($el) = @_;
	808
	809	my ($first, $second) = @{ $el }[2, 3];
	810	my ($ver) = $second =~ /(\d[^\s]+)\s+has\s+been/;
	811
	812	return [ $first->[2], undef, $ver ];
	813	});
	814
	815	return;
	816	}
	817
	818	sub _parse_updated_section {
	819	my ($self, $section) = @_;
	820
	821	$self->{updated_modules} = [];
	822	return unless $section;
	823	$self->{updated_modules} = $self->_parse_section($section => sub {
	824	my ($el) = @_;
	825
	826	my ($first, $second) = @{ $el }[2, 3];
	827	my $module = $first->[2];
	828
	829	# the regular expression matches the following:
	830	# from VERSION_NUMBER to VERSION_NUMBER
	831	# from VERSION_NUMBER to VERSION_NUMBER.
	832	# from version VERSION_NUMBER to version VERSION_NUMBER.
	833	# from VERSION_NUMBER to VERSION_NUMBER and MODULE from VERSION_NUMBER to VERSION_NUMBER
	834	# from VERSION_NUMBER to VERSION_NUMBER, and MODULE from VERSION_NUMBER to VERSION_NUMBER
	835	#
	836	# some perldeltas contain more than one module listed in an entry, this only attempts to match the
	837	# first module
	838	my ($old, $new) = $second =~
	839	/from\s+(?:version\s+)?(\d[^\s]+)\s+to\s+(?:version\s+)?(\d[^\s,]+?)(?=[\s,]\|\.\s\|\.$\|$).*/s;
	840
	841	warn "Unable to extract old or new version of $module from perldelta"
	842	if !defined $old \|\| !defined $new;
	843
	844	return [ $module, $old, $new ];
	845	});
	846
	847	return;
	848	}
	849
	850	sub _parse_removed_section {
	851	my ($self, $section) = @_;
	852
	853	$self->{removed_modules} = [];
	854	return unless $section;
	855	$self->{removed_modules} = $self->_parse_section($section => sub {
	856	my ($el) = @_;
	857
	858	my ($first, $second) = @{ $el }[2, 3];
	859	my ($old) = $second =~ /was\s+(\d[^\s]+?)\.?$/;
	860
	861	return [ $first->[2], $old, undef ];
	862	});
	863
	864	return;
	865	}
	866
	867	sub _parse_section {
	868	my ($self, $section, $parser) = @_;
	869
	870	my $items = $self->_look_down($section => sub {
	871	my ($el) = @_;
	872	return unless ref $el && $el->[0] =~ /^item-/
	873	&& @{ $el } > 2 && ref $el->[2];
	874	return unless $el->[2]->[0] =~ /C\|L/;
	875
	876	return 1;
	877	});
	878
	879	return [map { $parser->($_) } @{ $items }];
	880	}
	881
	882	sub _look_down {
	883	my ($self, $pod, $predicate) = @_;
	884	my @pod = @{ $pod };
	885
	886	my @l;
	887	while (my $el = shift @pod) {
	888	push @l, $el if $predicate->($el);
	889	if (ref $el) {
	890	my @el = @{ $el };
	891	splice @el, 0, 2;
	892	unshift @pod, @el if @el;
	893	}
	894	}
	895
	896	return @l ? \@l : undef;
	897	}
	898
	899	sub _look_for_section {
	900	my ($self, $pod, $section) = @_;
	901
	902	my $level;
	903	$self->_look_for_range($pod,
	904	sub {
	905	my ($el) = @_;
	906	my ($heading) = $el->[0] =~ /^head(\d)$/;
	907	my $f = $heading && $el->[2] =~ /^$section/;
	908	$level = $heading if $f && !$level;
	909	return $f;
	910	},
	911	sub {
	912	my ($el) = @_;
	913	$el->[0] =~ /^head(\d)$/ && $1 <= $level;
	914	},
	915	);
	916	}
	917
	918	sub _look_for_range {
	919	my ($self, $pod, $start_predicate, $stop_predicate) = @_;
	920
	921	my @l;
	922	for my $el (@{ $pod }) {
	923	if (@l) {
	924	return \@l if $stop_predicate->($el);
	925	}
	926	else {
	927	next unless $start_predicate->($el);
	928	}
	929	push @l, $el;
	930	}
	931
	932	return;
	933	}
	934	}
	935
	936	run;