./perl -I../lib porting/podcheck.t --add_link Unicode::Casing
causes the external module "Unicode::Casing" to be added to the data base, so
-C<LE<lt>Unicode::Casing<gt>> will be considered valid.
+C<LE<lt>Unicode::CasingE<gt>> will be considered valid.
=item --regen
=cut
+# VMS builds have a '.com' appended to utility and script names, and it adds a
+# trailing dot for any other file name that doesn't have a dot in it. The db
+# is stored without those things. This regex allows for these special file
+# names to be dealt with. It needs to be interpolated into a larger regex
+# that furnishes the closing boundary.
+my $vms_re = qr/ \. (?: com )? /x;
+
+# Some filenames in the MANIFEST match $vms_re, and so must not be handled the
+# same way that that the special vms ones are. This hash lists those.
+my %special_vms_files;
+
# This is to get this to work across multiple file systems, including those
# that are not case sensitive. The db is stored in lower case, Un*x style,
# and all file name comparisons are done that way.
# Assumes $volume is constant for everything in this directory structure
$directories = "" if ! $directories;
$file = "" if ! $file;
- $file =~ s/\.$// if $^O eq 'VMS';
- my $output = lc join '/', File::Spec->splitdir($directories), $file;
- $output =~ s! / /+ !/!gx; # Multiple slashes => single slash
- return $output;
+ $file = lc join '/', File::Spec->splitdir($directories), $file;
+ $file =~ s! / /+ !/!gx; # Multiple slashes => single slash
+
+ # The db is stored without the special suffixes that are there in VMS, so
+ # strip them off to get the comparable name. But some files on all
+ # platforms have these suffixes, so this shouldn't happen for them, as any
+ # of their db entries will have the suffixes in them. The hash has been
+ # populated with these files.
+ if ($^O eq 'VMS'
+ && $file =~ / ( $vms_re ) $ /x
+ && ! exists $special_vms_files{$file})
+ {
+ $file =~ s/ $1 $ //x;
+ }
+ return $file;
}
#####################################################
# Not really pods, but can look like them.
my %excluded_files = (
canonicalize("lib/unicore/mktables") => 1,
+ canonicalize("Porting/make-rmg-checklist") => 1,
canonicalize("Porting/perldelta_template.pod") => 1,
+ canonicalize("regen/feature.pl") => 1,
canonicalize("autodoc.pl") => 1,
canonicalize("configpm") => 1,
canonicalize("miniperl") => 1,
canonicalize("perl") => 1,
+ canonicalize('cpan/Pod-Perldoc/corpus/no-head.pod') => 1,
+ canonicalize('cpan/Pod-Perldoc/corpus/perlfunc.pod') => 1,
+ canonicalize('cpan/Pod-Perldoc/corpus/utf8.pod') => 1,
+ canonicalize("lib/unicore/mktables") => 1,
);
# This list should not include anything for which case sensitivity is
| $lib_ext # object libraries
| $lib_so # shared libraries
| $dl_ext # dynamic libraries
+ | gif # GIF images (example files from CGI.pm)
+ | eg # examples from libnet
)
$
) | ~$ | \ \(Autosaved\)\.txt$ # Other editor droppings
# files with the same names except for case.
open my $manifest_fh, '<:bytes', $MANIFEST or die "Can't open $MANIFEST";
while (<$manifest_fh>) {
+
+ # While we have MANIFEST open, on VMS platforms, look for files that match
+ # the magic VMS file names that have to be handled specially. Add these
+ # to the list of them.
+ if ($^O eq 'VMS' && / ^ ( [^\t]* $vms_re ) \t /x) {
+ $special_vms_files{$1} = 1;
+ }
if (/ ^ ( [^\t]* \. PL ) \t /x) {
$excluded_files{canonicalize($1)} = 1;
}
sub plan {
my %plan = @_;
- $planned = $plan{tests};
+ $planned = $plan{tests} + 1; # +1 for final test that files haven't
+ # been removed
print "1..$planned\n";
return;
}
$current_test++;
print "not " unless $success;
print "ok $current_test - $message\n";
- return;
+ return $success;
}
sub skip {
delete $problems{$self->get_filename};
return;
}
+
+ sub parse_from_file {
+ # This overrides the super class method so that if an open fails on a
+ # transitory file, it doesn't croak. It returns 1 if it did find the
+ # file, 0 if it didn't
+
+ my $self = shift;
+ my $filename = shift;
+ # ignores 2nd param, which is output file. Always uses undef
+
+ if (open my $in_fh, '<:bytes', $filename) {
+ $self->SUPER::parse_from_filehandle($in_fh, undef);
+ close $in_fh;
+ return 1;
+ }
+
+ # If couldn't open file, perhaps it was transitory, and hence not an error
+ return 0 unless -e $filename;
+
+ die "Can't open '$filename': $!\n";
+ }
}
package Tie_Array_to_FH { # So printing actually goes to an array
}
}
-sub extract_pod { # Extracts just the pod from a file
+sub extract_pod { # Extracts just the pod from a file; returns undef if file
+ # doesn't exist
my $filename = shift;
my @pod;
# Arrange for the output of Pod::Parser to be collected in an array we can
# look at instead of being printed
tie *ALREADY_FH, 'Tie_Array_to_FH', \@pod;
- open my $in_fh, '<:bytes', $filename
+ if (open my $in_fh, '<:bytes', $filename) {
+ my $parser = Pod::Parser->new();
+ $parser->parse_from_filehandle($in_fh, *ALREADY_FH);
+ close $in_fh;
- # The file should already have been opened once to get here, so if
- # fails, just die. It's possible that a transitory file containing a
- # pod would get here, but not bothering to add code for that very
- # unlikely event.
- or die "Can't open '$filename': $!\n";
-
- my $parser = Pod::Parser->new();
- $parser->parse_from_filehandle($in_fh, *ALREADY_FH);
- close $in_fh;
+ return join "", @pod
+ }
- return join "", @pod
+ # The file should already have been opened once to get here, so if that
+ # fails, something is wrong. It's possible that a transitory file
+ # containing a pod would get here, so if the file no longer exists just
+ # return undef.
+ return unless -e $filename;
+ die "Can't open '$filename': $!\n";
}
my $digest = Digest->new($digest_type);
+# This is used as a callback from File::Find::find(), which always constructs
+# pathnames using Unix separators
sub is_pod_file {
# If $_ is a pod file, add it to the lists and do other prep work.
- if (-d $_) {
+ if (-d) {
# Don't look at files in directories that are for tests, nor those
# beginning with a dot
- if ($_ eq 't' || $_ =~ /^\../) {
+ if (m!/t\z! || m!/\.!) {
$File::Find::prune = 1;
}
return;
}
- if ($_ =~ /^\./ # No hidden Unix files
- || $_ =~ $non_pods) {
+ return unless -r && -s; # Can't check it if can't read it; no need to
+ # check if 0 length
+ return unless -f || -l; # Weird file types won't be pods
+
+ my ($leaf) = m!([^/]+)\z!;
+ if (m!/\.! # No hidden Unix files
+ || $leaf =~ $non_pods) {
note("Not considering $_") if DEBUG;
return;
}
my $filename = $File::Find::name;
- # Assumes that the path separator is exactly one character.
- $filename =~ s/^\..//;
+ # $filename is relative, like './path'. Strip that initial part away.
+ $filename =~ s!^\./!! or die 'Unexpected pathname "$filename"';
return if $excluded_files{canonicalize($filename)};
# Otherwise fail it here and no reason to process it further.
# (But the test count will be off too)
ok(0, "Can't open '$filename': $!")
- if -e $filename && ! -l $filename;
+ if -r $filename && ! -l $filename;
return;
}
<$candidate>;
chdir File::Spec->updir;
# And look in this directory and all its subdirectories
- find( \&is_pod_file, '.');
+ find( {wanted => \&is_pod_file, no_chdir => 1}, '.');
# Add ourselves to the test
push @files, "t/porting/podcheck.t";
@files;
# Now go through all the files and parse them
+FILE:
foreach my $filename (@files) {
my $parsed = 0;
note("parsing $filename") if DEBUG;
# We have set the name in the checker object if there is a possibility
# that no further parsing is necessary, but otherwise do the parsing now.
if (! $checker->name) {
+ if (! $checker->parse_from_file($filename, undef)) {
+ $checker->set_skip("$filename is transitory");
+ next FILE;
+ }
$parsed = 1;
- $checker->parse_from_file($filename, undef);
+
}
if ($checker->num_errors() < 0) { # Returns negative if not a pod
}
else {
my $digest = Digest->new($digest_type);
- $digest->add(extract_pod($filename));
+ my $contents = extract_pod($filename);
+
+ # If the return is undef, it means that $filename was a transitory
+ # file; skip it.
+ next FILE unless defined $contents;
+ $digest->add($contents);
$id = $digest->digest;
}
# reason, but the pods they contain are identical. Extract the
# pods and do the comparisons on just those.
if (! $same && $name) {
- $same = extract_pod($prior_filename) eq extract_pod($filename);
+ my $contents = extract_pod($filename);
+
+ # If return is <undef>, it means that $filename no longer
+ # exists. This means it was a transitory file, and should not
+ # be tested.
+ next FILE unless defined $contents;
+
+ my $prior_contents = extract_pod($prior_filename);
+
+ # If return is <undef>, it means that $prior_filename no
+ # longer exists. This means it was a transitory file, and
+ # should not have been tested, but we already did process it.
+ # What we should do now is to back-out its records, and
+ # process $filename in its stead. But backing out is not so
+ # simple, and so I'm (khw) skipping that unless and until
+ # experience shows that it is needed. We do go process
+ # $filename, and there are potential false positive conflicts
+ # with the transitory $prior_contents, and rerunning the test
+ # should cause it to succeed.
+ goto process_this_pod unless defined $prior_contents;
+
+ $same = $prior_contents eq $contents;
}
if ($same) {
# In any event, don't process this pod that has the same name as
# another.
- next;
+ next FILE;
}
+ process_this_pod:
+
# A unique pod.
$id_to_checker{$id} = $checker;
if ($filename =~ /^cpan/) {
$checker->set_skip("CPAN is upstream for $filename");
}
- elsif ($filename =~ /perl\d+delta/ && ! $do_deltas) {
- $checker->set_skip("$filename is a stable perldelta");
+ elsif ($filename =~ /perl\d+delta/) {
+ if (! $do_deltas) {
+ $checker->set_skip("$filename is a stable perldelta");
+ }
}
elsif ($filename =~ /perltoc/) {
$checker->set_skip("$filename dependent on component pods");
$checker->poderror( { -msg => $no_name,
-line => '???'
});
- next;
+ next FILE;
}
# For skipped files, just get its NAME
{
$checker->node($name) if $name;
}
- else {
- $checker->parse_from_file($filename, undef) if ! $parsed;
+ elsif (! $parsed) {
+ if (! $checker->parse_from_file($filename, undef)) {
+ $checker->set_skip("$filename is transitory");
+ next FILE;
+ }
}
# Go through everything in the file that could be an anchor that
# could be a link target. Count how many there are of the same name.
foreach my $node ($checker->linkable_nodes) {
- next if ! $node; # Can be empty is like '=item *'
+ next FILE if ! $node; # Can be empty is like '=item *'
if (exists $nodes{$name}{$node}) {
$nodes{$name}{$node}++;
}
}
# If regenerating the data file, start with the modules for which we don't
-# check targets
+# check targets. If you change the sort order, you need to run --regen before
+# committing so that future commits that do run regen don't show irrelevant
+# changes.
if ($regen) {
foreach (sort { lc $a cmp lc $b } keys %valid_modules) {
my_safer_print($copy_fh, $_, "\n");
note(join "", @diagnostics,
"See end of this test output for your options on silencing this");
}
+
+ delete $known_problems{$canonical};
}
}
+if (! ok (keys %known_problems == 0, "The known problems data base includes no references to non-existent files")) {
+ note("The following files were not found: "
+ . join ", ", keys %known_problems);
+ note("They will automatically be removed from the db the next time");
+ note(" cd t; ./perl -I../lib porting/podcheck.t --regen");
+ note("is run");
+}
+
my $how_to = <<EOF;
run this test script by hand, using the following formula (on
Un*x-like machines):