BEGIN {
chdir 't';
- unshift @INC, "../lib";
+ @INC = "../lib";
+ # Do not require test.pl, this file has its own framework.
}
use strict;
use Text::Tabs;
BEGIN {
+ if ( $Config{usecrosscompile} ) {
+ print "1..0 # Not all files are available during cross-compilation\n";
+ exit 0;
+ }
+ if ($^O eq 'dec_osf') {
+ print "1..0 # $^O cannot handle this test\n";
+ exit(0);
+ }
require '../regen/regen_lib.pl';
}
cd t
./perl -I../lib porting/podcheck.t [--show_all] [--cpan] [--deltas]
- [--counts] [ FILE ...]
+ [--counts] [--pedantic] [FILE ...]
+
./perl -I../lib porting/podcheck.t --add_link MODULE ...
./perl -I../lib porting/podcheck.t --regen
database. It also suppresses the C<(section) deprecated> message from
Pod::Checker, since specifying the man page section number is quite proper to do.
-The additional checks it makes are:
+The additional checks it always makes are:
=over
is to actually create a link to C<search.cpan.org> with an embedded query for
the desired pod or man page. That means that links outside the distribution
are valid. podcheck.t doesn't verify the validity of such links, but instead
-keeps a data base of those known to be valid. This means that if a link to a
+keeps a database of those known to be valid. This means that if a link to a
target not on the list is created, the target needs to be added to the data
base. This is accomplished via the L<--add_link|/--add_link MODULE ...>
option to podcheck.t, described below.
name, it is likely that the internal target was meant, and the C<"/"> is
missing from the C<LE<lt>E<gt>> pod command.
+=item Missing or duplicate NAME or missing NAME short description
+
+A pod can't be linked to unless it has a unique name.
+And a NAME should have a dash and short description after it.
+
+=item =encoding statement issues
+
+This indicates if an C<=encoding> statement should be present, or moved to the
+front of the pod.
+
+=back
+
+If the C<PERL_POD_PEDANTIC> environment variable is set or the C<--pedantic>
+command line argument is provided then a few more checks are made.
+The pedantic checks are:
+
+=over
+
=item Verbatim paragraphs that wrap in an 80 (including 1 spare) column window
It's annoying to have lines wrap when displaying pod documentation in a
Often, the easiest thing to do to gain space for these is to lower the indent
to just one space.
-=item Missing or duplicate NAME or missing NAME short description
-
-A pod can't be linked to unless it has a unique name.
-And a NAME should have a dash and short description after it.
-
-=item =encoding statement issues
-
-This indicates if an C<=encoding> statement should be present, or moved to the
-front of the pod.
-
=item Items that perhaps should be links
There are mentions of apparent files in the pods that perhaps should be links
actually are.
Another problem is that there is currently no check that modules listed as
-valid in the data base
+valid in the database
actually are. Thus any errors introduced there will remain there.
=head2 Specially handled pods
podcheck.t checks that links within the Perl core distribution are valid, but
it doesn't check links to man pages or external modules. When it finds
-a broken link, it checks its data base of external modules and man pages,
+a broken link, it checks its database of external modules and man pages,
and only if not found there does it raise a message. This option just adds
the list of modules and man page references that follow it on the command line
-to that data base.
+to that database.
For example,
cd t
./perl -I../lib porting/podcheck.t --add_link Unicode::Casing
-causes the external module "Unicode::Casing" to be added to the data base, so
+causes the external module "Unicode::Casing" to be added to the database, so
C<LE<lt>Unicode::CasingE<gt>> will be considered valid.
=item --regen
-Regenerate the data base used by podcheck.t to include all the existing
+Regenerate the database used by podcheck.t to include all the existing
potential problems. Future runs of the program will not then flag any of
-these.
+these. Setting this option also sets C<--pedantic>.
=item --cpan
=item --counts
Instead of testing, this just dumps the counts of the occurrences of the
-various types of potential problems in the data base.
+various types of potential problems in the database.
+
+=item --pedantic
+
+There are three potential problems that are not checked for by default.
+This options enables them. The environment variable C<PERL_POD_PEDANTIC>
+can be set to 1 to enable this option also.
+This option is set when C<--regen> is used.
=back
# Our warning messages. Better not have [('"] in them, as those are used as
# delimiters for variable parts of the messages by poderror.
-my $line_length = "Verbatim line length including indents exceeds $MAX_LINE_LENGTH by";
my $broken_link = "Apparent broken link";
my $broken_internal_link = "Apparent internal link is missing its forward slash";
-my $see_not_linked = "? Should you be using L<...> instead of";
-my $C_with_slash = "? Should you be using F<...> or maybe L<...> instead of";
my $multiple_targets = "There is more than one target";
my $duplicate_name = "Pod NAME already used";
my $need_encoding = "Should have =encoding statement because have non-ASCII";
my $encoding_first = "=encoding must be first command (if present)";
my $no_name = "There is no NAME";
my $missing_name_description = "The NAME should have a dash and short description after it";
+# the pedantic warnings messages
+my $line_length = "Verbatim line length including indents exceeds $MAX_LINE_LENGTH by";
+my $C_not_linked = "? Should you be using L<...> instead of";
+my $C_with_slash = "? Should you be using F<...> or maybe L<...> instead of";
# objects, tests, etc can't be pods, so don't look for them. Also skip
# files output by the patch program. Could also ignore most of .gitignore
my %excluded_files = (
canonicalize("lib/unicore/mktables") => 1,
canonicalize("Porting/make-rmg-checklist") => 1,
+ # this one is a POD, but unfinished, so skip
+ # it for now
+ canonicalize("Porting/perl5200delta.pod") => 1,
canonicalize("Porting/perldelta_template.pod") => 1,
canonicalize("regen/feature.pl") => 1,
+ canonicalize("regen/warnings.pl") => 1,
canonicalize("autodoc.pl") => 1,
canonicalize("configpm") => 1,
canonicalize("miniperl") => 1,
my $regen = 0;
my $add_link = 0;
my $show_all = 0;
+my $pedantic = 0;
my $do_upstream_cpan = 0; # Assume that are to skip anything in /cpan
my $do_deltas = 0; # And stable perldeltas
$arg =~ s/^--/-/; # Treat '--' the same as a single '-'
if ($arg eq '-regen') {
$regen = 1;
+ $pedantic = 1;
}
elsif ($arg eq '-add_link') {
$add_link = 1;
elsif ($arg eq '-counts') {
$show_counts = 1;
}
+ elsif ($arg eq '-pedantic') {
+ $pedantic = 1;
+ }
else {
die <<EOF;
Unknown option '$arg'
Usage: $0 [ --regen | --cpan | --show_all | FILE ... | --add_link MODULE ... ]\n"
- --add_link -> Add the MODULE and man page references to the data base
+ --add_link -> Add the MODULE and man page references to the database
--regen -> Regenerate the data file for $0
--cpan -> Include files in the cpan subdirectory.
--deltas -> Include stable perldeltas
--show_all -> Show all known potential problems
--counts -> Don't test, but give summary counts of the currently
existing database
+ --pedantic -> Check for overly long lines in verbatim blocks
EOF
}
}
+$pedantic = 1 if exists $ENV{PERL_POD_PEDANTIC} and $ENV{PERL_POD_PEDANTIC};
my @files = @ARGV;
my $cpan_or_deltas = $do_upstream_cpan || $do_deltas;
$lines[$i] =~ s/\s+$//;
my $indent = $self->get_current_indent;
- if ($ENV{PERL_POD_PEDANTIC}) {
- my $exceeds = length(Text::Tabs::expand($lines[$i]))
- + $indent - $MAX_LINE_LENGTH;
- next unless $exceeds > 0;
- my ($file, $line) = $pod_para->file_line;
- $self->poderror({ -line => $line + $i, -file => $file,
- -msg => $line_length,
- parameter => "+$exceeds (including " . ($indent - $INDENT) . " from =over's)",
- });
+ if ($pedantic) { # TODO: this check should be moved higher
+ # to avoid more unnecessary work
+ my $exceeds = length(Text::Tabs::expand($lines[$i]))
+ + $indent - $MAX_LINE_LENGTH;
+ next unless $exceeds > 0;
+ my ($file, $line) = $pod_para->file_line;
+ $self->poderror({ -line => $line + $i, -file => $file,
+ -msg => $line_length,
+ parameter => "+$exceeds (including " . ($indent - $INDENT) . " from =over's)",
+ });
}
}
}
&& $interior !~ /\/.+\//)
)
) {
+ # TODO: move the checking of $pedantic higher up
$self->poderror({ -line => $line, -file => $file,
- -msg => $see_not_linked,
+ -msg => $C_not_linked,
parameter => $construct
- });
+ }) if $pedantic;
}
}
}
while ($paragraph =~ m/$C_path_re/g) {
my $construct = $1;
+ # TODO: move the checking of $pedantic higher up
$self->poderror({ -line => $line, -file => $file,
-msg => $C_with_slash,
parameter => $construct
- });
+ }) if $pedantic;
}
return;
}
}
-my %filename_to_checker; # Map a filename to it's pod checker object
-my %id_to_checker; # Map a checksum to it's pod checker object
-my %nodes; # key is filename, values are nodes in that file.
-my %nodes_first_word; # same, but value is first word of each node
-my %valid_modules; # List of modules known to exist outside us.
-my %digests; # checksums of files, whose names are the keys
-my %filename_to_pod; # Map a filename to its pod NAME
+my %filename_to_checker; # Map a filename to its pod checker object
+my %id_to_checker; # Map a checksum to its pod checker object
+my %nodes; # key is filename, values are nodes in that file.
+my %nodes_first_word; # same, but value is first word of each node
+my %valid_modules; # List of modules known to exist outside us.
+my %digests; # checksums of files, whose names are the keys
+my %filename_to_pod; # Map a filename to its pod NAME
my %files_with_unknown_issues;
my %files_with_fixes;
my @existing_issues;
-while (<$data_fh>) { # Read the data base
+while (<$data_fh>) { # Read the database
chomp;
next if /^\s*(?:#|$)/; # Skip comment and empty lines
if (/\t/) {
# name
if ($contents =~ /\G # continue from the line after =head1
\s* # ignore any empty lines
+
+ # ignore =for paragraphs followed by empty
+ # lines
+ (?: ^ =for .*? \n (?: [^\s]*? \n )* \s* )*
+
^ \s* ( \S+?) \s* (?: [,-] | $ )/mx) {
my $name = $1;
$checker->name($name);
plan (tests => scalar @files) if ! $regen;
- # Sort file names so we get consistent results, and to put cpan last,
- # preceeded by the ones that we don't generally parse. This is because both
- # these classes are generally parsed only if there is a link to the interior
- # of them, and we have to parse all others first to guarantee that they don't
- # have such a link. 'lib' files come just before these, as some of these are
- # duplicates of others. We already have figured this out when gathering the
- # data as a special case for all such files, but this, while unnecessary,
- # puts the derived file last in the output. 'readme' files come before those,
- # as those also could be duplicates of others, which are considered the
- # primary ones. These currently aren't figured out when gathering data, so
- # are done here.
- @files = sort { if ($a =~ /^cpan/) {
- return 1 if $b !~ /^cpan/;
- return lc $a cmp lc $b;
- }
- elsif ($b =~ /^cpan/) {
- return -1;
- }
- elsif ($a =~ /$only_for_interior_links_re/) {
- return 1 if $b !~ /$only_for_interior_links_re/;
- return lc $a cmp lc $b;
- }
- elsif ($b =~ /$only_for_interior_links_re/) {
- return -1;
- }
- elsif ($a =~ /^lib/) {
- return 1 if $b !~ /^lib/;
- return lc $a cmp lc $b;
- }
- elsif ($b =~ /^lib/) {
- return -1;
- } elsif ($a =~ /\breadme\b/i) {
- return 1 if $b !~ /\breadme\b/i;
- return lc $a cmp lc $b;
- }
- elsif ($b =~ /\breadme\b/i) {
- return -1;
- }
- else {
- return lc $a cmp lc $b;
- }
- }
- @files;
+# Sort file names so we get consistent results, and to put cpan last,
+# preceded by the ones that we don't generally parse. This is because both
+# these classes are generally parsed only if there is a link to the interior
+# of them, and we have to parse all others first to guarantee that they don't
+# have such a link. 'lib' files come just before these, as some of these are
+# duplicates of others. We already have figured this out when gathering the
+# data as a special case for all such files, but this, while unnecessary,
+# puts the derived file last in the output. 'readme' files come before those,
+# as those also could be duplicates of others, which are considered the
+# primary ones. These currently aren't figured out when gathering data, so
+# are done here.
+@files = sort { if ($a =~ /^cpan/) {
+ return 1 if $b !~ /^cpan/;
+ return lc $a cmp lc $b;
+ }
+ elsif ($b =~ /^cpan/) {
+ return -1;
+ }
+ elsif ($a =~ /$only_for_interior_links_re/) {
+ return 1 if $b !~ /$only_for_interior_links_re/;
+ return lc $a cmp lc $b;
+ }
+ elsif ($b =~ /$only_for_interior_links_re/) {
+ return -1;
+ }
+ elsif ($a =~ /^lib/) {
+ return 1 if $b !~ /^lib/;
+ return lc $a cmp lc $b;
+ }
+ elsif ($b =~ /^lib/) {
+ return -1;
+ } elsif ($a =~ /\breadme\b/i) {
+ return 1 if $b !~ /\breadme\b/i;
+ return lc $a cmp lc $b;
+ }
+ elsif ($b =~ /\breadme\b/i) {
+ return -1;
+ }
+ else {
+ return lc $a cmp lc $b;
+ }
+ }
+ @files;
# Now go through all the files and parse them
FILE:
$same = $prior_contents eq $contents;
}
+ use File::Basename 'basename';
if ($same) {
$checker->set_skip("The pod of $filename is a duplicate of "
. "the pod for $prior_filename");
$checker->set_skip("CPAN is upstream for $filename");
} elsif ( $filename =~ /^utils/ or $prior_filename =~ /^utils/ ) {
$checker->set_skip("$filename copy is in utils/");
+ } elsif ($prior_filename =~ /^(?:cpan|ext|dist)/
+ && $filename !~ /^(?:cpan|ext|dist)/
+ && basename($prior_filename) eq basename($filename))
+ {
+ $checker->set_skip("$filename: Need to run make?");
} else { # Here have two pods with identical names that differ
$prior_checker->poderror(
{ -msg => $duplicate_name,
# Transform pod language to what we are expecting
$node =~ s,E<sol>,/,g;
$node =~ s/E<verbar>/|/g;
+ $node =~ s/E<lt>/</g;
+ $node =~ s/E<gt>/>/g;
# If link is to a node that exists in the file, is ok
if ($nodes{$linked_to_page}{$node}) {
next if ! $known_problems{$canonical}{$message};
next if $known_problems{$canonical}{$message} < 0; # Preserve negs
- next if index($message, $line_length) == 0
- and ! $ENV{PERL_POD_PENDANTIC};
+ next if !$pedantic and $message =~
+ /^(?:\Q$line_length\E|\Q$C_not_linked\E|\Q$C_with_slash\E)/;
my $diagnostic = output_thanks($filename, $known_problems{$canonical}{$message}, 0, $message);
push @diagnostics, $diagnostic if $diagnostic;
}
if (! $regen
- && ! ok (keys %known_problems == 0, "The known problems data base includes no references to non-existent files"))
+ && ! ok (keys %known_problems == 0, "The known problems database ($data_dir/known_pod_issues.dat) includes no references to non-existent files"))
{
note("The following files were not found: "
. join ", ", keys %known_problems);