my $no_name = "There is no NAME";
my $missing_name_description = "The NAME should have a dash and short description after it";
-# objects, tests, etc can't be pods, so don't look for them.
-my $non_pods = qr/\.(?:[achot]|zip|gz|bz2|jar|tar|tgz|PL|so)$/;
+# objects, tests, etc can't be pods, so don't look for them. Also skip
+# files output by the patch program. Could also ignore most of .gitignore
+# files, but not all, so don't.
+my $non_pods = qr/ (?: \.
+ (?: [achot] | zip | gz | bz2 | jar | tar | tgz | PL | so
+ | orig | rej | patch # Patch program output
+ | sw[op] | \#.* # Editor droppings
+ )
+ $
+ ) | ~$ # Another editor dropping
+ /x;
# Pod::Checker messages to suppress
}
# This is to get this to work across multiple file systems, including those
-# that are not case sensitive. The db is stored in lower case, and all file
-# name comparisons are done that way.
+# that are not case sensitive. The db is stored in lower case, Un*x style,
+# and all file name comparisons are done that way.
sub canonicalize($) {
- return lc File::Spec->canonpath(shift);
+ my $input = shift;
+ my ($volume, $directories, $file)
+ = File::Spec->splitpath(File::Spec->canonpath($input));
+ # Assumes $volume is constant for everything in this directory structure
+ $directories = "" if ! $directories;
+ $file = "" if ! $file;
+ my $output = lc join '/', File::Spec->splitdir($directories), $file;
+ $output =~ s! / /+ !/!gx; # Multiple slashes => single slash
+ return $output;
}
# re's for messages that Pod::Checker outputs
my $location = qr/ \b (?:in|at|on|near) \s+ /xi;
my $optional_location = qr/ (?: $location )? /xi;
- my $line_reference = qr/ [('"]? $optional_location line\
+ my $line_reference = qr/ [('"]? $optional_location \b line \s+
(?: \d+ | EOF | \Q???\E | - )
[)'"]? /xi;
$self->SUPER::verbatim($paragraph, $line_num, $pod_para);
+ my $addr = Scalar::Util::refaddr $self;
+
# Pick up the name, since the parent class doesn't in verbatim
# NAMEs; so treat as non-verbatim. The parent class only allows one
# paragraph in a NAME section, so if there is an extra blank line, it
# will trigger a message, but such a blank line is harmless, so skip
# in that case.
- if ($in_NAME{Scalar::Util::refaddr $self} && $paragraph =~ /\S/) {
+ if ($in_NAME{$addr} && $paragraph =~ /\S/) {
$self->textblock($paragraph, $line_num, $pod_para);
}
my @lines = split /^/, $paragraph;
for my $i (0 .. @lines - 1) {
+ if ( my $encoding = $seen_encoding_cmd{$addr} ) {
+ require Encode;
+ $lines[$i] = Encode::decode($encoding, $lines[$i]);
+ }
$lines[$i] =~ s/\s+$//;
my $indent = $self->get_current_indent;
my $exceeds = length(Text::Tabs::expand($lines[$i]))
}
elsif ($cmd eq "encoding") {
my ($file, $line) = $pod_para->file_line;
- $seen_encoding_cmd{$addr} = 1;
+ $seen_encoding_cmd{$addr} = $paragraph; # for later decoding
if ($command_count{$addr} != 1 && $seen_pod_cmd{$addr}) {
$self->poderror({ -line => $line, -file => $file,
-msg => $encoding_first
my %files_with_fixes;
my $data_fh;
-open($data_fh, $known_issues) || die "Can't open $known_issues";
+open $data_fh, '<:bytes', $known_issues or die "Can't open $known_issues";
my %counts; # For --counts param, count of each issue type
my %suppressed_files; # Files with at least one issue type to suppress
"configpm" => 1,
"miniperl" => 1,
"perl" => 1,
+
+ # It would be nice if we didn't have to skip this,
+ # but the errors in it are too variable.
+ "pod/perltoc.pod" => 1,
);
# Convert to more generic form.
foreach my $file (keys %excluded_files) {
- $excluded_files{canonicalize($excluded_files{$file})}
- = $excluded_files{$file};
+ delete $excluded_files{$file};
+ $excluded_files{canonicalize($file)} = 1;
}
# re to match files that are to be parsed only if there is an internal link
# Arrange for the output of Pod::Parser to be collected in an array we can
# look at instead of being printed
tie *ALREADY_FH, 'Tie_Array_to_FH', \@pod;
- open my $in_fh, '<', $filename
+ open my $in_fh, '<:bytes', $filename
+
+ # The file should already have been opened once to get here, so if
+ # fails, just die. It's possible that a transitory file containing a
+ # pod would get here, but not bothering to add code for that very
+ # unlikely event.
or die "Can't open '$filename': $!\n";
my $parser = Pod::Parser->new();
my $digest = Digest->new($digest_type);
sub is_pod_file {
+ # If $_ is a pod file, add it to the lists and do other prep work.
+
if (-d $_) {
# Don't look at files in directories that are for tests, nor those
# beginning with a dot
return if $excluded_files{canonicalize($filename)};
- open my $candidate, '<', $_
- or die "Can't open '$File::Find::name': $!\n";
- my @contents = <$candidate>;
- close $candidate;
+ my $contents = do {
+ local $/;
+ my $candidate;
+ if (! open $candidate, '<:bytes', $_) {
+
+ # If a transitory file was found earlier, the open could fail
+ # legitimately and we just skip the file; also skip it if it is a
+ # broken symbolic link, as it is probably just a build problem;
+ # certainly not a file that we would want to check the pod of.
+ # Otherwise fail it here and no reason to process it further.
+ # (But the test count will be off too)
+ ok(0, "Can't open '$filename': $!")
+ if -e $filename && ! -l $filename;
+ return;
+ }
+ <$candidate>;
+ };
# If the file is a .pm or .pod, having any initial '=' on a line is
# grounds for testing it. Otherwise, require a head1 NAME line to view it
# as a potential pod
- my $i;
- my $found = "";
- for ($i = 0; $i < @contents; $i++) {
- next unless $contents[$i] =~ /^=/;
- if ($filename =~ /\.(?:pm|pod)/) {
- $found = 'found_some_pod_line';
- last;
- }
- elsif ($contents[$i] =~ /^=head1 +NAME/) {
- $found = 'found_NAME';
- last;
- }
+ if ($filename =~ /\.(?:pm|pod)/) {
+ return unless $contents =~ /^=/m;
+ } else {
+ return unless $contents =~ /^=head1 +NAME/m;
}
- if ($found) {
- # Here, we know that the file is a pod. Add it to the list of files
- # to check and create a checker object for it.
- push @files, $filename;
- my $checker = My::Pod::Checker->new($filename);
- $filename_to_checker{$filename} = $checker;
-
- # In order to detect duplicate pods and only analyze them once, we
- # compute checksums for the file, so don't have to do an exact
- # compare. Note that if the pod is just part of the file, the
- # checksums can differ for the same pod. That special case is handled
- # later, since if the checksums of the whole file are the same, that
- # case won't even come up. We don't need the checksums for files that
- # we parse only if there is a link to its interior, but we do need its
- # NAME, which is also retrieved in the code below.
- if ($filename =~ / (?: ^(cpan|lib|ext|dist)\/ )
+ # Here, we know that the file is a pod. Add it to the list of files
+ # to check and create a checker object for it.
+
+ push @files, $filename;
+ my $checker = My::Pod::Checker->new($filename);
+ $filename_to_checker{$filename} = $checker;
+
+ # In order to detect duplicate pods and only analyze them once, we
+ # compute checksums for the file, so don't have to do an exact
+ # compare. Note that if the pod is just part of the file, the
+ # checksums can differ for the same pod. That special case is handled
+ # later, since if the checksums of the whole file are the same, that
+ # case won't even come up. We don't need the checksums for files that
+ # we parse only if there is a link to its interior, but we do need its
+ # NAME, which is also retrieved in the code below.
+
+ if ($filename =~ / (?: ^(cpan|lib|ext|dist)\/ )
+ | $only_for_interior_links_re
+ /x) {
+ $digest->add($contents);
+ $digests{$filename} = $digest->digest;
+
+ # lib files aren't analyzed if they are duplicates of files copied
+ # there from some other directory. But to determine this, we need
+ # to know their NAMEs. We might as well find the NAME now while
+ # the file is open. Similarly, cpan files aren't analyzed unless
+ # we're analyzing all of them, or this particular file is linked
+ # to by a file we are analyzing, and thus we will want to verify
+ # that the target exists in it. We need to know at least the NAME
+ # to see if it's worth analyzing, or so we can determine if a lib
+ # file is a copy of a cpan one.
+ if ($filename =~ m{ (?: ^ (?: cpan | lib ) / )
| $only_for_interior_links_re
- /x) {
- $digest->add(@contents);
- $digests{$filename} = $digest->digest;
-
- # lib files aren't analyzed if they are duplicates of files copied
- # there from some other directory. But to determine this, we need
- # to know their NAMEs. We might as well find the NAME now while
- # the file is open. Similarly, cpan files aren't analyzed unless
- # we're analyzing all of them, or this particular file is linked
- # to by a file we are analyzing, and thus we will want to verify
- # that the target exists in it. We need to know at least the NAME
- # to see if it's worth analyzing, or so we can determine if a lib
- # file is a copy of a cpan one.
- if ($filename =~ m{ (?: ^ (?: cpan | lib ) / )
- | $only_for_interior_links_re
- }x) {
- if ($found eq 'found_some_pod_line') {
- for (; $i < @contents; $i++) {
- next if $contents[$i] !~ /^=head1/;
- $found = 'found_NAME'
- if $contents[$i] =~ /^=head1 +NAME/;
- last;
- }
- }
- if ($found eq 'found_NAME') {
- $i++; # The NAME starts on a later line
-
- # Skip empty lines
- while ($contents[$i] !~ /\S/) { $i++ }
-
- # The NAME is the first non-spaces on the line up to a
- # comma, dash or end of line. Otherwise, it's invalid and
- # this pod doesn't have a legal name that we're smart
- # enough to find currently. But the parser will later
- # find it if it thinks there is a legal name, and set the
- # name
- if ($contents[$i] =~ /^ \s* ( \S+?) \s* (?: [,-] | $ )/x) {
- my $name = $1;
- $checker->name($name);
- $id_to_checker{$name} = $checker
- if $filename =~ m{^cpan/};
- }
- }
- elsif ($filename =~ m{^cpan/}) {
- $id_to_checker{$digests{$filename}} = $checker;
+ }x) {
+ if ($contents =~ /^=head1 +NAME.*/mg) {
+ # The NAME is the first non-spaces on the line up to a
+ # comma, dash or end of line. Otherwise, it's invalid and
+ # this pod doesn't have a legal name that we're smart
+ # enough to find currently. But the parser will later
+ # find it if it thinks there is a legal name, and set the
+ # name
+ if ($contents =~ /\G # continue from the line after =head1
+ \s* # ignore any empty lines
+ ^ \s* ( \S+?) \s* (?: [,-] | $ )/mx) {
+ my $name = $1;
+ $checker->name($name);
+ $id_to_checker{$name} = $checker
+ if $filename =~ m{^cpan/};
}
}
+ elsif ($filename =~ m{^cpan/}) {
+ $id_to_checker{$digests{$filename}} = $checker;
+ }
}
}
+
+ return;
} # End of is_pod_file()
# Start of real code that isn't processing the command line.