X-Git-Url: https://perl5.git.perl.org/perl5.git/blobdiff_plain/b3fdb838d589962e1c590dffd9540666e53c7e21..03ca349ac05d45c55183105f95dc1784f97aa4e3:/t/porting/podcheck.t diff --git a/t/porting/podcheck.t b/t/porting/podcheck.t index 35c9d64..b1022e1 100644 --- a/t/porting/podcheck.t +++ b/t/porting/podcheck.t @@ -213,8 +213,17 @@ my $encoding_first = "=encoding must be first command (if present)"; my $no_name = "There is no NAME"; my $missing_name_description = "The NAME should have a dash and short description after it"; -# objects, tests, etc can't be pods, so don't look for them. -my $non_pods = qr/\.(?:[achot]|zip|gz|bz2|jar|tar|tgz|PL|so)$/; +# objects, tests, etc can't be pods, so don't look for them. Also skip +# files output by the patch program. Could also ignore most of .gitignore +# files, but not all, so don't. +my $non_pods = qr/ (?: \. + (?: [achot] | zip | gz | bz2 | jar | tar | tgz | PL | so + | orig | rej | patch # Patch program output + | sw[op] | \#.* # Editor droppings + ) + $ + ) | ~$ # Another editor dropping + /x; # Pod::Checker messages to suppress @@ -288,10 +297,18 @@ sub suppressed { } # This is to get this to work across multiple file systems, including those -# that are not case sensitive. The db is stored in lower case, and all file -# name comparisons are done that way. +# that are not case sensitive. The db is stored in lower case, Un*x style, +# and all file name comparisons are done that way. sub canonicalize($) { - return lc File::Spec->canonpath(shift); + my $input = shift; + my ($volume, $directories, $file) + = File::Spec->splitpath(File::Spec->canonpath($input)); + # Assumes $volume is constant for everything in this directory structure + $directories = "" if ! $directories; + $file = "" if ! $file; + my $output = lc join '/', File::Spec->splitdir($directories), $file; + $output =~ s! / /+ !/!gx; # Multiple slashes => single slash + return $output; } @@ -414,7 +431,7 @@ package My::Pod::Checker { # Extend Pod::Checker # re's for messages that Pod::Checker outputs my $location = qr/ \b (?:in|at|on|near) \s+ /xi; my $optional_location = qr/ (?: $location )? /xi; - my $line_reference = qr/ [('"]? $optional_location line\ + my $line_reference = qr/ [('"]? $optional_location \b line \s+ (?: \d+ | EOF | \Q???\E | - ) [)'"]? /xi; @@ -533,17 +550,23 @@ package My::Pod::Checker { # Extend Pod::Checker $self->SUPER::verbatim($paragraph, $line_num, $pod_para); + my $addr = Scalar::Util::refaddr $self; + # Pick up the name, since the parent class doesn't in verbatim # NAMEs; so treat as non-verbatim. The parent class only allows one # paragraph in a NAME section, so if there is an extra blank line, it # will trigger a message, but such a blank line is harmless, so skip # in that case. - if ($in_NAME{Scalar::Util::refaddr $self} && $paragraph =~ /\S/) { + if ($in_NAME{$addr} && $paragraph =~ /\S/) { $self->textblock($paragraph, $line_num, $pod_para); } my @lines = split /^/, $paragraph; for my $i (0 .. @lines - 1) { + if ( my $encoding = $seen_encoding_cmd{$addr} ) { + require Encode; + $lines[$i] = Encode::decode($encoding, $lines[$i]); + } $lines[$i] =~ s/\s+$//; my $indent = $self->get_current_indent; my $exceeds = length(Text::Tabs::expand($lines[$i])) @@ -621,7 +644,7 @@ package My::Pod::Checker { # Extend Pod::Checker } elsif ($cmd eq "encoding") { my ($file, $line) = $pod_para->file_line; - $seen_encoding_cmd{$addr} = 1; + $seen_encoding_cmd{$addr} = $paragraph; # for later decoding if ($command_count{$addr} != 1 && $seen_pod_cmd{$addr}) { $self->poderror({ -line => $line, -file => $file, -msg => $encoding_first @@ -771,7 +794,7 @@ my %files_with_unknown_issues; my %files_with_fixes; my $data_fh; -open($data_fh, $known_issues) || die "Can't open $known_issues"; +open $data_fh, '<:bytes', $known_issues or die "Can't open $known_issues"; my %counts; # For --counts param, count of each issue type my %suppressed_files; # Files with at least one issue type to suppress @@ -823,12 +846,16 @@ my %excluded_files = ( "configpm" => 1, "miniperl" => 1, "perl" => 1, + + # It would be nice if we didn't have to skip this, + # but the errors in it are too variable. + "pod/perltoc.pod" => 1, ); # Convert to more generic form. foreach my $file (keys %excluded_files) { - $excluded_files{canonicalize($excluded_files{$file})} - = $excluded_files{$file}; + delete $excluded_files{$file}; + $excluded_files{canonicalize($file)} = 1; } # re to match files that are to be parsed only if there is an internal link @@ -902,7 +929,12 @@ sub extract_pod { # Extracts just the pod from a file # Arrange for the output of Pod::Parser to be collected in an array we can # look at instead of being printed tie *ALREADY_FH, 'Tie_Array_to_FH', \@pod; - open my $in_fh, '<', $filename + open my $in_fh, '<:bytes', $filename + + # The file should already have been opened once to get here, so if + # fails, just die. It's possible that a transitory file containing a + # pod would get here, but not bothering to add code for that very + # unlikely event. or die "Can't open '$filename': $!\n"; my $parser = Pod::Parser->new(); @@ -915,6 +947,8 @@ sub extract_pod { # Extracts just the pod from a file my $digest = Digest->new($digest_type); sub is_pod_file { + # If $_ is a pod file, add it to the lists and do other prep work. + if (-d $_) { # Don't look at files in directories that are for tests, nor those # beginning with a dot @@ -934,94 +968,90 @@ sub is_pod_file { return if $excluded_files{canonicalize($filename)}; - open my $candidate, '<', $_ - or die "Can't open '$File::Find::name': $!\n"; - my @contents = <$candidate>; - close $candidate; + my $contents = do { + local $/; + my $candidate; + if (! open $candidate, '<:bytes', $_) { + + # If a transitory file was found earlier, the open could fail + # legitimately and we just skip the file; also skip it if it is a + # broken symbolic link, as it is probably just a build problem; + # certainly not a file that we would want to check the pod of. + # Otherwise fail it here and no reason to process it further. + # (But the test count will be off too) + ok(0, "Can't open '$filename': $!") + if -e $filename && ! -l $filename; + return; + } + <$candidate>; + }; # If the file is a .pm or .pod, having any initial '=' on a line is # grounds for testing it. Otherwise, require a head1 NAME line to view it # as a potential pod - my $i; - my $found = ""; - for ($i = 0; $i < @contents; $i++) { - next unless $contents[$i] =~ /^=/; - if ($filename =~ /\.(?:pm|pod)/) { - $found = 'found_some_pod_line'; - last; - } - elsif ($contents[$i] =~ /^=head1 +NAME/) { - $found = 'found_NAME'; - last; - } + if ($filename =~ /\.(?:pm|pod)/) { + return unless $contents =~ /^=/m; + } else { + return unless $contents =~ /^=head1 +NAME/m; } - if ($found) { - # Here, we know that the file is a pod. Add it to the list of files - # to check and create a checker object for it. - push @files, $filename; - my $checker = My::Pod::Checker->new($filename); - $filename_to_checker{$filename} = $checker; - - # In order to detect duplicate pods and only analyze them once, we - # compute checksums for the file, so don't have to do an exact - # compare. Note that if the pod is just part of the file, the - # checksums can differ for the same pod. That special case is handled - # later, since if the checksums of the whole file are the same, that - # case won't even come up. We don't need the checksums for files that - # we parse only if there is a link to its interior, but we do need its - # NAME, which is also retrieved in the code below. - if ($filename =~ / (?: ^(cpan|lib|ext|dist)\/ ) + # Here, we know that the file is a pod. Add it to the list of files + # to check and create a checker object for it. + + push @files, $filename; + my $checker = My::Pod::Checker->new($filename); + $filename_to_checker{$filename} = $checker; + + # In order to detect duplicate pods and only analyze them once, we + # compute checksums for the file, so don't have to do an exact + # compare. Note that if the pod is just part of the file, the + # checksums can differ for the same pod. That special case is handled + # later, since if the checksums of the whole file are the same, that + # case won't even come up. We don't need the checksums for files that + # we parse only if there is a link to its interior, but we do need its + # NAME, which is also retrieved in the code below. + + if ($filename =~ / (?: ^(cpan|lib|ext|dist)\/ ) + | $only_for_interior_links_re + /x) { + $digest->add($contents); + $digests{$filename} = $digest->digest; + + # lib files aren't analyzed if they are duplicates of files copied + # there from some other directory. But to determine this, we need + # to know their NAMEs. We might as well find the NAME now while + # the file is open. Similarly, cpan files aren't analyzed unless + # we're analyzing all of them, or this particular file is linked + # to by a file we are analyzing, and thus we will want to verify + # that the target exists in it. We need to know at least the NAME + # to see if it's worth analyzing, or so we can determine if a lib + # file is a copy of a cpan one. + if ($filename =~ m{ (?: ^ (?: cpan | lib ) / ) | $only_for_interior_links_re - /x) { - $digest->add(@contents); - $digests{$filename} = $digest->digest; - - # lib files aren't analyzed if they are duplicates of files copied - # there from some other directory. But to determine this, we need - # to know their NAMEs. We might as well find the NAME now while - # the file is open. Similarly, cpan files aren't analyzed unless - # we're analyzing all of them, or this particular file is linked - # to by a file we are analyzing, and thus we will want to verify - # that the target exists in it. We need to know at least the NAME - # to see if it's worth analyzing, or so we can determine if a lib - # file is a copy of a cpan one. - if ($filename =~ m{ (?: ^ (?: cpan | lib ) / ) - | $only_for_interior_links_re - }x) { - if ($found eq 'found_some_pod_line') { - for (; $i < @contents; $i++) { - next if $contents[$i] !~ /^=head1/; - $found = 'found_NAME' - if $contents[$i] =~ /^=head1 +NAME/; - last; - } - } - if ($found eq 'found_NAME') { - $i++; # The NAME starts on a later line - - # Skip empty lines - while ($contents[$i] !~ /\S/) { $i++ } - - # The NAME is the first non-spaces on the line up to a - # comma, dash or end of line. Otherwise, it's invalid and - # this pod doesn't have a legal name that we're smart - # enough to find currently. But the parser will later - # find it if it thinks there is a legal name, and set the - # name - if ($contents[$i] =~ /^ \s* ( \S+?) \s* (?: [,-] | $ )/x) { - my $name = $1; - $checker->name($name); - $id_to_checker{$name} = $checker - if $filename =~ m{^cpan/}; - } - } - elsif ($filename =~ m{^cpan/}) { - $id_to_checker{$digests{$filename}} = $checker; + }x) { + if ($contents =~ /^=head1 +NAME.*/mg) { + # The NAME is the first non-spaces on the line up to a + # comma, dash or end of line. Otherwise, it's invalid and + # this pod doesn't have a legal name that we're smart + # enough to find currently. But the parser will later + # find it if it thinks there is a legal name, and set the + # name + if ($contents =~ /\G # continue from the line after =head1 + \s* # ignore any empty lines + ^ \s* ( \S+?) \s* (?: [,-] | $ )/mx) { + my $name = $1; + $checker->name($name); + $id_to_checker{$name} = $checker + if $filename =~ m{^cpan/}; } } + elsif ($filename =~ m{^cpan/}) { + $id_to_checker{$digests{$filename}} = $checker; + } } } + + return; } # End of is_pod_file() # Start of real code that isn't processing the command line.