X-Git-Url: https://perl5.git.perl.org/perl5.git/blobdiff_plain/b3fdb838d589962e1c590dffd9540666e53c7e21..03ca349ac05d45c55183105f95dc1784f97aa4e3:/t/porting/podcheck.t

diff --git a/t/porting/podcheck.t b/t/porting/podcheck.t
index 35c9d64..b1022e1 100644
--- a/t/porting/podcheck.t
+++ b/t/porting/podcheck.t
@@ -213,8 +213,17 @@ my $encoding_first = "=encoding must be first command (if present)";
 my $no_name = "There is no NAME";
 my $missing_name_description = "The NAME should have a dash and short description after it";
 
-# objects, tests, etc can't be pods, so don't look for them.
-my $non_pods = qr/\.(?:[achot]|zip|gz|bz2|jar|tar|tgz|PL|so)$/;
+# objects, tests, etc can't be pods, so don't look for them. Also skip
+# files output by the patch program.  Could also ignore most of .gitignore
+# files, but not all, so don't.
+my $non_pods = qr/ (?: \.
+                       (?: [achot]  | zip | gz | bz2 | jar | tar | tgz | PL | so
+                           | orig | rej | patch   # Patch program output
+                           | sw[op] | \#.*  # Editor droppings
+                       )
+                       $
+                    ) | ~$      # Another editor dropping
+                /x;
 
 
 # Pod::Checker messages to suppress
@@ -288,10 +297,18 @@ sub suppressed {
 }
 
 # This is to get this to work across multiple file systems, including those
-# that are not case sensitive.  The db is stored in lower case, and all file
-# name comparisons are done that way.
+# that are not case sensitive.  The db is stored in lower case, Un*x style,
+# and all file name comparisons are done that way.
 sub canonicalize($) {
-    return lc File::Spec->canonpath(shift);
+    my $input = shift;
+    my ($volume, $directories, $file)
+                    = File::Spec->splitpath(File::Spec->canonpath($input));
+    # Assumes $volume is constant for everything in this directory structure
+    $directories = "" if ! $directories;
+    $file = "" if ! $file;
+    my $output = lc join '/', File::Spec->splitdir($directories), $file;
+    $output =~ s! / /+ !/!gx;       # Multiple slashes => single slash
+    return $output;
 }
 
 
@@ -414,7 +431,7 @@ package My::Pod::Checker {      # Extend Pod::Checker
     # re's for messages that Pod::Checker outputs
     my $location = qr/ \b (?:in|at|on|near) \s+ /xi;
     my $optional_location = qr/ (?: $location )? /xi;
-    my $line_reference = qr/ [('"]? $optional_location line\
+    my $line_reference = qr/ [('"]? $optional_location \b line \s+
                              (?: \d+ | EOF | \Q???\E | - )
                              [)'"]? /xi;
 
@@ -533,17 +550,23 @@ package My::Pod::Checker {      # Extend Pod::Checker
 
         $self->SUPER::verbatim($paragraph, $line_num, $pod_para);
 
+        my $addr = Scalar::Util::refaddr $self;
+
         # Pick up the name, since the parent class doesn't in verbatim
         # NAMEs; so treat as non-verbatim.  The parent class only allows one
         # paragraph in a NAME section, so if there is an extra blank line, it
         # will trigger a message, but such a blank line is harmless, so skip
         # in that case.
-        if ($in_NAME{Scalar::Util::refaddr $self} && $paragraph =~ /\S/) {
+        if ($in_NAME{$addr} && $paragraph =~ /\S/) {
             $self->textblock($paragraph, $line_num, $pod_para);
         }
 
         my @lines = split /^/, $paragraph;
         for my $i (0 .. @lines - 1) {
+            if ( my $encoding = $seen_encoding_cmd{$addr} ) {
+              require Encode;
+              $lines[$i] = Encode::decode($encoding, $lines[$i]);
+            }
             $lines[$i] =~ s/\s+$//;
             my $indent = $self->get_current_indent;
             my $exceeds = length(Text::Tabs::expand($lines[$i]))
@@ -621,7 +644,7 @@ package My::Pod::Checker {      # Extend Pod::Checker
         }
         elsif ($cmd eq "encoding") {
             my ($file, $line) = $pod_para->file_line;
-            $seen_encoding_cmd{$addr} = 1;
+            $seen_encoding_cmd{$addr} = $paragraph; # for later decoding
             if ($command_count{$addr} != 1 && $seen_pod_cmd{$addr}) {
                 $self->poderror({ -line => $line, -file => $file,
                                   -msg => $encoding_first
@@ -771,7 +794,7 @@ my %files_with_unknown_issues;
 my %files_with_fixes;
 
 my $data_fh;
-open($data_fh, $known_issues) || die "Can't open $known_issues";
+open $data_fh, '<:bytes', $known_issues or die "Can't open $known_issues";
 
 my %counts; # For --counts param, count of each issue type
 my %suppressed_files;   # Files with at least one issue type to suppress
@@ -823,12 +846,16 @@ my %excluded_files = (
                         "configpm" => 1,
                         "miniperl" => 1,
                         "perl" => 1,
+
+                        # It would be nice if we didn't have to skip this,
+                        # but the errors in it are too variable.
+                        "pod/perltoc.pod" => 1,
                     );
 
 # Convert to more generic form.
 foreach my $file (keys %excluded_files) {
-    $excluded_files{canonicalize($excluded_files{$file})}
-                                                    = $excluded_files{$file};
+    delete $excluded_files{$file};
+    $excluded_files{canonicalize($file)} = 1;
 }
 
 # re to match files that are to be parsed only if there is an internal link
@@ -902,7 +929,12 @@ sub extract_pod {   # Extracts just the pod from a file
     # Arrange for the output of Pod::Parser to be collected in an array we can
     # look at instead of being printed
     tie *ALREADY_FH, 'Tie_Array_to_FH', \@pod;
-    open my $in_fh, '<', $filename
+    open my $in_fh, '<:bytes', $filename
+
+        # The file should already have been opened once to get here, so if
+        # fails, just die.  It's possible that a transitory file containing a
+        # pod would get here, but not bothering to add code for that very
+        # unlikely event.
         or die "Can't open '$filename': $!\n";
 
     my $parser = Pod::Parser->new();
@@ -915,6 +947,8 @@ sub extract_pod {   # Extracts just the pod from a file
 my $digest = Digest->new($digest_type);
 
 sub is_pod_file {
+    # If $_ is a pod file, add it to the lists and do other prep work.
+
     if (-d $_) {
         # Don't look at files in directories that are for tests, nor those
         # beginning with a dot
@@ -934,94 +968,90 @@ sub is_pod_file {
 
     return if $excluded_files{canonicalize($filename)};
 
-    open my $candidate, '<', $_
-        or die "Can't open '$File::Find::name': $!\n";
-    my @contents = <$candidate>;
-    close $candidate;
+    my $contents = do {
+        local $/;
+        my $candidate;
+        if (! open $candidate, '<:bytes', $_) {
+
+            # If a transitory file was found earlier, the open could fail
+            # legitimately and we just skip the file; also skip it if it is a
+            # broken symbolic link, as it is probably just a build problem;
+            # certainly not a file that we would want to check the pod of.
+            # Otherwise fail it here and no reason to process it further.
+            # (But the test count will be off too)
+            ok(0, "Can't open '$filename': $!")
+                                            if -e $filename && ! -l $filename;
+            return;
+        }
+        <$candidate>;
+    };
 
     # If the file is a .pm or .pod, having any initial '=' on a line is
     # grounds for testing it.  Otherwise, require a head1 NAME line to view it
     # as a potential pod
-    my $i;
-    my $found = "";
-    for ($i = 0; $i < @contents; $i++) {
-        next unless $contents[$i] =~ /^=/;
-        if ($filename =~ /\.(?:pm|pod)/) {
-            $found = 'found_some_pod_line';
-            last;
-        }
-        elsif ($contents[$i] =~ /^=head1 +NAME/) {
-            $found = 'found_NAME';
-            last;
-        }
+    if ($filename =~ /\.(?:pm|pod)/) {
+        return unless $contents =~ /^=/m;
+    } else {
+        return unless $contents =~ /^=head1 +NAME/m;
     }
-    if ($found) {
-        # Here, we know that the file is a pod.  Add it to the list of files
-        # to check and create a checker object for it.
 
-        push @files, $filename;
-        my $checker = My::Pod::Checker->new($filename);
-        $filename_to_checker{$filename} = $checker;
-
-        # In order to detect duplicate pods and only analyze them once, we
-        # compute checksums for the file, so don't have to do an exact
-        # compare.  Note that if the pod is just part of the file, the
-        # checksums can differ for the same pod.  That special case is handled
-        # later, since if the checksums of the whole file are the same, that
-        # case won't even come up.  We don't need the checksums for files that
-        # we parse only if there is a link to its interior, but we do need its
-        # NAME, which is also retrieved in the code below.
-        if ($filename =~ / (?: ^(cpan|lib|ext|dist)\/ )
+    # Here, we know that the file is a pod.  Add it to the list of files
+    # to check and create a checker object for it.
+
+    push @files, $filename;
+    my $checker = My::Pod::Checker->new($filename);
+    $filename_to_checker{$filename} = $checker;
+
+    # In order to detect duplicate pods and only analyze them once, we
+    # compute checksums for the file, so don't have to do an exact
+    # compare.  Note that if the pod is just part of the file, the
+    # checksums can differ for the same pod.  That special case is handled
+    # later, since if the checksums of the whole file are the same, that
+    # case won't even come up.  We don't need the checksums for files that
+    # we parse only if there is a link to its interior, but we do need its
+    # NAME, which is also retrieved in the code below.
+
+    if ($filename =~ / (?: ^(cpan|lib|ext|dist)\/ )
+                        | $only_for_interior_links_re
+                    /x) {
+        $digest->add($contents);
+        $digests{$filename} = $digest->digest;
+
+        # lib files aren't analyzed if they are duplicates of files copied
+        # there from some other directory.  But to determine this, we need
+        # to know their NAMEs.  We might as well find the NAME now while
+        # the file is open.  Similarly, cpan files aren't analyzed unless
+        # we're analyzing all of them, or this particular file is linked
+        # to by a file we are analyzing, and thus we will want to verify
+        # that the target exists in it.  We need to know at least the NAME
+        # to see if it's worth analyzing, or so we can determine if a lib
+        # file is a copy of a cpan one.
+        if ($filename =~ m{ (?: ^ (?: cpan | lib ) / )
                             | $only_for_interior_links_re
-                        /x) {
-            $digest->add(@contents);
-            $digests{$filename} = $digest->digest;
-
-            # lib files aren't analyzed if they are duplicates of files copied
-            # there from some other directory.  But to determine this, we need
-            # to know their NAMEs.  We might as well find the NAME now while
-            # the file is open.  Similarly, cpan files aren't analyzed unless
-            # we're analyzing all of them, or this particular file is linked
-            # to by a file we are analyzing, and thus we will want to verify
-            # that the target exists in it.  We need to know at least the NAME
-            # to see if it's worth analyzing, or so we can determine if a lib
-            # file is a copy of a cpan one.
-            if ($filename =~ m{ (?: ^ (?: cpan | lib ) / )
-                                | $only_for_interior_links_re
-                                }x) {
-                if ($found eq 'found_some_pod_line') {
-                    for (;  $i < @contents; $i++) {
-                        next if $contents[$i] !~ /^=head1/;
-                        $found = 'found_NAME'
-                                        if $contents[$i] =~ /^=head1 +NAME/;
-                        last;
-                    }
-                }
-                if ($found eq 'found_NAME') {
-                    $i++;   # The NAME starts on a later line
-
-                    # Skip empty lines
-                    while ($contents[$i] !~ /\S/) { $i++ }
-
-                    # The NAME is the first non-spaces on the line up to a
-                    # comma, dash or end of line.  Otherwise, it's invalid and
-                    # this pod doesn't have a legal name that we're smart
-                    # enough to find currently.  But the  parser will later
-                    # find it if it thinks there is a legal name, and set the
-                    # name
-                    if ($contents[$i] =~ /^ \s* ( \S+?) \s* (?: [,-] | $ )/x) {
-                        my $name = $1;
-                        $checker->name($name);
-                        $id_to_checker{$name} = $checker
-                                                if $filename =~ m{^cpan/};
-                    }
-                }
-                elsif ($filename =~ m{^cpan/}) {
-                    $id_to_checker{$digests{$filename}} = $checker;
+                            }x) {
+            if ($contents =~ /^=head1 +NAME.*/mg) {
+                # The NAME is the first non-spaces on the line up to a
+                # comma, dash or end of line.  Otherwise, it's invalid and
+                # this pod doesn't have a legal name that we're smart
+                # enough to find currently.  But the  parser will later
+                # find it if it thinks there is a legal name, and set the
+                # name
+                if ($contents =~ /\G    # continue from the line after =head1
+                                  \s*   # ignore any empty lines
+                                  ^ \s* ( \S+?) \s* (?: [,-] | $ )/mx) {
+                    my $name = $1;
+                    $checker->name($name);
+                    $id_to_checker{$name} = $checker
+                        if $filename =~ m{^cpan/};
                 }
             }
+            elsif ($filename =~ m{^cpan/}) {
+                $id_to_checker{$digests{$filename}} = $checker;
+            }
         }
     }
+
+    return;
 } # End of is_pod_file()
 
 # Start of real code that isn't processing the command line.