podcheck.t: Add comment

[perl5.git] / t / porting / podcheck.t
diff --git a/t/porting/podcheck.t b/t/porting/podcheck.t

index 2bfddfd..f0d5811 100644 (file)
--- a/t/porting/podcheck.t
+++ b/t/porting/podcheck.t
@@ -1,10 +1,16 @@
  #!/usr/bin/perl -w
  
+BEGIN {
+    chdir 't';
+    unshift @INC, "../lib";
+}
+
  use strict;
  use warnings;
  use feature 'unicode_strings';
  
  use Carp;
+use Config;
  use Digest;
  use File::Find;
  use File::Spec;
@@ -77,7 +83,7 @@ It's annoying to have lines wrap when displaying pod documentation in a
  terminal window.  This checks that all verbatim lines fit in a standard 80
  column window, even when using a pager that reserves a column for its own use.
  (Thus the check is for a net of 79 columns.)
-For those that lines that don't fit, it tells you how much needs to be cut in
+For those lines that don't fit, it tells you how much needs to be cut in
  order to fit.
  
  Often, the easiest thing to do to gain space for these is to lower the indent
@@ -109,11 +115,13 @@ A number of issues raised by podcheck.t and by the base Pod::Checker are not
  really problems, but merely potential problems, that is, false positives.
  After inspecting them and
  deciding that they aren't real problems, it is possible to shut up this program
-about them, unlike base Pod::Checker.  To do this, call podcheck.t with the
-C<--regen> option to regenerate the database.  This tells it that all existing
+about them, unlike base Pod::Checker.  For a valid link to an outside module
+or man page, call podcheck.t with the C<--add_link> option to add it to the
+the database of known links; for other causes, call podcheck.t with the C<--regen>
+option to regenerate the entire database.  This tells it that all existing
  issues are to not be mentioned again.
  
-This isn't fool-proof.  The database merely keeps track of the number of these
+C<--regen> isn't fool-proof.  The database merely keeps track of the number of these
  potential problems of each type for each pod.  If a new problem of a given
  type is introduced into the pod, podcheck.t will spit out all of them.  You
  then have to figure out which is the new one, and should it be changed or not.
@@ -131,7 +139,8 @@ the number of those problems for that pod to a negative number.  This will
  cause the corresponding error to always be suppressed no matter how many there
  actually are.
  
-There is currently no check that modules listed as valid in the data base
+Another problem is that there is currently no check that modules listed as
+valid in the data base
  actually are.  Thus any errors introduced there will remain there.
  
  =head2 Specially handled pods
@@ -143,7 +152,7 @@ actually are.  Thus any errors introduced there will remain there.
  This pod is generated by pasting bits from other pods.  Errors in those bits
  will show up as errors here, as well as for those other pods.  Therefore
  errors here are suppressed, and the pod is checked only to verify that nodes
-within it that are externally linked to actually exist.
+within it actually exist that are externally linked to.
  
  =item perldelta
  
@@ -238,6 +247,44 @@ L<Pod::Checker>
  
  =cut
  
+# VMS builds have a '.com' appended to utility and script names, and it adds a
+# trailing dot for any other file name that doesn't have a dot in it.  The db
+# is stored without those things.  This regex allows for these special file
+# names to be dealt with.  It needs to be interpolated into a larger regex
+# that furnishes the closing boundary.
+my $vms_re = qr/ \. (?: com )? /x;
+
+# Some filenames in the MANIFEST match $vms_re, and so must not be handled the
+# same way that that the special vms ones are.  This hash lists those.
+my %special_vms_files;
+
+# This is to get this to work across multiple file systems, including those
+# that are not case sensitive.  The db is stored in lower case, Un*x style,
+# and all file name comparisons are done that way.
+sub canonicalize($) {
+    my $input = shift;
+    my ($volume, $directories, $file)
+                    = File::Spec->splitpath(File::Spec->canonpath($input));
+    # Assumes $volume is constant for everything in this directory structure
+    $directories = "" if ! $directories;
+    $file = "" if ! $file;
+    $file = lc join '/', File::Spec->splitdir($directories), $file;
+    $file =~ s! / /+ !/!gx;       # Multiple slashes => single slash
+
+    # The db is stored without the special suffixes that are there in VMS, so
+    # strip them off to get the comparable name.  But some files on all
+    # platforms have these suffixes, so this shouldn't happen for them, as any
+    # of their db entries will have the suffixes in them.  The hash has been
+    # populated with these files.
+    if ($^O eq 'VMS'
+        && $file =~ / ( $vms_re ) $ /x
+        && ! exists $special_vms_files{$file})
+    {
+        $file =~ s/ $1 $ //x;
+    }
+    return $file;
+}
+
  #####################################################
  # HOW IT WORKS (in general)
  #
@@ -268,7 +315,8 @@ L<Pod::Checker>
  # has many false positives; higher numbers give more messages.
  my $Warnings_Level = 200;
  
-# perldelta during construction may have place holder links.
+# perldelta during construction may have place holder links.  N.B.  This
+# variable is referred to by name in release_managers_guide.pod
  our @perldelta_ignore_links = ( "XXX", "perl5YYYdelta", "perldiag/message" );
  
  # To see if two pods with the same NAME are actually copies of the same pod,
@@ -278,6 +326,7 @@ my $digest_type = "SHA-1";
  my $original_dir = File::Spec->rel2abs(File::Spec->curdir);
  my $data_dir = File::Spec->catdir($original_dir, 'porting');
  my $known_issues = File::Spec->catfile($data_dir, 'known_pod_issues.dat');
+my $MANIFEST = File::Spec->catfile(File::Spec->updir($original_dir), 'MANIFEST');
  my $copy_fh;
  
  my $MAX_LINE_LENGTH = 79;   # 79 columns
@@ -300,16 +349,86 @@ my $missing_name_description = "The NAME should have a dash and short descriptio
  # objects, tests, etc can't be pods, so don't look for them. Also skip
  # files output by the patch program.  Could also ignore most of .gitignore
  # files, but not all, so don't.
+
+my $obj_ext = $Config{'obj_ext'}; $obj_ext =~ tr/.//d; # dot will be added back
+my $lib_ext = $Config{'lib_ext'}; $lib_ext =~ tr/.//d;
+my $lib_so  = $Config{'so'};      $lib_so  =~ tr/.//d;
+my $dl_ext  = $Config{'dlext'};   $dl_ext  =~ tr/.//d;
+
+# Not really pods, but can look like them.
+my %excluded_files = (
+                        canonicalize("lib/unicore/mktables") => 1,
+                        canonicalize("Porting/make-rmg-checklist") => 1,
+                        canonicalize("Porting/perldelta_template.pod") => 1,
+                        canonicalize("regen/feature.pl") => 1,
+                        canonicalize("autodoc.pl") => 1,
+                        canonicalize("configpm") => 1,
+                        canonicalize("miniperl") => 1,
+                        canonicalize("perl") => 1,
+                        canonicalize('dist/Pod-Perldoc/corpus/no-head.pod') => 1,
+                        canonicalize('dist/Pod-Perldoc/corpus/perlfunc.pod') => 1,
+                        canonicalize('dist/Pod-Perldoc/corpus/utf8.pod') => 1,
+                        canonicalize("lib/unicore/mktables") => 1,
+                    );
+
+# This list should not include anything for which case sensitivity is
+# important, as it won't work on VMS, and won't show up until tested on VMS.
+# All or almost all such files should be listed in the MANIFEST, so that can
+# be examined for them, and each such file explicitly excluded, as is done for
+# .PL files in the loop just below this.  For files not catchable this way,
+# is_pod_file() can be used to exclude these at a finer grained level.
  my $non_pods = qr/ (?: \.
-                       (?: [achot]  | zip | gz | bz2 | jar | tar | tgz | PL | so
+                       (?: [achot]  | zip | gz | bz2 | jar | tar | tgz
                             | orig | rej | patch   # Patch program output
                             | sw[op] | \#.*  # Editor droppings
                             | old      # buildtoc output
+                           | xs       # pod should be in the .pm file
+                           | al       # autosplit files
+                           | bs       # bootstrap files
+                           | (?i:sh)  # shell scripts, hints, templates
+                           | lst      # assorted listing files
+                           | bat      # Windows,Netware,OS2 batch files
+                           | cmd      # Windows,Netware,OS2 command files
+                           | lis      # VMS compiler listings
+                           | map      # VMS linker maps
+                           | opt      # VMS linker options files
+                           | mms      # MM(K|S) description files
+                           | ts       # timestamp files generated during build
+                           | $obj_ext # object files
+                           | exe      # $Config{'exe_ext'} might be empty string
+                           | $lib_ext # object libraries
+                           | $lib_so  # shared libraries
+                           | $dl_ext  # dynamic libraries
                         )
                         $
                      ) | ~$ | \ \(Autosaved\)\.txt$ # Other editor droppings
+                           | ^cxx\$demangler_db\.$ # VMS name mangler database
+                           | ^typemap\.?$          # typemap files
+                           | ^(?i:Makefile\.PL)$
                  /x;
  
+# '.PL' files should be excluded, as they aren't final pods, but often contain
+# material used in generating pods, and so can look like a pod.  We can't use
+# the regexp above because case sensisitivity is important for these, as some
+# '.pl' files should be examined for pods.  Instead look through the MANIFEST
+# for .PL files and get their full path names, so we can exclude each such
+# file explicitly.  This works because other porting tests prohibit having two
+# files with the same names except for case.
+open my $manifest_fh, '<:bytes', $MANIFEST or die "Can't open $MANIFEST";
+while (<$manifest_fh>) {
+
+    # While we have MANIFEST open, on VMS platforms, look for files that match
+    # the magic VMS file names that have to be handled specially.  Add these
+    # to the list of them.
+    if ($^O eq 'VMS' && / ^ ( [^\t]* $vms_re ) \t /x) {
+        $special_vms_files{$1} = 1;
+    }
+    if (/ ^ ( [^\t]* \. PL ) \t /x) {
+        $excluded_files{canonicalize($1)} = 1;
+    }
+}
+close $manifest_fh, or die "Can't close $MANIFEST";
+
  
  # Pod::Checker messages to suppress
  my @suppressed_messages = (
@@ -318,7 +437,7 @@ my @suppressed_messages = (
                                              # that are real problems.
      "unescaped <>",
      "Entity number out of range",   # Checker outputs this for anything above
-                                    # 255, and all Unicode is valid
+                                    # 255, but in fact all Unicode is valid
  );
  
  sub suppressed {
@@ -383,23 +502,6 @@ sub suppressed {
      }
  }
  
-# This is to get this to work across multiple file systems, including those
-# that are not case sensitive.  The db is stored in lower case, Un*x style,
-# and all file name comparisons are done that way.
-sub canonicalize($) {
-    my $input = shift;
-    my ($volume, $directories, $file)
-                    = File::Spec->splitpath(File::Spec->canonpath($input));
-    # Assumes $volume is constant for everything in this directory structure
-    $directories = "" if ! $directories;
-    $file = "" if ! $file;
-    $file =~ s/\.$// if $^O eq 'VMS';
-    my $output = lc join '/', File::Spec->splitdir($directories), $file;
-    $output =~ s! / /+ !/!gx;       # Multiple slashes => single slash
-    return $output;
-}
-
-
  # List of known potential problems by pod and type.
  my %known_problems;
  
@@ -1055,23 +1157,6 @@ if ($show_counts) {
      exit 0;
  }
  
-
-# Not really pods, but can look like them.
-my %excluded_files = (
-                        "lib/unicore/mktables" => 1,
-                        "Porting/perldelta_template.pod" => 1,
-                        "autodoc.pl" => 1,
-                        "configpm" => 1,
-                        "miniperl" => 1,
-                        "perl" => 1,
-                    );
-
-# Convert to more generic form.
-foreach my $file (keys %excluded_files) {
-    delete $excluded_files{$file};
-    $excluded_files{canonicalize($file)} = 1;
-}
-
  # re to match files that are to be parsed only if there is an internal link
  # to them.  It does not include cpan, as whether those are parsed depends
  # on a switch.  Currently, only perltoc and the stable perldelta.pod's
@@ -1170,7 +1255,7 @@ my $digest = Digest->new($digest_type);
  sub is_pod_file {
      # If $_ is a pod file, add it to the lists and do other prep work.
  
-    if (-d $_) {
+    if (-d) {
          # Don't look at files in directories that are for tests, nor those
          # beginning with a dot
          if ($_ eq 't' || $_ =~ /^\../) {
@@ -1179,11 +1264,19 @@ sub is_pod_file {
          return;
      }
  
-    return if $_ =~ /^\./;           # No hidden Unix files
-    return if $_ =~ $non_pods;
+    return unless -r && -s;    # Can't check it if can't read it; no need to
+                               # check if 0 length
+    return unless -f || -l;    # Weird file types won't be pods
  
+    if ($_ =~ /^\./           # No hidden Unix files
+        || $_ =~ $non_pods) {
+        note("Not considering $_") if DEBUG;
+        return;
+    }
+               
      my $filename = $File::Find::name;
  
+    # $filename is relative, like './path'.  Strip that initial part away.
      # Assumes that the path separator is exactly one character.
      $filename =~ s/^\..//;
  
@@ -1201,15 +1294,15 @@ sub is_pod_file {
              # Otherwise fail it here and no reason to process it further.
              # (But the test count will be off too)
              ok(0, "Can't open '$filename': $!")
-                                            if -e $filename && ! -l $filename;
+                                            if -r $filename && ! -l $filename;
              return;
          }
          <$candidate>;
      };
  
      # If the file is a .pm or .pod, having any initial '=' on a line is
-    # grounds for testing it.  Otherwise, require a head1 NAME line to view it
-    # as a potential pod
+    # grounds for testing it.  Otherwise, require a head1 NAME line to
+    # consider it as a potential pod
      if ($filename =~ /\.(?:pm|pod)/) {
          return unless $contents =~ /^=/m;
      } else {
@@ -1323,27 +1416,27 @@ plan (tests => scalar @files) if ! $regen;
   # are done here.
   @files = sort { if ($a =~ /^cpan/) {
                      return 1 if $b !~ /^cpan/;
-                    return $a cmp $b;
+                    return lc $a cmp lc $b;
                  }
                  elsif ($b =~ /^cpan/) {
                      return -1;
                  }
                  elsif ($a =~ /$only_for_interior_links_re/) {
                      return 1 if $b !~ /$only_for_interior_links_re/;
-                    return $a cmp $b;
+                    return lc $a cmp lc $b;
                  }
                  elsif ($b =~ /$only_for_interior_links_re/) {
                      return -1;
                  }
                  elsif ($a =~ /^lib/) {
                      return 1 if $b !~ /^lib/;
-                    return $a cmp $b;
+                    return lc $a cmp lc $b;
                  }
                  elsif ($b =~ /^lib/) {
                      return -1;
                  } elsif ($a =~ /\breadme\b/i) {
                      return 1 if $b !~ /\breadme\b/i;
-                    return $a cmp $b;
+                    return lc $a cmp lc $b;
                  }
                  elsif ($b =~ /\breadme\b/i) {
                      return -1;
@@ -1360,7 +1453,7 @@ foreach my $filename (@files) {
      note("parsing $filename") if DEBUG;
  
      # We may have already figured out some things in the process of generating
-    # the file list.  If so, have a $checker object already.  But if not,
+    # the file list.  If so, we have a $checker object already.  But if not,
      # generate one now.
      my $checker = $filename_to_checker{$filename};
      if (! $checker) {
@@ -1427,7 +1520,10 @@ foreach my $filename (@files) {
                  $checker->set_skip("$prior_filename is a README apparently for $filename");
              } elsif ($filename =~ /\breadme\b/i) {
                  $checker->set_skip("$filename is a README apparently for $prior_filename");
-            } elsif (! $do_upstream_cpan && $filename =~ /^cpan/) {
+            } elsif (! $do_upstream_cpan
+                     && $filename =~ /^cpan/
+                     && $prior_filename =~ /^cpan/)
+            {
                  $checker->set_skip("CPAN is upstream for $filename");
              } else { # Here have two pods with identical names that differ
                  $prior_checker->poderror(
@@ -1591,7 +1687,9 @@ if (! $has_input_files) {
  }
  
  # If regenerating the data file, start with the modules for which we don't
-# check targets
+# check targets.  If you change the sort order, you need to run --regen before
+# committing so that future commits that do run regen don't show irrelevant
+# changes.
  if ($regen) {
      foreach (sort { lc $a cmp lc $b } keys %valid_modules) {
          my_safer_print($copy_fh, $_, "\n");
@@ -1600,7 +1698,6 @@ if ($regen) {
  
  # Now ready to output the messages.
  foreach my $filename (@files) {
-    my $test_name = "POD of $filename";
      my $canonical = canonicalize($filename);
      SKIP: {
          my $skip = $filename_to_checker{$filename}->get_skip // "";
@@ -1618,7 +1715,7 @@ foreach my $filename (@files) {
                  else {
                      $count = @{$problems{$filename}{$message}};
                  }
-                my_safer_print($copy_fh, canonicalize($filename) . "\t$message\t$count\n");
+                my_safer_print($copy_fh, $canonical . "\t$message\t$count\n");
              }
              next;
          }