fix NAME of perl5125delta.pod

[perl5.git] / t / porting / podcheck.t
diff --git a/t/porting/podcheck.t b/t/porting/podcheck.t

index 644f583..4cea599 100644 (file)
--- a/t/porting/podcheck.t
+++ b/t/porting/podcheck.t
@@ -200,7 +200,7 @@ For example,
      ./perl -I../lib porting/podcheck.t --add_link Unicode::Casing
  
  causes the external module "Unicode::Casing" to be added to the data base, so
-C<LE<lt>Unicode::Casing<gt>> will be considered valid.
+C<LE<lt>Unicode::CasingE<gt>> will be considered valid.
  
  =item --regen
  
@@ -247,6 +247,17 @@ L<Pod::Checker>
  
  =cut
  
+# VMS builds have a '.com' appended to utility and script names, and it adds a
+# trailing dot for any other file name that doesn't have a dot in it.  The db
+# is stored without those things.  This regex allows for these special file
+# names to be dealt with.  It needs to be interpolated into a larger regex
+# that furnishes the closing boundary.
+my $vms_re = qr/ \. (?: com )? /x;
+
+# Some filenames in the MANIFEST match $vms_re, and so must not be handled the
+# same way that that the special vms ones are.  This hash lists those.
+my %special_vms_files;
+
  # This is to get this to work across multiple file systems, including those
  # that are not case sensitive.  The db is stored in lower case, Un*x style,
  # and all file name comparisons are done that way.
@@ -257,10 +268,21 @@ sub canonicalize($) {
      # Assumes $volume is constant for everything in this directory structure
      $directories = "" if ! $directories;
      $file = "" if ! $file;
-    $file =~ s/\.$// if $^O eq 'VMS';
-    my $output = lc join '/', File::Spec->splitdir($directories), $file;
-    $output =~ s! / /+ !/!gx;       # Multiple slashes => single slash
-    return $output;
+    $file = lc join '/', File::Spec->splitdir($directories), $file;
+    $file =~ s! / /+ !/!gx;       # Multiple slashes => single slash
+
+    # The db is stored without the special suffixes that are there in VMS, so
+    # strip them off to get the comparable name.  But some files on all
+    # platforms have these suffixes, so this shouldn't happen for them, as any
+    # of their db entries will have the suffixes in them.  The hash has been
+    # populated with these files.
+    if ($^O eq 'VMS'
+        && $file =~ / ( $vms_re ) $ /x
+        && ! exists $special_vms_files{$file})
+    {
+        $file =~ s/ $1 $ //x;
+    }
+    return $file;
  }
  
  #####################################################
@@ -336,11 +358,17 @@ my $dl_ext  = $Config{'dlext'};   $dl_ext  =~ tr/.//d;
  # Not really pods, but can look like them.
  my %excluded_files = (
                          canonicalize("lib/unicore/mktables") => 1,
+                        canonicalize("Porting/make-rmg-checklist") => 1,
                          canonicalize("Porting/perldelta_template.pod") => 1,
+                        canonicalize("regen/feature.pl") => 1,
                          canonicalize("autodoc.pl") => 1,
                          canonicalize("configpm") => 1,
                          canonicalize("miniperl") => 1,
                          canonicalize("perl") => 1,
+                        canonicalize('cpan/Pod-Perldoc/corpus/no-head.pod') => 1,
+                        canonicalize('cpan/Pod-Perldoc/corpus/perlfunc.pod') => 1,
+                        canonicalize('cpan/Pod-Perldoc/corpus/utf8.pod') => 1,
+                        canonicalize("lib/unicore/mktables") => 1,
                      );
  
  # This list should not include anything for which case sensitivity is
@@ -371,6 +399,8 @@ my $non_pods = qr/ (?: \.
                             | $lib_ext # object libraries
                             | $lib_so  # shared libraries
                             | $dl_ext  # dynamic libraries
+                           | gif      # GIF images (example files from CGI.pm)
+                           | eg       # examples from libnet
                         )
                         $
                      ) | ~$ | \ \(Autosaved\)\.txt$ # Other editor droppings
@@ -388,6 +418,13 @@ my $non_pods = qr/ (?: \.
  # files with the same names except for case.
  open my $manifest_fh, '<:bytes', $MANIFEST or die "Can't open $MANIFEST";
  while (<$manifest_fh>) {
+
+    # While we have MANIFEST open, on VMS platforms, look for files that match
+    # the magic VMS file names that have to be handled specially.  Add these
+    # to the list of them.
+    if ($^O eq 'VMS' && / ^ ( [^\t]* $vms_re ) \t /x) {
+        $special_vms_files{$1} = 1;
+    }
      if (/ ^ ( [^\t]* \. PL ) \t /x) {
          $excluded_files{canonicalize($1)} = 1;
      }
@@ -421,7 +458,8 @@ sub suppressed {
  
      sub plan {
          my %plan = @_;
-        $planned = $plan{tests};
+        $planned = $plan{tests} + 1;    # +1 for final test that files haven't
+                                        # been removed
          print "1..$planned\n";
          return;
      }
@@ -435,7 +473,7 @@ sub suppressed {
          $current_test++;
          print "not " unless $success;
          print "ok $current_test - $message\n";
-        return;
+        return $success;
      }
  
      sub skip {
@@ -999,6 +1037,27 @@ package My::Pod::Checker {      # Extend Pod::Checker
          delete $problems{$self->get_filename};
          return;
      }
+
+    sub parse_from_file {
+        # This overrides the super class method so that if an open fails on a
+        # transitory file, it doesn't croak.  It returns 1 if it did find the
+        # file, 0 if it didn't
+
+        my $self = shift;
+        my $filename = shift;
+        # ignores 2nd param, which is output file.  Always uses undef
+
+        if (open my $in_fh, '<:bytes', $filename) {
+            $self->SUPER::parse_from_filehandle($in_fh, undef);
+            close $in_fh;
+            return 1;
+        }
+
+        # If couldn't open file, perhaps it was transitory, and hence not an error
+        return 0 unless -e $filename;
+
+        die "Can't open '$filename': $!\n";
+    }
  }
  
  package Tie_Array_to_FH {  # So printing actually goes to an array
@@ -1192,7 +1251,8 @@ sub my_safer_print {    # print, with error checking for outputting to db
      }
  }
  
-sub extract_pod {   # Extracts just the pod from a file
+sub extract_pod {   # Extracts just the pod from a file; returns undef if file
+                    # doesn't exist
      my $filename = shift;
  
      my @pod;
@@ -1200,45 +1260,53 @@ sub extract_pod {   # Extracts just the pod from a file
      # Arrange for the output of Pod::Parser to be collected in an array we can
      # look at instead of being printed
      tie *ALREADY_FH, 'Tie_Array_to_FH', \@pod;
-    open my $in_fh, '<:bytes', $filename
+    if (open my $in_fh, '<:bytes', $filename) {
+        my $parser = Pod::Parser->new();
+        $parser->parse_from_filehandle($in_fh, *ALREADY_FH);
+        close $in_fh;
  
-        # The file should already have been opened once to get here, so if
-        # fails, just die.  It's possible that a transitory file containing a
-        # pod would get here, but not bothering to add code for that very
-        # unlikely event.
-        or die "Can't open '$filename': $!\n";
-
-    my $parser = Pod::Parser->new();
-    $parser->parse_from_filehandle($in_fh, *ALREADY_FH);
-    close $in_fh;
+        return join "", @pod
+    }
  
-    return join "", @pod
+    # The file should already have been opened once to get here, so if that
+    # fails, something is wrong.  It's possible that a transitory file
+    # containing a pod would get here, so if the file no longer exists just
+    # return undef.
+    return unless -e $filename;
+    die "Can't open '$filename': $!\n";
  }
  
  my $digest = Digest->new($digest_type);
  
+# This is used as a callback from File::Find::find(), which always constructs
+# pathnames using Unix separators
  sub is_pod_file {
      # If $_ is a pod file, add it to the lists and do other prep work.
  
-    if (-d $_) {
+    if (-d) {
          # Don't look at files in directories that are for tests, nor those
          # beginning with a dot
-        if ($_ eq 't' || $_ =~ /^\../) {
+        if (m!/t\z! || m!/\.!) {
              $File::Find::prune = 1;
          }
          return;
      }
  
-    if ($_ =~ /^\./           # No hidden Unix files
-        || $_ =~ $non_pods) {
+    return unless -r && -s;    # Can't check it if can't read it; no need to
+                               # check if 0 length
+    return unless -f || -l;    # Weird file types won't be pods
+
+    my ($leaf) = m!([^/]+)\z!;
+    if (m!/\.!                 # No hidden Unix files
+        || $leaf =~ $non_pods) {
          note("Not considering $_") if DEBUG;
          return;
      }
                 
      my $filename = $File::Find::name;
  
-    # Assumes that the path separator is exactly one character.
-    $filename =~ s/^\..//;
+    # $filename is relative, like './path'.  Strip that initial part away.
+    $filename =~ s!^\./!! or die 'Unexpected pathname "$filename"';
  
      return if $excluded_files{canonicalize($filename)};
  
@@ -1254,7 +1322,7 @@ sub is_pod_file {
              # Otherwise fail it here and no reason to process it further.
              # (But the test count will be off too)
              ok(0, "Can't open '$filename': $!")
-                                            if -e $filename && ! -l $filename;
+                                            if -r $filename && ! -l $filename;
              return;
          }
          <$candidate>;
@@ -1353,7 +1421,7 @@ else { # No input files -- go find all the possibilities.
      chdir File::Spec->updir;
  
      # And look in this directory and all its subdirectories
-    find( \&is_pod_file, '.');
+    find( {wanted => \&is_pod_file, no_chdir => 1}, '.');
  
      # Add ourselves to the test
      push @files, "t/porting/podcheck.t";
@@ -1408,6 +1476,7 @@ plan (tests => scalar @files) if ! $regen;
              @files;
  
  # Now go through all the files and parse them
+FILE:
  foreach my $filename (@files) {
      my $parsed = 0;
      note("parsing $filename") if DEBUG;
@@ -1424,8 +1493,12 @@ foreach my $filename (@files) {
      # We have set the name in the checker object if there is a possibility
      # that no further parsing is necessary, but otherwise do the parsing now.
      if (! $checker->name) {
+        if (! $checker->parse_from_file($filename, undef)) {
+            $checker->set_skip("$filename is transitory");
+            next FILE;
+        }
          $parsed = 1;
-        $checker->parse_from_file($filename, undef);
+
      }
  
      if ($checker->num_errors() < 0) {   # Returns negative if not a pod
@@ -1444,7 +1517,12 @@ foreach my $filename (@files) {
          }
          else {
              my $digest = Digest->new($digest_type);
-            $digest->add(extract_pod($filename));
+            my $contents = extract_pod($filename);
+
+            # If the return is undef, it means that $filename was a transitory
+            # file; skip it.
+            next FILE unless defined $contents;
+            $digest->add($contents);
              $id = $digest->digest;
          }
  
@@ -1470,7 +1548,28 @@ foreach my $filename (@files) {
              # reason, but the pods they contain are identical.  Extract the
              # pods and do the comparisons on just those.
              if (! $same && $name) {
-                $same = extract_pod($prior_filename) eq extract_pod($filename);
+                my $contents = extract_pod($filename);
+
+                # If return is <undef>, it means that $filename no longer
+                # exists.  This means it was a transitory file, and should not
+                # be tested.
+                next FILE unless defined $contents;
+
+                my $prior_contents = extract_pod($prior_filename);
+
+                # If return is <undef>, it means that $prior_filename no
+                # longer exists.  This means it was a transitory file, and
+                # should not have been tested, but we already did process it.
+                # What we should do now is to back-out its records, and
+                # process $filename in its stead.  But backing out is not so
+                # simple, and so I'm (khw) skipping that unless and until
+                # experience shows that it is needed.  We do go process
+                # $filename, and there are potential false positive conflicts
+                # with the transitory $prior_contents, and rerunning the test
+                # should cause it to succeed.
+                goto process_this_pod unless defined $prior_contents;
+
+                $same = $prior_contents eq $contents;
              }
  
              if ($same) {
@@ -1504,9 +1603,11 @@ foreach my $filename (@files) {
  
              # In any event, don't process this pod that has the same name as
              # another.
-            next;
+            next FILE;
          }
  
+    process_this_pod:
+
          # A unique pod.
          $id_to_checker{$id} = $checker;
  
@@ -1517,8 +1618,10 @@ foreach my $filename (@files) {
              if ($filename =~ /^cpan/) {
                  $checker->set_skip("CPAN is upstream for $filename");
              }
-            elsif ($filename =~ /perl\d+delta/ && ! $do_deltas) {
-                $checker->set_skip("$filename is a stable perldelta");
+            elsif ($filename =~ /perl\d+delta/) {
+                if (! $do_deltas) {
+                    $checker->set_skip("$filename is a stable perldelta");
+                }
              }
              elsif ($filename =~ /perltoc/) {
                  $checker->set_skip("$filename dependent on component pods");
@@ -1538,7 +1641,7 @@ foreach my $filename (@files) {
              $checker->poderror( { -msg => $no_name,
                                    -line => '???'
                                  });
-            next;
+            next FILE;
          }
  
          # For skipped files, just get its NAME
@@ -1547,14 +1650,17 @@ foreach my $filename (@files) {
          {
              $checker->node($name) if $name;
          }
-        else {
-            $checker->parse_from_file($filename, undef) if ! $parsed;
+        elsif (! $parsed) {
+            if (! $checker->parse_from_file($filename, undef)) {
+                $checker->set_skip("$filename is transitory");
+                next FILE;
+            }
          }
  
          # Go through everything in the file that could be an anchor that
          # could be a link target.  Count how many there are of the same name.
          foreach my $node ($checker->linkable_nodes) {
-            next if ! $node;        # Can be empty is like '=item *'
+            next FILE if ! $node;        # Can be empty is like '=item *'
              if (exists $nodes{$name}{$node}) {
                  $nodes{$name}{$node}++;
              }
@@ -1647,7 +1753,9 @@ if (! $has_input_files) {
  }
  
  # If regenerating the data file, start with the modules for which we don't
-# check targets
+# check targets.  If you change the sort order, you need to run --regen before
+# committing so that future commits that do run regen don't show irrelevant
+# changes.
  if ($regen) {
      foreach (sort { lc $a cmp lc $b } keys %valid_modules) {
          my_safer_print($copy_fh, $_, "\n");
@@ -1734,9 +1842,19 @@ foreach my $filename (@files) {
              note(join "", @diagnostics,
              "See end of this test output for your options on silencing this");
          }
+
+        delete $known_problems{$canonical};
      }
  }
  
+if (! ok (keys %known_problems == 0, "The known problems data base includes no references to non-existent files")) {
+    note("The following files were not found: "
+         . join ", ", keys %known_problems);
+    note("They will automatically be removed from the db the next time");
+    note("  cd t; ./perl -I../lib porting/podcheck.t --regen");
+    note("is run");
+}
+
  my $how_to = <<EOF;
     run this test script by hand, using the following formula (on
     Un*x-like machines):