Underscore the non-official version number

[perl5.git] / lib / Pod / ParseUtils.pm
diff --git a/lib/Pod/ParseUtils.pm b/lib/Pod/ParseUtils.pm

index a66e8f5..13d66ab 100644 (file)
--- a/lib/Pod/ParseUtils.pm
+++ b/lib/Pod/ParseUtils.pm
@@ -1,7 +1,7 @@
  #############################################################################
  # Pod/ParseUtils.pm -- helpers for POD parsing and conversion
  #
-# Copyright (C) 1999 by Marek Rouchal. All rights reserved.
+# Copyright (C) 1999-2000 by Marek Rouchal. All rights reserved.
  # This file is part of "PodParser". PodParser is free software;
  # you can redistribute it and/or modify it under the same terms
  # as Perl itself.
@@ -10,8 +10,8 @@
  package Pod::ParseUtils;
  
  use vars qw($VERSION);
-$VERSION = 0.2;    ## Current version of this package
-require  5.004;    ## requires this Perl version or later
+$VERSION = 1.35;   ## Current version of this package
+require  5.005;    ## requires this Perl version or later
  
  =head1 NAME
  
@@ -49,7 +49,7 @@ The following methods are available:
  
  =over 4
  
-=item new()
+=item Pod::List-E<gt>new()
  
  Create a new list object. Properties may be specified through a hash
  reference like this:
@@ -79,7 +79,7 @@ sub initialize {
      $self->{-type} ||= '';
  }
  
-=item file()
+=item $list-E<gt>file()
  
  Without argument, retrieves the file name the list is in. This must
  have been set before by either specifying B<-file> in the B<new()>
@@ -92,7 +92,7 @@ sub file {
     return (@_ > 1) ? ($_[0]->{-file} = $_[1]) : $_[0]->{-file};
  }
  
-=item start()
+=item $list-E<gt>start()
  
  Without argument, retrieves the line number where the list started.
  This must have been set before by either specifying B<-start> in the
@@ -106,7 +106,7 @@ sub start {
     return (@_ > 1) ? ($_[0]->{-start} = $_[1]) : $_[0]->{-start};
  }
  
-=item indent()
+=item $list-E<gt>indent()
  
  Without argument, retrieves the indent level of the list as specified
  in C<=over n>. This must have been set before by either specifying
@@ -120,7 +120,7 @@ sub indent {
     return (@_ > 1) ? ($_[0]->{-indent} = $_[1]) : $_[0]->{-indent};
  }
  
-=item type()
+=item $list-E<gt>type()
  
  Without argument, retrieves the list type, which can be an arbitrary value,
  e.g. C<OL>, C<UL>, ... when thinking the HTML way.
@@ -135,7 +135,7 @@ sub type {
     return (@_ > 1) ? ($_[0]->{-type} = $_[1]) : $_[0]->{-type};
  }
  
-=item rx()
+=item $list-E<gt>rx()
  
  Without argument, retrieves a regular expression for simplifying the 
  individual item strings once the list type has been determined. Usage:
@@ -152,7 +152,7 @@ sub rx {
     return (@_ > 1) ? ($_[0]->{-rx} = $_[1]) : $_[0]->{-rx};
  }
  
-=item item()
+=item $list-E<gt>item()
  
  Without argument, retrieves the array of the items in this list.
  The items may be represented by any scalar.
@@ -172,7 +172,7 @@ sub item {
      }
  }
  
-=item parent()
+=item $list-E<gt>parent()
  
  Without argument, retrieves information about the parent holding this
  list, which is represented as an arbitrary scalar.
@@ -188,7 +188,7 @@ sub parent {
     return (@_ > 1) ? ($_[0]->{-parent} = $_[1]) : $_[0]->{-parent};
  }
  
-=item tag()
+=item $list-E<gt>tag()
  
  Without argument, retrieves information about the list tag, which can be
  any scalar.
@@ -227,7 +227,7 @@ used to construct hyperlinks.
  
  =over 4
  
-=item new()
+=item Pod::Hyperlink-E<gt>new()
  
  The B<new()> method can either be passed a set of key/value pairs or a single
  scalar value, namely the contents of a C<LE<lt>...E<gt>> sequence. An object
@@ -269,10 +269,14 @@ sub initialize {
      $self->{_warnings} = [];
  }
  
-=item parse($string)
+=item $link-E<gt>parse($string)
  
  This method can be used to (re)parse a (new) hyperlink, i.e. the contents
  of a C<LE<lt>...E<gt>> sequence. The result is stored in the current object.
+Warnings are stored in the B<warnings> property.
+E.g. sections like C<LE<lt>open(2)E<gt>> are deprecated, as they do not point
+to Perl documents. C<LE<lt>DBI::foo(3p)E<gt>> is wrong as well, the manpage
+section can simply be dropped.
  
  =cut
  
@@ -280,14 +284,13 @@ sub parse {
      my $self = shift;
      local($_) = $_[0];
      # syntax check the link and extract destination
-    my ($alttext,$page,$node,$type) = ('','','','');
+    my ($alttext,$page,$node,$type,$quoted) = (undef,'','','',0);
  
      $self->{_warnings} = [];
  
      # collapse newlines with whitespace
-    if(s/\s*\n+\s*/ /g) {
-        $self->warning("collapsing newlines to blanks");
-    }
+    s/\s*\n+\s*/ /g;
+
      # strip leading/trailing whitespace
      if(s/^[\s\n]+//) {
          $self->warning("ignoring leading whitespace in link");
@@ -305,72 +308,101 @@ sub parse {
      #warn "DEBUG: link=$_\n";
  
      # only page
-    if(m!^(\w+(?:::\w+)*)\s*(\(\w*\)|)$!) {
-        $page = $1 . $2;
+    # problem: a lot of people use (), or (1) or the like to indicate
+    # man page sections. But this collides with L<func()> that is supposed
+    # to point to an internal funtion...
+    my $page_rx = '[\w.-]+(?:::[\w.-]+)*(?:[(](?:\d\w*|)[)]|)';
+    # page name only
+    if(m!^($page_rx)$!o) {
+        $page = $1;
          $type = 'page';
      }
-    # alttext, page and section
-    elsif(m!^(.+?)\s*[|]\s*(\w+(?:::\w+)*)\s*(\(\w*\)|)\s*/\s*"(.+)"$!) {
-        ($alttext, $page, $node) = ($1, $2 . $3, $4);
+    # alttext, page and "section"
+    elsif(m!^(.*?)\s*[|]\s*($page_rx)\s*/\s*"(.+)"$!o) {
+        ($alttext, $page, $node) = ($1, $2, $3);
          $type = 'section';
+        $quoted = 1; #... therefore | and / are allowed
+    }
+    # alttext and page
+    elsif(m!^(.*?)\s*[|]\s*($page_rx)$!o) {
+        ($alttext, $page) = ($1, $2);
+        $type = 'page';
      }
-    # page and section
-    elsif(m!^(\w+(?:::\w+)*)\s*(\(\w*\)|)\s*/\s*"(.+)"$!) {
-        ($page, $node) = ($1 . $2, $3);
+    # alttext and "section"
+    elsif(m!^(.*?)\s*[|]\s*(?:/\s*|)"(.+)"$!) {
+        ($alttext, $node) = ($1,$2);
          $type = 'section';
+        $quoted = 1;
+    }
+    # page and "section"
+    elsif(m!^($page_rx)\s*/\s*"(.+)"$!o) {
+        ($page, $node) = ($1, $2);
+        $type = 'section';
+        $quoted = 1;
      }
      # page and item
-    elsif(m!^(\w+(?:::\w+)*)\s*(\(\w*\)|)\s*/\s*(.+)$!) {
-        ($page, $node) = ($1 . $2, $3);
+    elsif(m!^($page_rx)\s*/\s*(.+)$!o) {
+        ($page, $node) = ($1, $2);
          $type = 'item';
      }
-    # only section
-    elsif(m!^(?:/\s*|)"(.+)"$!) {
+    # only "section"
+    elsif(m!^/?"(.+)"$!) {
          $node = $1;
          $type = 'section';
+        $quoted = 1;
      }
      # only item
-    elsif(m!^/(.+)$!) {
+    elsif(m!^\s*/(.+)$!) {
          $node = $1;
          $type = 'item';
      }
+
+    # non-standard: Hyperlink with alt-text - doesn't remove protocol prefix, maybe it should?
+    elsif(m!^ \s* (.*?) \s* [|] \s* (\w+:[^:\s] [^\s|]*?) \s* $!ix) {
+      ($alttext,$node) = ($1,$2);
+      $type = 'hyperlink';
+    }
+
      # non-standard: Hyperlink
-    elsif(m!^((?:http|ftp|mailto|news):.+)$!i) {
+    elsif(m!^(\w+:[^:\s]\S*)$!i) {
          $node = $1;
          $type = 'hyperlink';
      }
      # alttext, page and item
-    elsif(m!^(.+?)\s*[|]\s*(\w+(?:::\w+)*)\s*(\(\w*\)|)\s*/\s*(.+)$!) {
-        ($alttext, $page, $node) = ($1, $2 . $3, $4);
+    elsif(m!^(.*?)\s*[|]\s*($page_rx)\s*/\s*(.+)$!o) {
+        ($alttext, $page, $node) = ($1, $2, $3);
          $type = 'item';
      }
-    # alttext and page
-    elsif(m!^(.+?)\s*[|]\s*(\w+(?:::\w+)*)\s*(\(\w*\)|)$!) {
-        ($alttext, $page) = ($1, $2 . $3);
-        $type = 'page';
-    }
-    # alttext and section
-    elsif(m!^(.+?)\s*[|]\s*(?:/\s*|)"(.+)"$!) {
-        ($alttext, $node) = ($1,$2);
-        $type = 'section';
-    }
      # alttext and item
-    elsif(m!^(.+?)\s*[|]\s*/(.+)$!) {
+    elsif(m!^(.*?)\s*[|]\s*/(.+)$!) {
          ($alttext, $node) = ($1,$2);
      }
-    # nonstandard: alttext and hyperlink
-    elsif(m!^(.+?)\s*[|]\s*((?:http|ftp|mailto|news):.+)$!) {
-        ($alttext, $node) = ($1,$2);
-        $type = 'hyperlink';
-    }
      # must be an item or a "malformed" section (without "")
      else {
          $node = $_;
          $type = 'item';
      }
+    # collapse whitespace in nodes
+    $node =~ s/\s+/ /gs;
+
+    # empty alternative text expands to node name
+    if(defined $alttext) {
+        if(!length($alttext)) {
+          $alttext = $node | $page;
+        }
+    }
+    else {
+        $alttext = '';
+    }
  
      if($page =~ /[(]\w*[)]$/) {
-        $self->warning("section in `$page' deprecated");
+        $self->warning("(section) in '$page' deprecated");
+    }
+    if(!$quoted && $node =~ m:[|/]: && $type ne 'hyperlink') {
+        $self->warning("node '$node' contains non-escaped | or /");
+    }
+    if($alttext =~ m:[|/]:) {
+        $self->warning("alternative text '$node' contains non-escaped | or /");
      }
      $self->{-page} = $page;
      $self->{-node} = $node;
@@ -396,11 +428,9 @@ sub _construct_text {
          $self->{_text} = $section;
      }
      else {
-        $self->{_text} = (!$section ? '' : 
-            $type eq 'item' ? "the $section entry" :
-                "the section on $section" ) .
-            ($page ? ($section ? ' in ':'') . "the $page$page_ext manpage" :
-                ' elsewhere in this document');
+        $self->{_text} = ($section || '') .
+            (($page && $section) ? ' in ' : '') .
+            "$page$page_ext";
      }
      # for being marked up later
      # use the non-standard markers P<> and Q<>, so that the resulting
@@ -413,15 +443,12 @@ sub _construct_text {
          $self->{_markup} = "Q<$section>";
      }
      else {
-        $self->{_markup} = (!$section ? '' : 
-            $type eq 'item' ? "the Q<$section> entry" :
-                "the section on Q<$section>" ) .
-            ($page ? ($section ? ' in ':'') . "the P<$page>$page_ext manpage" :
-                ' elsewhere in this document');
+        $self->{_markup} = (!$section ? '' : "Q<$section>") .
+            ($page ? ($section ? ' in ':'') . "P<$page>$page_ext" : '');
      }
  }
  
-=item markup($string)
+=item $link-E<gt>markup($string)
  
  Set/retrieve the textual value of the link. This string contains special
  markers C<PE<lt>E<gt>> and C<QE<lt>E<gt>> that should be expanded by the
@@ -436,17 +463,17 @@ sub markup {
      return (@_ > 1) ? ($_[0]->{_markup} = $_[1]) : $_[0]->{_markup};
  }
  
-=item text()
+=item $link-E<gt>text()
  
  This method returns the textual representation of the hyperlink as above,
  but without markers (read only). Depending on the link type this is one of
  the following alternatives (the + and * denote the portions of the text
  that are marked up):
  
-  the +perl+ manpage
-  the *$|* entry in the +perlvar+ manpage
-  the section on *OPTIONS* in the +perldoc+ manpage
-  the section on *DESCRIPTION* elsewhere in this document
+  +perl+                    L<perl>
+  *$|* in +perlvar+         L<perlvar/$|>
+  *OPTIONS* in +perldoc+    L<perldoc/"OPTIONS">
+  *DESCRIPTION*             L<"DESCRIPTION">
  
  =cut
  
@@ -455,7 +482,7 @@ sub text {
      $_[0]->{_text};
  }
  
-=item warning()
+=item $link-E<gt>warning()
  
  After parsing, this method returns any warnings encountered during the
  parsing process.
@@ -472,7 +499,9 @@ sub warning {
      return @{$self->{_warnings}};
  }
  
-=item line(), file()
+=item $link-E<gt>file()
+
+=item $link-E<gt>line()
  
  Just simple slots for storing information about the line and the file
  the link was encountered in. Has to be filled in manually.
@@ -489,7 +518,7 @@ sub file {
      return (@_ > 1) ? ($_[0]->{-file} = $_[1]) : $_[0]->{-file};
  }
  
-=item page()
+=item $link-E<gt>page()
  
  This method sets or returns the POD page this link points to.
  
@@ -504,7 +533,7 @@ sub page {
      $_[0]->{-page};
  }
  
-=item node()
+=item $link-E<gt>node()
  
  As above, but the destination node text of the link.
  
@@ -519,7 +548,7 @@ sub node {
      $_[0]->{-node};
  }
  
-=item alttext()
+=item $link-E<gt>alttext()
  
  Sets or returns an alternative text specified in the link.
  
@@ -534,7 +563,7 @@ sub alttext {
      $_[0]->{-alttext};
  }
  
-=item type()
+=item $link-E<gt>type()
  
  The node type, either C<section> or C<item>. As an unofficial type,
  there is also C<hyperlink>, derived from e.g. C<LE<lt>http://perl.comE<gt>>
@@ -546,7 +575,7 @@ sub type {
      return (@_ > 1) ? ($_[0]->{-type} = $_[1]) : $_[0]->{-type};
  }
  
-=item link()
+=item $link-E<gt>link()
  
  Returns the link as contents of C<LE<lt>E<gt>>. Reciprocal to B<parse()>.
  
@@ -559,18 +588,24 @@ sub link {
      my $self = shift;
      my $link = $self->page() || '';
      if($self->node()) {
+        my $node = $self->node();
+        $text =~ s/\|/E<verbar>/g;
+        $text =~ s:/:E<sol>:g;
          if($self->type() eq 'section') {
-            $link .= ($link ? '/' : '') . '"' . $self->node() . '"';
+            $link .= ($link ? '/' : '') . '"' . $node . '"';
          }
          elsif($self->type() eq 'hyperlink') {
              $link = $self->node();
          }
          else { # item
-            $link .= '/' . $self->node();
+            $link .= '/' . $node;
          }
      }
      if($self->alttext()) {
-        $link = $self->alttext() . '|' . $link;
+        my $text = $self->alttext();
+        $text =~ s/\|/E<verbar>/g;
+        $text =~ s:/:E<sol>:g;
+        $link = "$text|$link";
      }
      $link;
  }
@@ -600,7 +635,7 @@ The following methods are available:
  
  =over 4
  
-=item new()
+=item Pod::Cache-E<gt>new()
  
  Create a new cache object. This object can hold an arbitrary number of
  POD documents of class Pod::Cache::Item.
@@ -615,7 +650,7 @@ sub new {
      return $self;
  }
  
-=item item()
+=item $cache-E<gt>item()
  
  Add a new item to the cache. Without arguments, this method returns a
  list of all cache elements.
@@ -634,7 +669,7 @@ sub item {
      }
  }
  
-=item find_page($name)
+=item $cache-E<gt>find_page($name)
  
  Look for a POD document named C<$name> in the cache. Returns the
  reference to the corresponding Pod::Cache::Item object or undef if
@@ -666,7 +701,7 @@ The following methods are available:
  
  =over 4
  
-=item new()
+=item Pod::Cache::Item-E<gt>new()
  
  Create a new object.
  
@@ -687,7 +722,7 @@ sub initialize {
      $self->{-nodes} = [] unless(defined $self->{-nodes});
  }
  
-=item page()
+=item $cacheitem-E<gt>page()
  
  Set/retrieve the POD document name (e.g. "Pod::Parser").
  
@@ -698,7 +733,7 @@ sub page {
     return (@_ > 1) ? ($_[0]->{-page} = $_[1]) : $_[0]->{-page};
  }
  
-=item description()
+=item $cacheitem-E<gt>description()
  
  Set/retrieve the POD short description as found in the C<=head1 NAME>
  section.
@@ -710,7 +745,7 @@ sub description {
     return (@_ > 1) ? ($_[0]->{-description} = $_[1]) : $_[0]->{-description};
  }
  
-=item path()
+=item $cacheitem-E<gt>path()
  
  Set/retrieve the POD file storage path.
  
@@ -721,7 +756,7 @@ sub path {
     return (@_ > 1) ? ($_[0]->{-path} = $_[1]) : $_[0]->{-path};
  }
  
-=item file()
+=item $cacheitem-E<gt>file()
  
  Set/retrieve the POD file name.
  
@@ -732,7 +767,7 @@ sub file {
     return (@_ > 1) ? ($_[0]->{-file} = $_[1]) : $_[0]->{-file};
  }
  
-=item nodes()
+=item $cacheitem-E<gt>nodes()
  
  Add a node (or a list of nodes) to the document's node list. Note that
  the order is kept, i.e. start with the first node and end with the last.
@@ -755,19 +790,20 @@ sub nodes {
      }
  }
  
-=item find_node($name)
-
-Look for a node named C<$name> in the object's node list. Returns the
-unique id of the node (i.e. the second element of the array stored in
-the node arry) or undef if not found.
+=item $cacheitem-E<gt>find_node($name)
  
-=back
+Look for a node or index entry named C<$name> in the object.
+Returns the unique id of the node (i.e. the second element of the array
+stored in the node array) or undef if not found.
  
  =cut
  
  sub find_node {
      my ($self,$node) = @_;
-    foreach(@{$self->{-nodes}}) {
+    my @search;
+    push(@search, @{$self->{-nodes}}) if($self->{-nodes});
+    push(@search, @{$self->{-idx}}) if($self->{-idx});
+    foreach(@search) {
          if($_->[0] eq $node) {
              return $_->[1]; # id
          }
@@ -775,10 +811,36 @@ sub find_node {
      undef;
  }
  
+=item $cacheitem-E<gt>idx()
+
+Add an index entry (or a list of them) to the document's index list. Note that
+the order is kept, i.e. start with the first node and end with the last.
+If no argument is given, the current list of index entries is returned in the
+same order the entries have been added.
+An index entry can be any scalar, but usually is a pair of string and
+unique id.
+
+=back
+
+=cut
+
+# The POD index entries
+sub idx {
+    my ($self,@idx) = @_;
+    if(@idx) {
+        push(@{$self->{-idx}}, @idx);
+        return @idx;
+    }
+    else {
+        return @{$self->{-idx}};
+    }
+}
  
  =head1 AUTHOR
  
-Marek Rouchal E<lt>marek@saftsack.fs.uni-bayreuth.deE<gt>, borrowing
+Please report bugs using L<http://rt.cpan.org>.
+
+Marek Rouchal E<lt>marekr@cpan.orgE<gt>, borrowing
  a lot of things from L<pod2man> and L<pod2roff> as well as other POD
  processing tools by Tom Christiansen, Brad Appleton and Russ Allbery.