lib/Pod/Text.pm

   1 # Pod::Text -- Convert POD data to formatted ASCII text.
   2 # $Id: Text.pm,v 2.11 2001/07/10 11:08:10 eagle Exp $
   3 #
   4 # Copyright 1999, 2000, 2001 by Russ Allbery <rra@stanford.edu>
   5 #
   6 # This program is free software; you may redistribute it and/or modify it
   7 # under the same terms as Perl itself.
   8 #
   9 # This module replaces the old Pod::Text that came with versions of Perl prior
  10 # to 5.6.0, and attempts to match its output except for some specific
  11 # circumstances where other decisions seemed to produce better output.  It
  12 # uses Pod::Parser and is designed to be very easy to subclass.
  13 #
  14 # Perl core hackers, please note that this module is also separately
  15 # maintained outside of the Perl core as part of the podlators.  Please send
  16 # me any patches at the address above in addition to sending them to the
  17 # standard Perl mailing lists.
  18
  19 ##############################################################################
  20 # Modules and declarations
  21 ##############################################################################
  22
  23 package Pod::Text;
  24
  25 require 5.004;
  26
  27 use Carp qw(carp croak);
  28 use Exporter ();
  29 use Pod::Select ();
  30
  31 use strict;
  32 use vars qw(@ISA @EXPORT %ESCAPES $VERSION);
  33
  34 # We inherit from Pod::Select instead of Pod::Parser so that we can be used by
  35 # Pod::Usage.
  36 @ISA = qw(Pod::Select Exporter);
  37
  38 # We have to export pod2text for backward compatibility.
  39 @EXPORT = qw(pod2text);
  40
  41 # Don't use the CVS revision as the version, since this module is also in Perl
  42 # core and too many things could munge CVS magic revision strings.  This
  43 # number should ideally be the same as the CVS revision in podlators, however.
  44 $VERSION = 2.11;
  45
  46
  47 ##############################################################################
  48 # Table of supported E<> escapes
  49 ##############################################################################
  50
  51 # This table is taken near verbatim from Pod::PlainText in Pod::Parser, which
  52 # got it near verbatim from the original Pod::Text.  It is therefore credited
  53 # to Tom Christiansen, and I'm glad I didn't have to write it.  :)  "iexcl" to
  54 # "divide" added by Tim Jenness.
  55 %ESCAPES = (
  56     'amp'       =>    '&',      # ampersand
  57     'lt'        =>    '<',      # left chevron, less-than
  58     'gt'        =>    '>',      # right chevron, greater-than
  59     'quot'      =>    '"',      # double quote
  60     'sol'       =>    '/',      # solidus (forward slash)
  61     'verbar'    =>    '|',      # vertical bar
  62
  63     "Aacute"    =>    "\xC1",   # capital A, acute accent
  64     "aacute"    =>    "\xE1",   # small a, acute accent
  65     "Acirc"     =>    "\xC2",   # capital A, circumflex accent
  66     "acirc"     =>    "\xE2",   # small a, circumflex accent
  67     "AElig"     =>    "\xC6",   # capital AE diphthong (ligature)
  68     "aelig"     =>    "\xE6",   # small ae diphthong (ligature)
  69     "Agrave"    =>    "\xC0",   # capital A, grave accent
  70     "agrave"    =>    "\xE0",   # small a, grave accent
  71     "Aring"     =>    "\xC5",   # capital A, ring
  72     "aring"     =>    "\xE5",   # small a, ring
  73     "Atilde"    =>    "\xC3",   # capital A, tilde
  74     "atilde"    =>    "\xE3",   # small a, tilde
  75     "Auml"      =>    "\xC4",   # capital A, dieresis or umlaut mark
  76     "auml"      =>    "\xE4",   # small a, dieresis or umlaut mark
  77     "Ccedil"    =>    "\xC7",   # capital C, cedilla
  78     "ccedil"    =>    "\xE7",   # small c, cedilla
  79     "Eacute"    =>    "\xC9",   # capital E, acute accent
  80     "eacute"    =>    "\xE9",   # small e, acute accent
  81     "Ecirc"     =>    "\xCA",   # capital E, circumflex accent
  82     "ecirc"     =>    "\xEA",   # small e, circumflex accent
  83     "Egrave"    =>    "\xC8",   # capital E, grave accent
  84     "egrave"    =>    "\xE8",   # small e, grave accent
  85     "ETH"       =>    "\xD0",   # capital Eth, Icelandic
  86     "eth"       =>    "\xF0",   # small eth, Icelandic
  87     "Euml"      =>    "\xCB",   # capital E, dieresis or umlaut mark
  88     "euml"      =>    "\xEB",   # small e, dieresis or umlaut mark
  89     "Iacute"    =>    "\xCD",   # capital I, acute accent
  90     "iacute"    =>    "\xED",   # small i, acute accent
  91     "Icirc"     =>    "\xCE",   # capital I, circumflex accent
  92     "icirc"     =>    "\xEE",   # small i, circumflex accent
  93     "Igrave"    =>    "\xCC",   # capital I, grave accent
  94     "igrave"    =>    "\xEC",   # small i, grave accent
  95     "Iuml"      =>    "\xCF",   # capital I, dieresis or umlaut mark
  96     "iuml"      =>    "\xEF",   # small i, dieresis or umlaut mark
  97     "Ntilde"    =>    "\xD1",   # capital N, tilde
  98     "ntilde"    =>    "\xF1",   # small n, tilde
  99     "Oacute"    =>    "\xD3",   # capital O, acute accent
 100     "oacute"    =>    "\xF3",   # small o, acute accent
 101     "Ocirc"     =>    "\xD4",   # capital O, circumflex accent
 102     "ocirc"     =>    "\xF4",   # small o, circumflex accent
 103     "Ograve"    =>    "\xD2",   # capital O, grave accent
 104     "ograve"    =>    "\xF2",   # small o, grave accent
 105     "Oslash"    =>    "\xD8",   # capital O, slash
 106     "oslash"    =>    "\xF8",   # small o, slash
 107     "Otilde"    =>    "\xD5",   # capital O, tilde
 108     "otilde"    =>    "\xF5",   # small o, tilde
 109     "Ouml"      =>    "\xD6",   # capital O, dieresis or umlaut mark
 110     "ouml"      =>    "\xF6",   # small o, dieresis or umlaut mark
 111     "szlig"     =>    "\xDF",   # small sharp s, German (sz ligature)
 112     "THORN"     =>    "\xDE",   # capital THORN, Icelandic
 113     "thorn"     =>    "\xFE",   # small thorn, Icelandic
 114     "Uacute"    =>    "\xDA",   # capital U, acute accent
 115     "uacute"    =>    "\xFA",   # small u, acute accent
 116     "Ucirc"     =>    "\xDB",   # capital U, circumflex accent
 117     "ucirc"     =>    "\xFB",   # small u, circumflex accent
 118     "Ugrave"    =>    "\xD9",   # capital U, grave accent
 119     "ugrave"    =>    "\xF9",   # small u, grave accent
 120     "Uuml"      =>    "\xDC",   # capital U, dieresis or umlaut mark
 121     "uuml"      =>    "\xFC",   # small u, dieresis or umlaut mark
 122     "Yacute"    =>    "\xDD",   # capital Y, acute accent
 123     "yacute"    =>    "\xFD",   # small y, acute accent
 124     "yuml"      =>    "\xFF",   # small y, dieresis or umlaut mark
 125
 126     "laquo"     =>    "\xAB",   # left pointing double angle quotation mark
 127     "lchevron"  =>    "\xAB",   #  synonym (backwards compatibility)
 128     "raquo"     =>    "\xBB",   # right pointing double angle quotation mark
 129     "rchevron"  =>    "\xBB",   #  synonym (backwards compatibility)
 130
 131     "iexcl"     =>    "\xA1",   # inverted exclamation mark
 132     "cent"      =>    "\xA2",   # cent sign
 133     "pound"     =>    "\xA3",   # (UK) pound sign
 134     "curren"    =>    "\xA4",   # currency sign
 135     "yen"       =>    "\xA5",   # yen sign
 136     "brvbar"    =>    "\xA6",   # broken vertical bar
 137     "sect"      =>    "\xA7",   # section sign
 138     "uml"       =>    "\xA8",   # diaresis
 139     "copy"      =>    "\xA9",   # Copyright symbol
 140     "ordf"      =>    "\xAA",   # feminine ordinal indicator
 141     "not"       =>    "\xAC",   # not sign
 142     "shy"       =>    "\xAD",   # soft hyphen
 143     "reg"       =>    "\xAE",   # registered trademark
 144     "macr"      =>    "\xAF",   # macron, overline
 145     "deg"       =>    "\xB0",   # degree sign
 146     "plusmn"    =>    "\xB1",   # plus-minus sign
 147     "sup2"      =>    "\xB2",   # superscript 2
 148     "sup3"      =>    "\xB3",   # superscript 3
 149     "acute"     =>    "\xB4",   # acute accent
 150     "micro"     =>    "\xB5",   # micro sign
 151     "para"      =>    "\xB6",   # pilcrow sign = paragraph sign
 152     "middot"    =>    "\xB7",   # middle dot = Georgian comma
 153     "cedil"     =>    "\xB8",   # cedilla
 154     "sup1"      =>    "\xB9",   # superscript 1
 155     "ordm"      =>    "\xBA",   # masculine ordinal indicator
 156     "frac14"    =>    "\xBC",   # vulgar fraction one quarter
 157     "frac12"    =>    "\xBD",   # vulgar fraction one half
 158     "frac34"    =>    "\xBE",   # vulgar fraction three quarters
 159     "iquest"    =>    "\xBF",   # inverted question mark
 160     "times"     =>    "\xD7",   # multiplication sign
 161     "divide"    =>    "\xF7",   # division sign
 162 );
 163
 164
 165 ##############################################################################
 166 # Initialization
 167 ##############################################################################
 168
 169 # Initialize the object.  Must be sure to call our parent initializer.
 170 sub initialize {
 171     my $self = shift;
 172
 173     $$self{alt}      = 0  unless defined $$self{alt};
 174     $$self{indent}   = 4  unless defined $$self{indent};
 175     $$self{loose}    = 0  unless defined $$self{loose};
 176     $$self{sentence} = 0  unless defined $$self{sentence};
 177     $$self{width}    = 76 unless defined $$self{width};
 178
 179     # Figure out what quotes we'll be using for C<> text.
 180     $$self{quotes} ||= '"';
 181     if ($$self{quotes} eq 'none') {
 182         $$self{LQUOTE} = $$self{RQUOTE} = '';
 183     } elsif (length ($$self{quotes}) == 1) {
 184         $$self{LQUOTE} = $$self{RQUOTE} = $$self{quotes};
 185     } elsif ($$self{quotes} =~ /^(.)(.)$/
 186              || $$self{quotes} =~ /^(..)(..)$/) {
 187         $$self{LQUOTE} = $1;
 188         $$self{RQUOTE} = $2;
 189     } else {
 190         croak qq(Invalid quote specification "$$self{quotes}");
 191     }
 192
 193     $$self{INDENTS}  = [];              # Stack of indentations.
 194     $$self{MARGIN}   = $$self{indent};  # Current left margin in spaces.
 195
 196     $self->SUPER::initialize;
 197 }
 198
 199
 200 ##############################################################################
 201 # Core overrides
 202 ##############################################################################
 203
 204 # Called for each command paragraph.  Gets the command, the associated
 205 # paragraph, the line number, and a Pod::Paragraph object.  Just dispatches
 206 # the command to a method named the same as the command.  =cut is handled
 207 # internally by Pod::Parser.
 208 sub command {
 209     my $self = shift;
 210     my $command = shift;
 211     return if $command eq 'pod';
 212     return if ($$self{EXCLUDE} && $command ne 'end');
 213     $self->item ("\n") if defined $$self{ITEM};
 214     if ($self->can ('cmd_' . $command)) {
 215         $command = 'cmd_' . $command;
 216         $self->$command (@_);
 217     } else {
 218         my ($text, $line, $paragraph) = @_;
 219         my $file;
 220         ($file, $line) = $paragraph->file_line;
 221         $text =~ s/\n+\z//;
 222         $text = " $text" if ($text =~ /^\S/);
 223         warn qq($file:$line: Unknown command paragraph "=$command$text"\n);
 224         return;
 225     }
 226 }
 227
 228 # Called for a verbatim paragraph.  Gets the paragraph, the line number, and a
 229 # Pod::Paragraph object.  Just output it verbatim, but with tabs converted to
 230 # spaces.
 231 sub verbatim {
 232     my $self = shift;
 233     return if $$self{EXCLUDE};
 234     $self->item if defined $$self{ITEM};
 235     local $_ = shift;
 236     return if /^\s*$/;
 237     s/^(\s*\S+)/(' ' x $$self{MARGIN}) . $1/gme;
 238     $self->output ($_);
 239 }
 240
 241 # Called for a regular text block.  Gets the paragraph, the line number, and a
 242 # Pod::Paragraph object.  Perform interpolation and output the results.
 243 sub textblock {
 244     my $self = shift;
 245     return if $$self{EXCLUDE};
 246     $self->output ($_[0]), return if $$self{VERBATIM};
 247     local $_ = shift;
 248     my $line = shift;
 249
 250     # Perform a little magic to collapse multiple L<> references.  This is
 251     # here mostly for backwards-compatibility.  We'll just rewrite the whole
 252     # thing into actual text at this part, bypassing the whole internal
 253     # sequence parsing thing.
 254     s{
 255         (
 256           L<                    # A link of the form L</something>.
 257               /
 258               (
 259                   [:\w]+        # The item has to be a simple word...
 260                   (\(\))?       # ...or simple function.
 261               )
 262           >
 263           (
 264               ,?\s+(and\s+)?    # Allow lots of them, conjuncted.
 265               L<
 266                   /
 267                   (
 268                       [:\w]+
 269                       (\(\))?
 270                   )
 271               >
 272           )+
 273         )
 274     } {
 275         local $_ = $1;
 276         s%L</([^>]+)>%$1%g;
 277         my @items = split /(?:,?\s+(?:and\s+)?)/;
 278         my $string = "the ";
 279         my $i;
 280         for ($i = 0; $i < @items; $i++) {
 281             $string .= $items[$i];
 282             $string .= ", " if @items > 2 && $i != $#items;
 283             $string .= " and " if ($i == $#items - 1);
 284         }
 285         $string .= " entries elsewhere in this document";
 286         $string;
 287     }gex;
 288
 289     # Now actually interpolate and output the paragraph.
 290     $_ = $self->interpolate ($_, $line);
 291     s/\s+$/\n/;
 292     if (defined $$self{ITEM}) {
 293         $self->item ($_ . "\n");
 294     } else {
 295         $self->output ($self->reformat ($_ . "\n"));
 296     }
 297 }
 298
 299 # Called for an interior sequence.  Gets the command, argument, and a
 300 # Pod::InteriorSequence object and is expected to return the resulting text.
 301 # Calls code, bold, italic, file, and link to handle those types of sequences,
 302 # and handles S<>, E<>, X<>, and Z<> directly.
 303 sub interior_sequence {
 304     my $self = shift;
 305     my $command = shift;
 306     local $_ = shift;
 307     return '' if ($command eq 'X' || $command eq 'Z');
 308
 309     # Expand escapes into the actual character now, carping if invalid.
 310     if ($command eq 'E') {
 311         if (/^\d+$/) {
 312             return chr;
 313         } else {
 314             return $ESCAPES{$_} if defined $ESCAPES{$_};
 315             carp "Unknown escape: E<$_>";
 316             return "E<$_>";
 317         }
 318     }
 319
 320     # For all the other sequences, empty content produces no output.
 321     return if $_ eq '';
 322
 323     # For S<>, compress all internal whitespace and then map spaces to \01.
 324     # When we output the text, we'll map this back.
 325     if ($command eq 'S') {
 326         s/\s{2,}/ /g;
 327         tr/ /\01/;
 328         return $_;
 329     }
 330
 331     # Anything else needs to get dispatched to another method.
 332     if    ($command eq 'B') { return $self->seq_b ($_) }
 333     elsif ($command eq 'C') { return $self->seq_c ($_) }
 334     elsif ($command eq 'F') { return $self->seq_f ($_) }
 335     elsif ($command eq 'I') { return $self->seq_i ($_) }
 336     elsif ($command eq 'L') { return $self->seq_l ($_) }
 337     else { carp "Unknown sequence $command<$_>" }
 338 }
 339
 340 # Called for each paragraph that's actually part of the POD.  We take
 341 # advantage of this opportunity to untabify the input.
 342 sub preprocess_paragraph {
 343     my $self = shift;
 344     local $_ = shift;
 345     1 while s/^(.*?)(\t+)/$1 . ' ' x (length ($2) * 8 - length ($1) % 8)/me;
 346     $_;
 347 }
 348
 349
 350 ##############################################################################
 351 # Command paragraphs
 352 ##############################################################################
 353
 354 # All command paragraphs take the paragraph and the line number.
 355
 356 # First level heading.
 357 sub cmd_head1 {
 358     my $self = shift;
 359     local $_ = shift;
 360     s/\s+$//;
 361     $_ = $self->interpolate ($_, shift);
 362     if ($$self{alt}) {
 363         $self->output ("\n==== $_ ====\n\n");
 364     } else {
 365         $_ .= "\n" if $$self{loose};
 366         $self->output ($_ . "\n");
 367     }
 368 }
 369
 370 # Second level heading.
 371 sub cmd_head2 {
 372     my $self = shift;
 373     local $_ = shift;
 374     s/\s+$//;
 375     $_ = $self->interpolate ($_, shift);
 376     if ($$self{alt}) {
 377         $self->output ("\n==   $_   ==\n\n");
 378     } else {
 379         $self->output (' ' x ($$self{indent} / 2) . $_ . "\n\n");
 380     }
 381 }
 382
 383 # Third level heading.
 384 sub cmd_head3 {
 385     my $self = shift;
 386     local $_ = shift;
 387     s/\s+$//;
 388     $_ = $self->interpolate ($_, shift);
 389     if ($$self{alt}) {
 390         $self->output ("\n=    $_    =\n\n");
 391     } else {
 392         $self->output (' ' x ($$self{indent} * 2 / 3 + 0.5) . $_ . "\n\n");
 393     }
 394 }
 395
 396 # Third level heading.
 397 sub cmd_head4 {
 398     my $self = shift;
 399     local $_ = shift;
 400     s/\s+$//;
 401     $_ = $self->interpolate ($_, shift);
 402     if ($$self{alt}) {
 403         $self->output ("\n-    $_    -\n\n");
 404     } else {
 405         $self->output (' ' x ($$self{indent} * 3 / 4 + 0.5) . $_ . "\n\n");
 406     }
 407 }
 408
 409 # Start a list.
 410 sub cmd_over {
 411     my $self = shift;
 412     local $_ = shift;
 413     unless (/^[-+]?\d+\s+$/) { $_ = $$self{indent} }
 414     push (@{ $$self{INDENTS} }, $$self{MARGIN});
 415     $$self{MARGIN} += ($_ + 0);
 416 }
 417
 418 # End a list.
 419 sub cmd_back {
 420     my $self = shift;
 421     $$self{MARGIN} = pop @{ $$self{INDENTS} };
 422     unless (defined $$self{MARGIN}) {
 423         carp "Unmatched =back";
 424         $$self{MARGIN} = $$self{indent};
 425     }
 426 }
 427
 428 # An individual list item.
 429 sub cmd_item {
 430     my $self = shift;
 431     if (defined $$self{ITEM}) { $self->item }
 432     local $_ = shift;
 433     s/\s+$//;
 434     $$self{ITEM} = $self->interpolate ($_);
 435 }
 436
 437 # Begin a block for a particular translator.  Setting VERBATIM triggers
 438 # special handling in textblock().
 439 sub cmd_begin {
 440     my $self = shift;
 441     local $_ = shift;
 442     my ($kind) = /^(\S+)/ or return;
 443     if ($kind eq 'text') {
 444         $$self{VERBATIM} = 1;
 445     } else {
 446         $$self{EXCLUDE} = 1;
 447     }
 448 }
 449
 450 # End a block for a particular translator.  We assume that all =begin/=end
 451 # pairs are properly closed.
 452 sub cmd_end {
 453     my $self = shift;
 454     $$self{EXCLUDE} = 0;
 455     $$self{VERBATIM} = 0;
 456 }
 457
 458 # One paragraph for a particular translator.  Ignore it unless it's intended
 459 # for text, in which case we treat it as a verbatim text block.
 460 sub cmd_for {
 461     my $self = shift;
 462     local $_ = shift;
 463     my $line = shift;
 464     return unless s/^text\b[ \t]*\n?//;
 465     $self->verbatim ($_, $line);
 466 }
 467
 468
 469 ##############################################################################
 470 # Interior sequences
 471 ##############################################################################
 472
 473 # The simple formatting ones.  These are here mostly so that subclasses can
 474 # override them and do more complicated things.
 475 sub seq_b { return $_[0]{alt} ? "``$_[1]''" : $_[1] }
 476 sub seq_f { return $_[0]{alt} ? "\"$_[1]\"" : $_[1] }
 477 sub seq_i { return '*' . $_[1] . '*' }
 478
 479 # Apply a whole bunch of messy heuristics to not quote things that don't
 480 # benefit from being quoted.  These originally come from Barrie Slaymaker and
 481 # largely duplicate code in Pod::Man.
 482 sub seq_c {
 483     my $self = shift;
 484     local $_ = shift;
 485
 486     # A regex that matches the portion of a variable reference that's the
 487     # array or hash index, separated out just because we want to use it in
 488     # several places in the following regex.
 489     my $index = '(?: \[.*\] | \{.*\} )?';
 490
 491     # Check for things that we don't want to quote, and if we find any of
 492     # them, return the string with just a font change and no quoting.
 493     m{
 494       ^\s*
 495       (?:
 496          ( [\'\`\"] ) .* \1                             # already quoted
 497        | \` .* \'                                       # `quoted'
 498        | \$+ [\#^]? \S $index                           # special ($^Foo, $")
 499        | [\$\@%&*]+ \#? [:\'\w]+ $index                 # plain var or func
 500        | [\$\@%&*]* [:\'\w]+ (?: -> )? \(\s*[^\s,]\s*\) # 0/1-arg func call
 501        | [+-]? [\d.]+ (?: [eE] [+-]? \d+ )?             # a number
 502        | 0x [a-fA-F\d]+                                 # a hex constant
 503       )
 504       \s*\z
 505      }xo && return $_;
 506
 507     # If we didn't return, go ahead and quote the text.
 508     return $$self{alt} ? "``$_''" : "$$self{LQUOTE}$_$$self{RQUOTE}";
 509 }
 510
 511 # The complicated one.  Handle links.  Since this is plain text, we can't
 512 # actually make any real links, so this is all to figure out what text we
 513 # print out.
 514 sub seq_l {
 515     my $self = shift;
 516     local $_ = shift;
 517
 518     # Smash whitespace in case we were split across multiple lines.
 519     s/\s+/ /g;
 520
 521     # If we were given any explicit text, just output it.
 522     if (/^([^|]+)\|/) { return $1 }
 523
 524     # Okay, leading and trailing whitespace isn't important; get rid of it.
 525     s/^\s+//;
 526     s/\s+$//;
 527
 528     # If the argument looks like a URL, return it verbatim.  This only handles
 529     # URLs that use the server syntax.
 530     if (m%^[a-z]+://\S+$%) { return $_ }
 531
 532     # Default to using the whole content of the link entry as a section name.
 533     # Note that L<manpage/> forces a manpage interpretation, as does something
 534     # looking like L<manpage(section)>.  The latter is an enhancement over the
 535     # original Pod::Text.
 536     my ($manpage, $section) = ('', $_);
 537     if (/^"\s*(.*?)\s*"$/) {
 538         $section = '"' . $1 . '"';
 539     } elsif (m/^[-:.\w]+(?:\(\S+\))?$/) {
 540         ($manpage, $section) = ($_, '');
 541     } elsif (m%/%) {
 542         ($manpage, $section) = split (/\s*\/\s*/, $_, 2);
 543     }
 544
 545     # Now build the actual output text.
 546     my $text = '';
 547     if (!length $section) {
 548         $text = "the $manpage manpage" if length $manpage;
 549     } elsif ($section =~ /^[:\w]+(?:\(\))?/) {
 550         $text .= 'the ' . $section . ' entry';
 551         $text .= (length $manpage) ? " in the $manpage manpage"
 552                                    : " elsewhere in this document";
 553     } else {
 554         $section =~ s/^\"\s*//;
 555         $section =~ s/\s*\"$//;
 556         $text .= 'the section on "' . $section . '"';
 557         $text .= " in the $manpage manpage" if length $manpage;
 558     }
 559     $text;
 560 }
 561
 562
 563 ##############################################################################
 564 # List handling
 565 ##############################################################################
 566
 567 # This method is called whenever an =item command is complete (in other words,
 568 # we've seen its associated paragraph or know for certain that it doesn't have
 569 # one).  It gets the paragraph associated with the item as an argument.  If
 570 # that argument is empty, just output the item tag; if it contains a newline,
 571 # output the item tag followed by the newline.  Otherwise, see if there's
 572 # enough room for us to output the item tag in the margin of the text or if we
 573 # have to put it on a separate line.
 574 sub item {
 575     my $self = shift;
 576     local $_ = shift;
 577     my $tag = $$self{ITEM};
 578     unless (defined $tag) {
 579         carp "item called without tag";
 580         return;
 581     }
 582     undef $$self{ITEM};
 583     my $indent = $$self{INDENTS}[-1];
 584     unless (defined $indent) { $indent = $$self{indent} }
 585     my $space = ' ' x $indent;
 586     $space =~ s/^ /:/ if $$self{alt};
 587     if (!$_ || /^\s+$/ || ($$self{MARGIN} - $indent < length ($tag) + 1)) {
 588         my $margin = $$self{MARGIN};
 589         $$self{MARGIN} = $indent;
 590         my $output = $self->reformat ($tag);
 591         $output =~ s/\n*$/\n/;
 592         $self->output ($output);
 593         $$self{MARGIN} = $margin;
 594         $self->output ($self->reformat ($_)) if /\S/;
 595     } else {
 596         $_ = $self->reformat ($_);
 597         s/^ /:/ if ($$self{alt} && $indent > 0);
 598         my $tagspace = ' ' x length $tag;
 599         s/^($space)$tagspace/$1$tag/ or warn "Bizarre space in item";
 600         $self->output ($_);
 601     }
 602 }
 603
 604
 605 ##############################################################################
 606 # Output formatting
 607 ##############################################################################
 608
 609 # Wrap a line, indenting by the current left margin.  We can't use Text::Wrap
 610 # because it plays games with tabs.  We can't use formline, even though we'd
 611 # really like to, because it screws up non-printing characters.  So we have to
 612 # do the wrapping ourselves.
 613 sub wrap {
 614     my $self = shift;
 615     local $_ = shift;
 616     my $output = '';
 617     my $spaces = ' ' x $$self{MARGIN};
 618     my $width = $$self{width} - $$self{MARGIN};
 619     while (length > $width) {
 620         if (s/^([^\n]{0,$width})\s+// || s/^([^\n]{$width})//) {
 621             $output .= $spaces . $1 . "\n";
 622         } else {
 623             last;
 624         }
 625     }
 626     $output .= $spaces . $_;
 627     $output =~ s/\s+$/\n\n/;
 628     $output;
 629 }
 630
 631 # Reformat a paragraph of text for the current margin.  Takes the text to
 632 # reformat and returns the formatted text.
 633 sub reformat {
 634     my $self = shift;
 635     local $_ = shift;
 636
 637     # If we're trying to preserve two spaces after sentences, do some munging
 638     # to support that.  Otherwise, smash all repeated whitespace.
 639     if ($$self{sentence}) {
 640         s/ +$//mg;
 641         s/\.\n/. \n/g;
 642         s/\n/ /g;
 643         s/   +/  /g;
 644     } else {
 645         s/\s+/ /g;
 646     }
 647     $self->wrap ($_);
 648 }
 649
 650 # Output text to the output device.
 651 sub output { $_[1] =~ tr/\01/ /; print { $_[0]->output_handle } $_[1] }
 652
 653
 654 ##############################################################################
 655 # Backwards compatibility
 656 ##############################################################################
 657
 658 # The old Pod::Text module did everything in a pod2text() function.  This
 659 # tries to provide the same interface for legacy applications.
 660 sub pod2text {
 661     my @args;
 662
 663     # This is really ugly; I hate doing option parsing in the middle of a
 664     # module.  But the old Pod::Text module supported passing flags to its
 665     # entry function, so handle -a and -<number>.
 666     while ($_[0] =~ /^-/) {
 667         my $flag = shift;
 668         if    ($flag eq '-a')       { push (@args, alt => 1)    }
 669         elsif ($flag =~ /^-(\d+)$/) { push (@args, width => $1) }
 670         else {
 671             unshift (@_, $flag);
 672             last;
 673         }
 674     }
 675
 676     # Now that we know what arguments we're using, create the parser.
 677     my $parser = Pod::Text->new (@args);
 678
 679     # If two arguments were given, the second argument is going to be a file
 680     # handle.  That means we want to call parse_from_filehandle(), which means
 681     # we need to turn the first argument into a file handle.  Magic open will
 682     # handle the <&STDIN case automagically.
 683     if (defined $_[1]) {
 684         my @fhs = @_;
 685         local *IN;
 686         unless (open (IN, $fhs[0])) {
 687             croak ("Can't open $fhs[0] for reading: $!\n");
 688             return;
 689         }
 690         $fhs[0] = \*IN;
 691         return $parser->parse_from_filehandle (@fhs);
 692     } else {
 693         return $parser->parse_from_file (@_);
 694     }
 695 }
 696
 697
 698 ##############################################################################
 699 # Module return value and documentation
 700 ##############################################################################
 701
 702 1;
 703 __END__
 704
 705 =head1 NAME
 706
 707 Pod::Text - Convert POD data to formatted ASCII text
 708
 709 =head1 SYNOPSIS
 710
 711     use Pod::Text;
 712     my $parser = Pod::Text->new (sentence => 0, width => 78);
 713
 714     # Read POD from STDIN and write to STDOUT.
 715     $parser->parse_from_filehandle;
 716
 717     # Read POD from file.pod and write to file.txt.
 718     $parser->parse_from_file ('file.pod', 'file.txt');
 719
 720 =head1 DESCRIPTION
 721
 722 Pod::Text is a module that can convert documentation in the POD format (the
 723 preferred language for documenting Perl) into formatted ASCII.  It uses no
 724 special formatting controls or codes whatsoever, and its output is therefore
 725 suitable for nearly any device.
 726
 727 As a derived class from Pod::Parser, Pod::Text supports the same methods and
 728 interfaces.  See L<Pod::Parser> for all the details; briefly, one creates a
 729 new parser with C<Pod::Text-E<gt>new()> and then calls either
 730 parse_from_filehandle() or parse_from_file().
 731
 732 new() can take options, in the form of key/value pairs, that control the
 733 behavior of the parser.  The currently recognized options are:
 734
 735 =over 4
 736
 737 =item alt
 738
 739 If set to a true value, selects an alternate output format that, among other
 740 things, uses a different heading style and marks C<=item> entries with a
 741 colon in the left margin.  Defaults to false.
 742
 743 =item indent
 744
 745 The number of spaces to indent regular text, and the default indentation for
 746 C<=over> blocks.  Defaults to 4.
 747
 748 =item loose
 749
 750 If set to a true value, a blank line is printed after a C<=head1> heading.
 751 If set to false (the default), no blank line is printed after C<=head1>,
 752 although one is still printed after C<=head2>.  This is the default because
 753 it's the expected formatting for manual pages; if you're formatting
 754 arbitrary text documents, setting this to true may result in more pleasing
 755 output.
 756
 757 =item quotes
 758
 759 Sets the quote marks used to surround CE<lt>> text.  If the value is a
 760 single character, it is used as both the left and right quote; if it is two
 761 characters, the first character is used as the left quote and the second as
 762 the right quoted; and if it is four characters, the first two are used as
 763 the left quote and the second two as the right quote.
 764
 765 This may also be set to the special value C<none>, in which case no quote
 766 marks are added around CE<lt>> text.
 767
 768 =item sentence
 769
 770 If set to a true value, Pod::Text will assume that each sentence ends in two
 771 spaces, and will try to preserve that spacing.  If set to false, all
 772 consecutive whitespace in non-verbatim paragraphs is compressed into a
 773 single space.  Defaults to true.
 774
 775 =item width
 776
 777 The column at which to wrap text on the right-hand side.  Defaults to 76.
 778
 779 =back
 780
 781 The standard Pod::Parser method parse_from_filehandle() takes up to two
 782 arguments, the first being the file handle to read POD from and the second
 783 being the file handle to write the formatted output to.  The first defaults
 784 to STDIN if not given, and the second defaults to STDOUT.  The method
 785 parse_from_file() is almost identical, except that its two arguments are the
 786 input and output disk files instead.  See L<Pod::Parser> for the specific
 787 details.
 788
 789 =head1 DIAGNOSTICS
 790
 791 =over 4
 792
 793 =item Bizarre space in item
 794
 795 (W) Something has gone wrong in internal C<=item> processing.  This message
 796 indicates a bug in Pod::Text; you should never see it.
 797
 798 =item Can't open %s for reading: %s
 799
 800 (F) Pod::Text was invoked via the compatibility mode pod2text() interface
 801 and the input file it was given could not be opened.
 802
 803 =item Invalid quote specification "%s"
 804
 805 (F) The quote specification given (the quotes option to the constructor) was
 806 invalid.  A quote specification must be one, two, or four characters long.
 807
 808 =item %s:%d: Unknown command paragraph "%s".
 809
 810 (W) The POD source contained a non-standard command paragraph (something of
 811 the form C<=command args>) that Pod::Man didn't know about.  It was ignored.
 812
 813 =item Unknown escape: %s
 814
 815 (W) The POD source contained an C<EE<lt>E<gt>> escape that Pod::Text didn't
 816 know about.
 817
 818 =item Unknown sequence: %s
 819
 820 (W) The POD source contained a non-standard internal sequence (something of
 821 the form C<XE<lt>E<gt>>) that Pod::Text didn't know about.
 822
 823 =item Unmatched =back
 824
 825 (W) Pod::Text encountered a C<=back> command that didn't correspond to an
 826 C<=over> command.
 827
 828 =back
 829
 830 =head1 RESTRICTIONS
 831
 832 Embedded Ctrl-As (octal 001) in the input will be mapped to spaces on
 833 output, due to an internal implementation detail.
 834
 835 =head1 NOTES
 836
 837 This is a replacement for an earlier Pod::Text module written by Tom
 838 Christiansen.  It has a revamped interface, since it now uses Pod::Parser,
 839 but an interface roughly compatible with the old Pod::Text::pod2text()
 840 function is still available.  Please change to the new calling convention,
 841 though.
 842
 843 The original Pod::Text contained code to do formatting via termcap
 844 sequences, although it wasn't turned on by default and it was problematic to
 845 get it to work at all.  This rewrite doesn't even try to do that, but a
 846 subclass of it does.  Look for L<Pod::Text::Termcap|Pod::Text::Termcap>.
 847
 848 =head1 SEE ALSO
 849
 850 L<Pod::Parser|Pod::Parser>, L<Pod::Text::Termcap|Pod::Text::Termcap>,
 851 pod2text(1)
 852
 853 =head1 AUTHOR
 854
 855 Russ Allbery E<lt>rra@stanford.eduE<gt>, based I<very> heavily on the
 856 original Pod::Text by Tom Christiansen E<lt>tchrist@mox.perl.comE<gt> and
 857 its conversion to Pod::Parser by Brad Appleton
 858 E<lt>bradapp@enteract.comE<gt>.
 859
 860 =head1 COPYRIGHT AND LICENSE
 861
 862 Copyright 1999, 2000, 2001 by Russ Allbery <rra@stanford.edu>.
 863
 864 This program is free software; you may redistribute it and/or modify it
 865 under the same terms as Perl itself.
 866
 867 =cut