lib/Pod/Text.pm

   1 # Pod::Text -- Convert POD data to formatted ASCII text.
   2 # $Id: Text.pm,v 2.21 2002/08/04 03:34:58 eagle Exp $
   3 #
   4 # Copyright 1999, 2000, 2001, 2002 by Russ Allbery <rra@stanford.edu>
   5 #
   6 # This program is free software; you may redistribute it and/or modify it
   7 # under the same terms as Perl itself.
   8 #
   9 # This module converts POD to formatted text.  It replaces the old Pod::Text
  10 # module that came with versions of Perl prior to 5.6.0 and attempts to match
  11 # its output except for some specific circumstances where other decisions
  12 # seemed to produce better output.  It uses Pod::Parser and is designed to be
  13 # very easy to subclass.
  14 #
  15 # Perl core hackers, please note that this module is also separately
  16 # maintained outside of the Perl core as part of the podlators.  Please send
  17 # me any patches at the address above in addition to sending them to the
  18 # standard Perl mailing lists.
  19
  20 ##############################################################################
  21 # Modules and declarations
  22 ##############################################################################
  23
  24 package Pod::Text;
  25
  26 require 5.004;
  27
  28 use Carp qw(carp croak);
  29 use Exporter ();
  30 use Pod::ParseLink qw(parselink);
  31 use Pod::Select ();
  32
  33 use strict;
  34 use vars qw(@ISA @EXPORT %ESCAPES $VERSION);
  35
  36 # We inherit from Pod::Select instead of Pod::Parser so that we can be used by
  37 # Pod::Usage.
  38 @ISA = qw(Pod::Select Exporter);
  39
  40 # We have to export pod2text for backward compatibility.
  41 @EXPORT = qw(pod2text);
  42
  43 # Don't use the CVS revision as the version, since this module is also in Perl
  44 # core and too many things could munge CVS magic revision strings.  This
  45 # number should ideally be the same as the CVS revision in podlators, however.
  46 $VERSION = 2.21;
  47
  48
  49 ##############################################################################
  50 # Table of supported E<> escapes
  51 ##############################################################################
  52
  # Named E<> escapes and the text each one produces.  Every value above 0x7F
  # is an ISO 8859-1 (Latin-1) code point.  The table comes nearly verbatim
  # from Pod::PlainText in Pod::Parser, which inherited it from the original
  # Pod::Text, so the credit belongs to Tom Christiansen; the "iexcl" through
  # "divide" entries were added by Tim Jenness.
  %ESCAPES = (
      # Punctuation.
      'amp'      => '&',          # ampersand
      'apos'     => "'",          # apostrophe
      'lt'       => '<',          # left chevron, less-than
      'gt'       => '>',          # right chevron, greater-than
      'quot'     => '"',          # double quote
      'sol'      => '/',          # solidus (forward slash)
      'verbar'   => '|',          # vertical bar

      # Accented letters, capital form first and small form second.
      'Aacute'   => "\xC1",  'aacute'   => "\xE1",   # A, acute accent
      'Acirc'    => "\xC2",  'acirc'    => "\xE2",   # A, circumflex accent
      'AElig'    => "\xC6",  'aelig'    => "\xE6",   # AE diphthong (ligature)
      'Agrave'   => "\xC0",  'agrave'   => "\xE0",   # A, grave accent
      'Aring'    => "\xC5",  'aring'    => "\xE5",   # A, ring
      'Atilde'   => "\xC3",  'atilde'   => "\xE3",   # A, tilde
      'Auml'     => "\xC4",  'auml'     => "\xE4",   # A, dieresis or umlaut
      'Ccedil'   => "\xC7",  'ccedil'   => "\xE7",   # C, cedilla
      'Eacute'   => "\xC9",  'eacute'   => "\xE9",   # E, acute accent
      'Ecirc'    => "\xCA",  'ecirc'    => "\xEA",   # E, circumflex accent
      'Egrave'   => "\xC8",  'egrave'   => "\xE8",   # E, grave accent
      'ETH'      => "\xD0",  'eth'      => "\xF0",   # Eth, Icelandic
      'Euml'     => "\xCB",  'euml'     => "\xEB",   # E, dieresis or umlaut
      'Iacute'   => "\xCD",  'iacute'   => "\xED",   # I, acute accent
      'Icirc'    => "\xCE",  'icirc'    => "\xEE",   # I, circumflex accent
      'Igrave'   => "\xCC",  'igrave'   => "\xEC",   # I, grave accent
      'Iuml'     => "\xCF",  'iuml'     => "\xEF",   # I, dieresis or umlaut
      'Ntilde'   => "\xD1",  'ntilde'   => "\xF1",   # N, tilde
      'Oacute'   => "\xD3",  'oacute'   => "\xF3",   # O, acute accent
      'Ocirc'    => "\xD4",  'ocirc'    => "\xF4",   # O, circumflex accent
      'Ograve'   => "\xD2",  'ograve'   => "\xF2",   # O, grave accent
      'Oslash'   => "\xD8",  'oslash'   => "\xF8",   # O, slash
      'Otilde'   => "\xD5",  'otilde'   => "\xF5",   # O, tilde
      'Ouml'     => "\xD6",  'ouml'     => "\xF6",   # O, dieresis or umlaut
      'THORN'    => "\xDE",  'thorn'    => "\xFE",   # Thorn, Icelandic
      'Uacute'   => "\xDA",  'uacute'   => "\xFA",   # U, acute accent
      'Ucirc'    => "\xDB",  'ucirc'    => "\xFB",   # U, circumflex accent
      'Ugrave'   => "\xD9",  'ugrave'   => "\xF9",   # U, grave accent
      'Uuml'     => "\xDC",  'uuml'     => "\xFC",   # U, dieresis or umlaut
      'Yacute'   => "\xDD",  'yacute'   => "\xFD",   # Y, acute accent
      'szlig'    => "\xDF",       # small sharp s, German (sz ligature)
      'yuml'     => "\xFF",       # small y, dieresis or umlaut mark

      # Double angle quotation marks; the *chevron names are kept as synonyms
      # for backwards compatibility.
      'laquo'    => "\xAB",  'lchevron' => "\xAB",   # left pointing
      'raquo'    => "\xBB",  'rchevron' => "\xBB",   # right pointing

      # Symbols.
      'iexcl'    => "\xA1",       # inverted exclamation mark
      'cent'     => "\xA2",       # cent sign
      'pound'    => "\xA3",       # (UK) pound sign
      'curren'   => "\xA4",       # currency sign
      'yen'      => "\xA5",       # yen sign
      'brvbar'   => "\xA6",       # broken vertical bar
      'sect'     => "\xA7",       # section sign
      'uml'      => "\xA8",       # diaeresis
      'copy'     => "\xA9",       # copyright symbol
      'ordf'     => "\xAA",       # feminine ordinal indicator
      'not'      => "\xAC",       # not sign
      'shy'      => '',           # soft (discretionary) hyphen: emits nothing
      'reg'      => "\xAE",       # registered trademark
      'macr'     => "\xAF",       # macron, overline
      'deg'      => "\xB0",       # degree sign
      'plusmn'   => "\xB1",       # plus-minus sign
      'sup2'     => "\xB2",       # superscript 2
      'sup3'     => "\xB3",       # superscript 3
      'acute'    => "\xB4",       # acute accent
      'micro'    => "\xB5",       # micro sign
      'para'     => "\xB6",       # pilcrow sign = paragraph sign
      'middot'   => "\xB7",       # middle dot = Georgian comma
      'cedil'    => "\xB8",       # cedilla
      'sup1'     => "\xB9",       # superscript 1
      'ordm'     => "\xBA",       # masculine ordinal indicator
      'frac14'   => "\xBC",       # vulgar fraction one quarter
      'frac12'   => "\xBD",       # vulgar fraction one half
      'frac34'   => "\xBE",       # vulgar fraction three quarters
      'iquest'   => "\xBF",       # inverted question mark
      'times'    => "\xD7",       # multiplication sign
      'divide'   => "\xF7",       # division sign

      # Non-breaking space.  This is the same \01 marker that S<> text is
      # mapped to; output() turns it back into an ordinary space.
      'nbsp'     => "\x01",
  );
 168
 169
 170 ##############################################################################
 171 # Initialization
 172 ##############################################################################
 173
 # Prepare a newly constructed formatter.  Fills in defaults for any options
 # the caller left unset, derives the left and right quote marks used around
 # C<> text from the quotes option, resets the indentation state, and then
 # calls the parent initializer (which must always happen).
 sub initialize {
     my ($self) = @_;

     # Option defaults.  Anything the caller already defined is left alone.
     my @defaults = (
         alt      => 0,
         indent   => 4,
         margin   => 0,
         loose    => 0,
         sentence => 0,
         width    => 76,
     );
     while (my ($option, $value) = splice (@defaults, 0, 2)) {
         $self->{$option} = $value unless defined $self->{$option};
     }

     # The quotes option is "none", a single character used on both sides, or
     # two or four characters split evenly into a left and a right mark.
     $self->{quotes} ||= '"';
     my $quotes = $self->{quotes};
     if ($quotes eq 'none') {
         $self->{LQUOTE} = $self->{RQUOTE} = '';
     } elsif (length ($quotes) == 1) {
         $self->{LQUOTE} = $self->{RQUOTE} = $quotes;
     } elsif ($quotes =~ /^(.)(.)$/ || $quotes =~ /^(..)(..)$/) {
         ($self->{LQUOTE}, $self->{RQUOTE}) = ($1, $2);
     } else {
         croak qq(Invalid quote specification "$$self{quotes}");
     }

     # No lists are open yet, so the stack of saved margins is empty and the
     # current left margin is the page margin plus the text indent.
     $self->{INDENTS} = [];
     $self->{MARGIN}  = $self->{indent} + $self->{margin};

     $self->SUPER::initialize;

     # With the code option set, ask Pod::Parser for the non-POD text too.
     $self->parseopts ('-want_nonPODs' => 1) if $self->{code};
 }
 210
 211
 212 ##############################################################################
 213 # Core overrides
 214 ##############################################################################
 215
 # Called for each command paragraph with the command name, the paragraph
 # text, the line number, and a Pod::Paragraph object.  =pod is ignored, and
 # so is everything except =end while an excluded =begin block is open.  Any
 # other command is dispatched to the method named cmd_<command> with the
 # remaining arguments; if there is no such method, a warning is issued and
 # the paragraph is dropped.  =cut is handled internally by Pod::Parser.
 sub command {
     my ($self, $command) = splice (@_, 0, 2);
     return if $command eq 'pod';
     return if ($self->{EXCLUDE} && $command ne 'end');

     my $method = 'cmd_' . $command;
     return $self->$method (@_) if $self->can ($method);

     # Unknown command: report it using the position recorded in the
     # paragraph object, reproducing the command line in the message.
     my ($text, $line, $paragraph) = @_;
     my $file;
     ($file, $line) = $paragraph->file_line;
     $text =~ s/\n+\z//;
     $text = " $text" if ($text =~ /^\S/);
     warn qq($file:$line: Unknown command paragraph: =$command$text\n);
     return;
 }
 238
 # Called for a verbatim paragraph with the paragraph text, the line number,
 # and a Pod::Paragraph object.  Nothing is printed inside an excluded block
 # or for a paragraph that is entirely whitespace.  Otherwise any pending
 # =item tag is flushed first and the text is printed as-is, with each line
 # shifted right by the current left margin.
 sub verbatim {
     my ($self, $text) = @_;
     return if $self->{EXCLUDE};
     $self->item if defined $self->{ITEM};
     return if $text =~ /^\s*$/;
     my $margin = ' ' x $self->{MARGIN};
     $text =~ s/^(\s*\S+)/$margin . $1/gme;
     $self->output ($text);
 }
 251
 # Called for an ordinary text paragraph with the paragraph text, the line
 # number, and a Pod::Paragraph object.  Inside an excluded block nothing
 # happens; inside a =begin text block the paragraph is printed untouched.
 # Otherwise formatting codes are interpolated and the result either becomes
 # the body of a pending =item or is reformatted and printed.
 sub textblock {
     my $self = shift;
     return if $self->{EXCLUDE};
     if ($self->{VERBATIM}) {
         $self->output ($_[0]);
         return;
     }
     my ($text, $line) = @_;

     # Interpolate, then normalize the end of the paragraph so that it always
     # finishes with exactly one blank line.
     $text = $self->interpolate ($text, $line);
     $text =~ s/\s+$/\n/;
     $text .= "\n";
     if (defined $self->{ITEM}) {
         $self->item ($text);
     } else {
         $self->output ($self->reformat ($text));
     }
 }
 270
 # Called for a formatting code.  Gets the command (the letter in front of
 # the angle bracket), its argument, and a Pod::InteriorSequence object, and
 # returns the text to put in its place.  S<>, E<>, X<>, and Z<> are handled
 # here; B<>, C<>, F<>, I<>, and L<> are passed on to the seq_* methods so
 # that subclasses can override them.  A defined string is always returned,
 # so the caller never splices undef or a stray status value into the text.
 sub interior_sequence {
     local $_;
     my ($self, $command, $seq);
     ($self, $command, $_, $seq) = @_;

     # We have to defer processing of the inside of an L<> formatting code.
     # If this code is nested inside an L<> code at any depth, return its
     # literal raw text and let seq_l deal with it later.
     my $parent = $seq->nested;
     while (defined $parent) {
         return $seq->raw_text if ($parent->cmd_name eq 'L');
         $parent = $parent->nested;
     }

     # Index entries and null codes produce nothing in plain text.
     return '' if ($command eq 'X' || $command eq 'Z');

     # Expand escapes into the actual character now, warning if invalid.
     # Numeric escapes follow perlpodspec: a leading "0x" means hexadecimal,
     # a leading "0" means octal, and anything else is decimal.
     if ($command eq 'E') {
         return chr (hex ($1)) if /^0[xX]([0-9a-fA-F]+)\z/;
         return chr (oct ($_)) if /^0[0-7]*\z/;
         return chr ($_)       if /^\d+$/;
         return $ESCAPES{$_}   if defined $ESCAPES{$_};
         my ($file, $line) = $seq->file_line;
         warn "$file:$line: Unknown escape: E<$_>\n";
         return "E<$_>";
     }

     # For all the other formatting codes, empty content produces no output.
     return '' if $_ eq '';

     # For S<>, compress all internal whitespace and then map spaces to \01.
     # When we output the text, we'll map this back.
     if ($command eq 'S') {
         s/\s+/ /g;
         tr/ /\01/;
         return $_;
     }

     # Anything else needs to get dispatched to another method.
     if    ($command eq 'B') { return $self->seq_b ($_) }
     elsif ($command eq 'C') { return $self->seq_c ($_) }
     elsif ($command eq 'F') { return $self->seq_f ($_) }
     elsif ($command eq 'I') { return $self->seq_i ($_) }
     elsif ($command eq 'L') { return $self->seq_l ($_, $seq) }

     # Unknown formatting code.  Warn, then return the content unchanged;
     # falling off the end here would return the value of warn (1) and that
     # would show up in the formatted text.
     my ($file, $line) = $seq->file_line;
     warn "$file:$line: Unknown formatting code: $command<$_>\n";
     return $_;
 }
 326
 # Called for every paragraph before any other processing.  Expands tabs to
 # spaces using eight-column tab stops and returns the result.  While the
 # parser is outside POD (which we only see when the code option is set),
 # the paragraph is also handed to output_code so that it appears in the
 # output.
 sub preprocess_paragraph {
     my ($self, $text) = @_;

     # Expand one run of tabs per pass, always the first one left on a line,
     # so that the column arithmetic sees earlier expansions.
     while ($text =~ s/^(.*?)(\t+)/$1 . ' ' x (length ($2) * 8 - length ($1) % 8)/me) {
     }

     $self->output_code ($text) if $self->cutting;
     return $text;
 }
 338
 339
 340 ##############################################################################
 341 # Command paragraphs
 342 ##############################################################################
 343
 344 # All command paragraphs take the paragraph and the line number.
 345
 # =head1: first level heading, with no extra indentation and "====" as the
 # marker for the alt format.
 sub cmd_head1 {
     my $self = shift;
     my ($text, $line) = @_;
     return $self->heading ($text, $line, 0, '====');
 }
 351
 # =head2: second level heading, indented by half of the indent option, with
 # "==  " as the marker for the alt format.
 sub cmd_head2 {
     my $self = shift;
     my ($text, $line) = @_;
     my $indent = $self->{indent} / 2;
     return $self->heading ($text, $line, $indent, '==  ');
 }
 357
 # =head3: third level heading, indented by two thirds of the indent option
 # (plus 0.5 before it is used as a repeat count), with "=   " as the marker
 # for the alt format.
 sub cmd_head3 {
     my $self = shift;
     my ($text, $line) = @_;
     my $indent = $self->{indent} * 2 / 3 + 0.5;
     return $self->heading ($text, $line, $indent, '=   ');
 }
 363
 # =head4: fourth level heading, indented by three quarters of the indent
 # option (plus 0.5 before it is used as a repeat count), with "-   " as the
 # marker for the alt format.
 sub cmd_head4 {
     my $self = shift;
     my ($text, $line) = @_;
     my $indent = $self->{indent} * 3 / 4 + 0.5;
     return $self->heading ($text, $line, $indent, '-   ');
 }
 369
 # Start a list (=over).  Takes the text following the command, which is
 # expected to be an optionally signed integer giving the extra indentation
 # for the list; anything else (including nothing at all) selects the indent
 # option.  Any pending =item is flushed first, then the current left margin
 # is saved on the INDENTS stack and increased.  Trailing whitespace after
 # the number is optional, so a bare "8" is honoured rather than silently
 # replaced by the default.
 sub cmd_over {
     my ($self, $amount) = @_;
     $self->item ("\n\n") if defined $$self{ITEM};
     unless (defined $amount && $amount =~ /^[-+]?\d+\s*$/) {
         $amount = $$self{indent};
     }
     push (@{ $$self{INDENTS} }, $$self{MARGIN});
     $$self{MARGIN} += ($amount + 0);
 }
 379
 # End a list (=back).  Takes the paragraph text, the line number, and a
 # Pod::Paragraph object.  Flushes any pending =item and restores the left
 # margin saved by the matching =over.  If there is no matching =over, warns
 # and falls back to the base left margin, which is the indent option plus
 # the margin option, the same value initialize() starts from.
 sub cmd_back {
     my ($self, $text, $line, $paragraph) = @_;
     $self->item ("\n\n") if defined $$self{ITEM};
     $$self{MARGIN} = pop @{ $$self{INDENTS} };
     unless (defined $$self{MARGIN}) {
         my $file;
         ($file, $line) = $paragraph->file_line;
         warn "$file:$line: Unmatched =back\n";
         $$self{MARGIN} = $$self{indent} + ($$self{margin} || 0);
     }
 }
 392
 # An individual list item (=item).  Takes the text of the tag and the line
 # number.  Flushes any previous item that is still pending, then records the
 # interpolated tag in ITEM so that it can be printed together with the
 # paragraph that follows.  An empty tag becomes a "*" bullet.  The emptiness
 # test is on length rather than truth, so "=item 0" keeps its "0" tag.
 sub cmd_item {
     my ($self, $tag, $line) = @_;
     if (defined $$self{ITEM}) { $self->item }
     $tag = '' unless defined $tag;
     $tag =~ s/\s+$//;
     $$self{ITEM} = length ($tag) ? $self->interpolate ($tag, $line) : '*';
 }
 401
 # Begin a block for a particular translator (=begin).  The first word of the
 # text names the format.  A "text" block sets VERBATIM, which makes
 # textblock() print paragraphs untouched; any other format sets EXCLUDE,
 # which suppresses output until the block ends.  Text that does not start
 # with a word is ignored.
 sub cmd_begin {
     my ($self, $text) = @_;
     return unless $text =~ /^(\S+)/;
     my $flag = ($1 eq 'text') ? 'VERBATIM' : 'EXCLUDE';
     $self->{$flag} = 1;
 }
 414
 # End a block for a particular translator (=end).  Clears both block flags
 # without checking which format is being closed; we assume that all
 # =begin/=end pairs are properly matched.
 sub cmd_end {
     my ($self) = @_;
     $self->{EXCLUDE}  = 0;
     $self->{VERBATIM} = 0;
 }
 422
 # One paragraph for a particular translator (=for).  Takes the paragraph
 # text and the line number.  Unless the paragraph is addressed to "text" it
 # is ignored; otherwise the format name (and the rest of its line if that is
 # only blanks) is removed and the remainder is printed as a verbatim block.
 sub cmd_for {
     my ($self, $text, $line) = @_;
     return unless $text =~ s/^text\b[ \t]*\n?//;
     $self->verbatim ($text, $line);
 }
 432
 433
 434 ##############################################################################
 435 # Formatting codes
 436 ##############################################################################
 437
 # The simple formatting codes.  Each gets the text inside the code and
 # returns what to print; they are separate methods mostly so that subclasses
 # can override them.  In the alt format bold text is wrapped in ``'' and
 # file names in double quotes, and otherwise both are left bare.  Italic
 # text is always wrapped in asterisks.
 sub seq_b {
     my ($self, $text) = @_;
     return $self->{alt} ? "``$text''" : $text;
 }
 sub seq_f {
     my ($self, $text) = @_;
     return $self->{alt} ? "\"$text\"" : $text;
 }
 sub seq_i {
     my ($self, $text) = @_;
     return '*' . $text . '*';
 }
 443
 # Format C<> text.  Code is normally surrounded by the configured quote
 # marks (or ``'' in the alt format), but a set of messy heuristics leaves
 # text alone when quoting would not help.  The heuristics originally come
 # from Barrie Slaymaker and largely duplicate code in Pod::Man.
 sub seq_c {
     my ($self, $code) = @_;

     # Matches the optional array or hash subscript of a variable reference.
     # Kept in a variable only because the pattern below uses it twice.
     my $index = '(?: \[.*\] | \{.*\} )?';

     # Return the text unchanged if the whole of it, apart from surrounding
     # whitespace, is one of the things that we don't want to quote.
     return $code if $code =~ m{
       ^\s*
       (?:
          ( [\'\`\"] ) .* \1                             # already quoted
        | \` .* \'                                       # `quoted'
        | \$+ [\#^]? \S $index                           # special ($^Foo, $")
        | [\$\@%&*]+ \#? [:\'\w]+ $index                 # plain var or func
        | [\$\@%&*]* [:\'\w]+ (?: -> )? \(\s*[^\s,]\s*\) # 0/1-arg func call
        | [+-]? ( \d[\d.]* | \.\d+ ) (?: [eE][+-]?\d+ )? # a number
        | 0x [a-fA-F\d]+                                 # a hex constant
       )
       \s*\z
      }xo;

     # Everything else gets quoted.
     return $$self{alt} ? "``$code''" : "$$self{LQUOTE}$code$$self{RQUOTE}";
 }
 475
 # Format an L<> link.  Gets the raw link text and the Pod::InteriorSequence
 # object.  Plain text cannot hold a real link, so the only job is to choose
 # what to print: Pod::ParseLink supplies the text and the link type, the
 # text is interpolated (it was deferred until now), and URLs are wrapped in
 # angle brackets.  Returns the empty string rather than any false value.
 sub seq_l {
     my ($self, $link, $seq) = @_;
     my @parsed = parselink ($link);
     my ($text, $type) = @parsed[1, 4];
     my $line = ($seq->file_line)[1];
     $text = $self->interpolate ($text, $line);
     if ($type eq 'url') {
         $text = "<$text>";
     }
     return $text || '';
 }
 487
 488
 489 ##############################################################################
 490 # Header handling
 491 ##############################################################################
 492
 # The common code for all heading levels.  Takes the heading text (which is
 # interpolated here), the line number, the indentation for the normal
 # format, and the marker for the alt format.  Any pending =item is flushed
 # first.  In the normal format the heading is indented past the page margin
 # and followed by a blank line only if the loose option is set.  In the alt
 # format it is set off by blank lines and enclosed between the marker and
 # its mirror image.
 sub heading {
     my ($self, $text, $line, $indent, $marker) = @_;
     $self->item ("\n\n") if defined $self->{ITEM};
     $text =~ s/\s+$//;
     $text = $self->interpolate ($text, $line);

     unless ($self->{alt}) {
         $text .= "\n" if $self->{loose};
         my $margin = ' ' x ($self->{margin} + $indent);
         return $self->output ($margin . $text . "\n");
     }

     my $closemark = scalar reverse ($marker);
     my $margin = ' ' x $self->{margin};
     return $self->output ("\n" . "$margin$marker $text $closemark" . "\n\n");
 }
 511
 512
 513 ##############################################################################
 514 # List handling
 515 ##############################################################################
 516
 # Called once an =item is complete: either its paragraph has been seen or we
 # know that it has none.  Gets the text of that paragraph, if any.  The tag
 # saved in ITEM is printed on a line of its own when there is no text, when
 # the text is only whitespace, or when the tag (plus one space) does not fit
 # between the enclosing margin and the current one.  Otherwise the tag is
 # written over the leading spaces of the first line of the formatted text.
 # In the alt format a colon marks the item in the left margin.
 sub item {
     my ($self, $text) = @_;
     my $tag = $self->{ITEM};
     unless (defined $tag) {
         carp "Item called without tag";
         return;
     }
     undef $self->{ITEM};

     # The margin saved by the enclosing =over, or the indent option if no
     # list is open.
     my $indent = $self->{INDENTS}[-1];
     $indent = $self->{indent} unless defined $indent;
     my $margin = ' ' x $self->{margin};

     my $separate = !$text
         || $text =~ /^\s+$/
         || ($self->{MARGIN} - $indent < length ($tag) + 1);
     if ($separate) {
         # Format the tag at the enclosing margin, then put the margin back
         # for the body.
         my $realindent = $self->{MARGIN};
         $self->{MARGIN} = $indent;
         my $output = $self->reformat ($tag);
         $output =~ s/^$margin /$margin:/ if ($self->{alt} && $indent > 0);
         $output =~ s/\n*$/\n/;

         # If the text is just whitespace, we have an empty item paragraph;
         # this can result from =over/=item/=back without any intermixed
         # paragraphs.  Add a blank line to keep the =item from merging into
         # the next paragraph.
         $output .= "\n" if $text && $text =~ /^\s*$/;

         $self->output ($output);
         $self->{MARGIN} = $realindent;
         $self->output ($self->reformat ($text)) if $text && $text =~ /\S/;
     } else {
         # Build the run of spaces that precedes the tag, then overwrite the
         # spaces that follow it in the formatted text with the tag itself.
         my $space = ' ' x $indent;
         $space =~ s/^$margin /$margin:/ if $self->{alt};
         $text = $self->reformat ($text);
         $text =~ s/^$margin /$margin:/ if ($self->{alt} && $indent > 0);
         my $tagspace = ' ' x length $tag;
         $text =~ s/^($space)$tagspace/$1$tag/ or warn "Bizarre space in item";
         $self->output ($text);
     }
 }
 562
 563
 564 ##############################################################################
 565 # Output formatting
 566 ##############################################################################
 567
 # Wrap a paragraph, indenting every line by the current left margin, and
 # return the result.  Lines are broken at whitespace where possible; a run
 # of non-whitespace longer than the available width is split mid-word.
 # Trailing whitespace on the result is replaced by a blank line.  We can't
 # use Text::Wrap because it plays games with tabs, and we can't use formline
 # because it screws up non-printing characters, so we wrap by hand.
 #
 # If the left margin leaves no room at all (width minus MARGIN is zero or
 # negative) the text is returned unwrapped.  A width of zero would otherwise
 # make the second substitution match the empty string forever, and a
 # negative width would build an invalid {0,-n} quantifier.
 sub wrap {
     my ($self, $text) = @_;
     my $output = '';
     my $spaces = ' ' x $$self{MARGIN};
     my $width  = $$self{width} - $$self{MARGIN};

     if ($width > 0) {
         while (length ($text) > $width) {
             # Prefer to break at whitespace within the width; failing that,
             # chop off exactly one line's worth of characters.
             if ($text =~ s/^([^\n]{0,$width})\s+//
                 || $text =~ s/^([^\n]{$width})//) {
                 $output .= $spaces . $1 . "\n";
             } else {
                 last;
             }
         }
     }
     $output .= $spaces . $text;
     $output =~ s/\s+$/\n\n/;
     return $output;
 }
 589
 # Reformat a paragraph of text for the current margin.  Takes the text and
 # returns it normalized and wrapped.  Normally every run of whitespace is
 # collapsed to a single space.  With the sentence option set, two spaces are
 # kept wherever the input had two or more, and a line ending in a period is
 # given two spaces before the following line.
 sub reformat {
     my ($self, $text) = @_;
     unless ($self->{sentence}) {
         $text =~ s/\s+/ /g;
     } else {
         $text =~ s/ +$//mg;
         $text =~ s/\.\n/. \n/g;
         $text =~ s/\n/ /g;
         $text =~ s/   +/  /g;
     }
     return $self->wrap ($text);
 }
 608
 # Print text to the output handle.  The \01 markers that stand in for
 # non-breaking spaces are first turned back into ordinary spaces; this is
 # done in place on the argument, so the caller's string changes as well.
 sub output {
     my $self = $_[0];
     $_[1] =~ tr/\01/ /;
     my $handle = $self->output_handle;
     print {$handle} $_[1];
 }
 611
 # Print a block of code (input that isn't part of the POD text).  Called by
 # preprocess_paragraph only while the parser is outside POD, which happens
 # when the code option was given.  It just forwards to output(); it is a
 # separate method so that subclasses can override it.
 sub output_code {
     my $self = shift;
     return $self->output ($_[0]);
 }
 616
 617
 618 ##############################################################################
 619 # Backwards compatibility
 620 ##############################################################################
 621
 # The old Pod::Text module did everything in a pod2text() function; this
 # offers the same interface to legacy applications.  Leading arguments of -a
 # (alt format) and -<number> (output width) are treated as options.  The
 # remaining arguments are an input file and, optionally, an output file
 # handle.
 sub pod2text {
     my @options;

     # Doing option parsing in the middle of a module is ugly, but the old
     # entry point accepted these flags.  Stop at the first argument that
     # starts with a dash but is not a recognized flag, and leave it in place.
     while ($_[0] =~ /^-/) {
         my $flag = shift;
         if ($flag eq '-a') {
             push (@options, alt => 1);
         } elsif ($flag =~ /^-(\d+)$/) {
             push (@options, width => $1);
         } else {
             unshift (@_, $flag);
             last;
         }
     }

     # Now that the options are known, create the parser.
     my $parser = Pod::Text->new (@options);

     # With a single argument, Pod::Parser opens the file itself.
     return $parser->parse_from_file (@_) unless defined $_[1];

     # With two arguments the second is a file handle, so we have to call
     # parse_from_filehandle() and turn the first argument into a handle too.
     # The two-argument open is deliberate: magic open handles the <&STDIN
     # case.  NOTE(review): that also means the name is interpreted by open,
     # so it must not come from an untrusted source.
     my @fhs = @_;
     local *IN;
     open (IN, $fhs[0]) or croak ("Can't open $fhs[0] for reading: $!\n");
     $fhs[0] = \*IN;
     return $parser->parse_from_filehandle (@fhs);
 }
 660
 661
 662 ##############################################################################
 663 # Module return value and documentation
 664 ##############################################################################
 665
 666 1;
 667 __END__
 668
 669 =head1 NAME
 670
 671 Pod::Text - Convert POD data to formatted ASCII text
 672
 673 =head1 SYNOPSIS
 674
 675     use Pod::Text;
 676     my $parser = Pod::Text->new (sentence => 0, width => 78);
 677
 678     # Read POD from STDIN and write to STDOUT.
 679     $parser->parse_from_filehandle;
 680
 681     # Read POD from file.pod and write to file.txt.
 682     $parser->parse_from_file ('file.pod', 'file.txt');
 683
 684 =head1 DESCRIPTION
 685
 686 Pod::Text is a module that can convert documentation in the POD format (the
 687 preferred language for documenting Perl) into formatted ASCII.  It uses no
 688 special formatting controls or codes whatsoever, and its output is therefore
 689 suitable for nearly any device.
 690
 691 As a derived class from Pod::Parser, Pod::Text supports the same methods and
 692 interfaces.  See L<Pod::Parser> for all the details; briefly, one creates a
 693 new parser with C<< Pod::Text->new() >> and then calls either
 694 parse_from_filehandle() or parse_from_file().
 695
 696 new() can take options, in the form of key/value pairs, that control the
 697 behavior of the parser.  The currently recognized options are:
 698
 699 =over 4
 700
 701 =item alt
 702
 703 If set to a true value, selects an alternate output format that, among other
 704 things, uses a different heading style and marks C<=item> entries with a
 705 colon in the left margin.  Defaults to false.
 706
 707 =item code
 708
 709 If set to a true value, the non-POD parts of the input file will be included
 710 in the output.  Useful for viewing code documented with POD blocks with the
 711 POD rendered and the code left intact.
 712
 713 =item indent
 714
 715 The number of spaces to indent regular text, and the default indentation for
 716 C<=over> blocks.  Defaults to 4.
 717
 718 =item loose
 719
 720 If set to a true value, a blank line is printed after a C<=head1> heading.
 721 If set to false (the default), no blank line is printed after C<=head1>,
 722 although one is still printed after C<=head2>.  This is the default because
 723 it's the expected formatting for manual pages; if you're formatting
 724 arbitrary text documents, setting this to true may result in more pleasing
 725 output.
 726
 727 =item margin
 728
 729 The width of the left margin in spaces.  Defaults to 0.  This is the margin
 730 for all text, including headings, not the amount by which regular text is
 731 indented; for the latter, see the I<indent> option.  To set the right
 732 margin, see the I<width> option.
 733
 734 =item quotes
 735
 736 Sets the quote marks used to surround CE<lt>> text.  If the value is a
 737 single character, it is used as both the left and right quote; if it is two
 738 characters, the first character is used as the left quote and the second as
 739 the right quote; and if it is four characters, the first two are used as
 740 the left quote and the second two as the right quote.
 741
 742 This may also be set to the special value C<none>, in which case no quote
 743 marks are added around CE<lt>> text.
 744
 745 =item sentence
 746
 747 If set to a true value, Pod::Text will assume that each sentence ends in two
 748 spaces, and will try to preserve that spacing.  If set to false, all
 749 consecutive whitespace in non-verbatim paragraphs is compressed into a
 750 single space.  Defaults to false.
 751
 752 =item width
 753
 754 The column at which to wrap text on the right-hand side.  Defaults to 76.
 755
 756 =back
 757
 758 The standard Pod::Parser method parse_from_filehandle() takes up to two
 759 arguments, the first being the file handle to read POD from and the second
 760 being the file handle to write the formatted output to.  The first defaults
 761 to STDIN if not given, and the second defaults to STDOUT.  The method
 762 parse_from_file() is almost identical, except that its two arguments are the
 763 input and output disk files instead.  See L<Pod::Parser> for the specific
 764 details.
 765
 766 =head1 DIAGNOSTICS
 767
 768 =over 4
 769
 770 =item Bizarre space in item
 771
 772 =item Item called without tag
 773
 774 (W) Something has gone wrong in internal C<=item> processing.  These
 775 messages indicate a bug in Pod::Text; you should never see them.
 776
 777 =item Can't open %s for reading: %s
 778
 779 (F) Pod::Text was invoked via the compatibility mode pod2text() interface
 780 and the input file it was given could not be opened.
 781
 782 =item Invalid quote specification "%s"
 783
 784 (F) The quote specification given (the quotes option to the constructor) was
 785 invalid.  A quote specification must be one, two, or four characters long.
 786
 787 =item %s:%d: Unknown command paragraph: %s
 788
 789 (W) The POD source contained a non-standard command paragraph (something of
 790 the form C<=command args>) that Pod::Text didn't know about.  It was ignored.
 791
 792 =item %s:%d: Unknown escape: %s
 793
 794 (W) The POD source contained an C<EE<lt>E<gt>> escape that Pod::Text didn't
 795 know about.
 796
 797 =item %s:%d: Unknown formatting code: %s
 798
 799 (W) The POD source contained a non-standard formatting code (something of
 800 the form C<XE<lt>E<gt>>) that Pod::Text didn't know about.
 801
 802 =item %s:%d: Unmatched =back
 803
 804 (W) Pod::Text encountered a C<=back> command that didn't correspond to an
 805 C<=over> command.
 806
 807 =back
 808
 809 =head1 RESTRICTIONS
 810
 811 Embedded Ctrl-As (octal 001) in the input will be mapped to spaces on
 812 output, due to an internal implementation detail.
 813
 814 =head1 NOTES
 815
 816 This is a replacement for an earlier Pod::Text module written by Tom
 817 Christiansen.  It has a revamped interface, since it now uses Pod::Parser,
 818 but an interface roughly compatible with the old Pod::Text::pod2text()
 819 function is still available.  Please change to the new calling convention,
 820 though.
 821
 822 The original Pod::Text contained code to do formatting via termcap
 823 sequences, although it wasn't turned on by default and it was problematic to
 824 get it to work at all.  This rewrite doesn't even try to do that, but a
 825 subclass of it does.  Look for L<Pod::Text::Termcap>.
 826
 827 =head1 SEE ALSO
 828
 829 L<Pod::Parser>, L<Pod::Text::Termcap>, L<pod2text(1)>
 830
 831 The current version of this module is always available from its web site at
 832 L<http://www.eyrie.org/~eagle/software/podlators/>.  It is also part of the
 833 Perl core distribution as of 5.6.0.
 834
 835 =head1 AUTHOR
 836
 837 Russ Allbery <rra@stanford.edu>, based I<very> heavily on the original
 838 Pod::Text by Tom Christiansen <tchrist@mox.perl.com> and its conversion to
 839 Pod::Parser by Brad Appleton <bradapp@enteract.com>.
 840
 841 =head1 COPYRIGHT AND LICENSE
 842
 843 Copyright 1999, 2000, 2001, 2002 by Russ Allbery <rra@stanford.edu>.
 844
 845 This program is free software; you may redistribute it and/or modify it
 846 under the same terms as Perl itself.
 847
 848 =cut